lib/human.c

   1 /* human.c -- print human readable file size
   2
   3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   4    Free Software Foundation, Inc.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 /* Written by Paul Eggert and Larry McVoy.  */
  21
  22 #if HAVE_CONFIG_H
  23 # include <config.h>
  24 #endif
  25
  26 #include "human.h"
  27
  28 #include <locale.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "gettext.h"
  34 #define _(msgid) gettext (msgid)
  35
  36 #include <argmatch.h>
  37 #include <error.h>
  38 #include <xstrtol.h>
  39
  /* Fallback definitions for hosts whose headers do not supply these
     limit macros.  Converting -1 to an unsigned type yields the
     largest value of that type.  */
  #if !defined SIZE_MAX
  # define SIZE_MAX ((size_t) -1)
  #endif
  #if !defined UINTMAX_MAX
  # define UINTMAX_MAX ((uintmax_t) -1)
  #endif

  /* Upper bound on the byte length of a unit suffix such as "KiB".  */
  #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
  49
  /* Unit letters, indexed by the power of the base (1000 or 1024) that
     the printed number is scaled by:

       1 'K' kibi         5 'P' peta or pebi
       2 'M' mega or mebi 6 'E' exa or exbi
       3 'G' giga or gibi 7 'Z' zetta or 2**70
       4 'T' tera or tebi 8 'Y' yotta or 2**80

     Index 0 is a placeholder that is never printed.  'k' for kilo is a
     special case handled by the code that emits the suffix.  The size
     of this array determines the largest power that can be used.  */
  static const char power_letter[] =
  {
    '\0', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'
  };
  62
  63
  64 /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
  65    possible, adjust VALUE according to the style.  */
  66
  67 static long double
  68 adjust_value (int inexact_style, long double value)
  69 {
  70   /* Do not use the floorl or ceill functions, as that would mean
  71      checking for their presence and possibly linking with the
  72      standard math library, which is a porting pain.  So leave the
  73      value alone if it is too large to easily round.  */
  74   if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
  75     {
  76       uintmax_t u = value;
  77       value = u + (inexact_style == human_ceiling && u != value);
  78     }
  79
  80   return value;
  81 }
  82
  /* Insert the locale's digit-group separators into NUMBER, in place.

     NUMBER points to NUMBERLEN digits.  The last digit keeps its
     position; the output grows towards lower addresses, so the caller
     must provide writable space before NUMBER for every separator byte
     that gets inserted.  Return the address of the first byte of the
     grouped number.

     GROUPING and THOUSANDS_SEP have the meaning they have in `struct
     lconv' from <locale.h>: each byte of GROUPING gives the size of the
     next group counting from the right, the last size given repeats,
     and a byte that is not less than CHAR_MAX puts all remaining digits
     into one final group.  */

  static char *
  group_number (char *number, size_t numberlen,
                char const *grouping, char const *thousands_sep)
  {
    /* Scratch copy of the ungrouped digits, needed because the output
       overwrites the input.  NUMBERLEN is at most the number of digits
       in the square of the largest uintmax_t, hence twice the bit width
       of uintmax_t; 302 / 1000 is a slight overestimate of log10 (2.0),
       and 1 is added for integer division truncation.  */
    char digits[2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1];
    size_t seplen = strlen (thousands_sep);
    size_t remaining = numberlen;   /* digits not yet emitted */
    size_t width = SIZE_MAX;        /* size of the current group */
    char *out = number + numberlen; /* start of the output so far */

    memcpy (digits, number, numberlen);

    while (1)
      {
        unsigned char rule = *grouping;

        /* A zero byte ends GROUPING; the previous width then repeats.  */
        if (rule != 0)
          {
            grouping++;
            width = rule < CHAR_MAX ? rule : remaining;
          }

        if (width > remaining)
          width = remaining;

        remaining -= width;
        out -= width;
        memcpy (out, digits + remaining, width);

        if (remaining == 0)
          break;

        out -= seplen;
        memcpy (out, thousands_sep, seplen);
      }

    return out;
  }
 133
 134 /* Convert N to a human readable format in BUF, using the options OPTS.
 135
 136    N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
 137    be nonnegative.
 138
 139    Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
 140    must be positive.
 141
 142    Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
 143    to determine whether to take the ceiling or floor of any result
 144    that cannot be expressed exactly.
 145
 146    If (OPTS & human_group_digits), group the thousands digits
 147    according to the locale, e.g., `1,000,000' in an American English
 148    locale.
 149
 150    If (OPTS & human_autoscale), deduce the output block size
 151    automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
 152    output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
 153    of 1000 otherwise.  For example, assuming powers of 1024, 8500
 154    would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
 155    so on.  Numbers smaller than the power aren't modified.
 156    human_autoscale is normally used together with human_SI.
 157
 158    If (OPTS & human_space_before_unit), use a space to separate the
 159    number from any suffix that is appended as described below.
 160
 161    If (OPTS & human_SI), append an SI prefix indicating which power is
 162    being used.  If in addition (OPTS & human_B), append "B" (if base
 163    1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
 164    human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
 165    power of 1024 or of 1000, depending on (OPTS &
 166    human_base_1024).  */
 167
 168 char *
 169 human_readable (uintmax_t n, char *buf, int opts,
 170                 uintmax_t from_block_size, uintmax_t to_block_size)
 171 {
 172   int inexact_style =
 173     opts & (human_round_to_nearest | human_floor | human_ceiling);
 174   unsigned int base = opts & human_base_1024 ? 1024 : 1000;
 175   uintmax_t amt;
 176   int tenths;
 177   int exponent = -1;
 178   int exponent_max = sizeof power_letter - 1;
 179   char *p;
 180   char *psuffix;
 181   char const *integerlim;
 182
 183   /* 0 means adjusted N == AMT.TENTHS;
 184      1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
 185      2 means adjusted N == AMT.TENTHS + 0.05;
 186      3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
 187   int rounding;
 188
 189   char const *decimal_point = ".";
 190   size_t decimal_pointlen = 1;
 191   char const *grouping = "";
 192   char const *thousands_sep = "";
 193   struct lconv const *l = localeconv ();
 194   size_t pointlen = strlen (l->decimal_point);
 195   if (0 < pointlen && pointlen <= MB_LEN_MAX)
 196     {
 197       decimal_point = l->decimal_point;
 198       decimal_pointlen = pointlen;
 199     }
 200   grouping = l->grouping;
 201   if (strlen (l->thousands_sep) <= MB_LEN_MAX)
 202     thousands_sep = l->thousands_sep;
 203
 204   psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
 205   p = psuffix;
 206
 207   /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
 208      units.  If this can be done exactly with integer arithmetic, do
 209      not use floating point operations.  */
 210   if (to_block_size <= from_block_size)
 211     {
 212       if (from_block_size % to_block_size == 0)
 213         {
 214           uintmax_t multiplier = from_block_size / to_block_size;
 215           amt = n * multiplier;
 216           if (amt / multiplier == n)
 217             {
 218               tenths = 0;
 219               rounding = 0;
 220               goto use_integer_arithmetic;
 221             }
 222         }
 223     }
 224   else if (from_block_size != 0 && to_block_size % from_block_size == 0)
 225     {
 226       uintmax_t divisor = to_block_size / from_block_size;
 227       uintmax_t r10 = (n % divisor) * 10;
 228       uintmax_t r2 = (r10 % divisor) * 2;
 229       amt = n / divisor;
 230       tenths = r10 / divisor;
 231       rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
 232       goto use_integer_arithmetic;
 233     }
 234
 235   {
 236     /* Either the result cannot be computed easily using uintmax_t,
 237        or from_block_size is zero.  Fall back on floating point.
 238        FIXME: This can yield answers that are slightly off.  */
 239
 240     long double dto_block_size = to_block_size;
 241     long double damt = n * (from_block_size / dto_block_size);
 242     size_t buflen;
 243     size_t nonintegerlen;
 244
 245     if (! (opts & human_autoscale))
 246       {
 247         sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
 248         buflen = strlen (buf);
 249         nonintegerlen = 0;
 250       }
 251     else
 252       {
 253         long double e = 1;
 254         exponent = 0;
 255
 256         do
 257           {
 258             e *= base;
 259             exponent++;
 260           }
 261         while (e * base <= damt && exponent < exponent_max);
 262
 263         damt /= e;
 264
 265         sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
 266         buflen = strlen (buf);
 267         nonintegerlen = decimal_pointlen + 1;
 268
 269         if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
 270             || ((opts & human_suppress_point_zero)
 271                 && buf[buflen - 1] == '0'))
 272           {
 273             sprintf (buf, "%.0Lf",
 274                      adjust_value (inexact_style, damt * 10) / 10);
 275             buflen = strlen (buf);
 276             nonintegerlen = 0;
 277           }
 278       }
 279
 280     p = psuffix - buflen;
 281     memmove (p, buf, buflen);
 282     integerlim = p + buflen - nonintegerlen;
 283   }
 284   goto do_grouping;
 285
 286  use_integer_arithmetic:
 287   {
 288     /* The computation can be done exactly, with integer arithmetic.
 289
 290        Use power of BASE notation if requested and if adjusted AMT is
 291        large enough.  */
 292
 293     if (opts & human_autoscale)
 294       {
 295         exponent = 0;
 296
 297         if (base <= amt)
 298           {
 299             do
 300               {
 301                 unsigned int r10 = (amt % base) * 10 + tenths;
 302                 unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
 303                 amt /= base;
 304                 tenths = r10 / base;
 305                 rounding = (r2 < base
 306                             ? (r2 + rounding) != 0
 307                             : 2 + (base < r2 + rounding));
 308                 exponent++;
 309               }
 310             while (base <= amt && exponent < exponent_max);
 311
 312             if (amt < 10)
 313               {
 314                 if (inexact_style == human_round_to_nearest
 315                     ? 2 < rounding + (tenths & 1)
 316                     : inexact_style == human_ceiling && 0 < rounding)
 317                   {
 318                     tenths++;
 319                     rounding = 0;
 320
 321                     if (tenths == 10)
 322                       {
 323                         amt++;
 324                         tenths = 0;
 325                       }
 326                   }
 327
 328                 if (amt < 10
 329                     && (tenths || ! (opts & human_suppress_point_zero)))
 330                   {
 331                     *--p = '0' + tenths;
 332                     p -= decimal_pointlen;
 333                     memcpy (p, decimal_point, decimal_pointlen);
 334                     tenths = rounding = 0;
 335                   }
 336               }
 337           }
 338       }
 339
 340     if (inexact_style == human_round_to_nearest
 341         ? 5 < tenths + (0 < rounding + (amt & 1))
 342         : inexact_style == human_ceiling && 0 < tenths + rounding)
 343       {
 344         amt++;
 345
 346         if ((opts & human_autoscale)
 347             && amt == base && exponent < exponent_max)
 348           {
 349             exponent++;
 350             if (! (opts & human_suppress_point_zero))
 351               {
 352                 *--p = '0';
 353                 p -= decimal_pointlen;
 354                 memcpy (p, decimal_point, decimal_pointlen);
 355               }
 356             amt = 1;
 357           }
 358       }
 359
 360     integerlim = p;
 361
 362     do
 363       {
 364         int digit = amt % 10;
 365         *--p = digit + '0';
 366       }
 367     while ((amt /= 10) != 0);
 368   }
 369
 370  do_grouping:
 371   if (opts & human_group_digits)
 372     p = group_number (p, integerlim - p, grouping, thousands_sep);
 373
 374   if (opts & human_SI)
 375     {
 376       if (exponent < 0)
 377         {
 378           uintmax_t power;
 379           exponent = 0;
 380           for (power = 1; power < to_block_size; power *= base)
 381             if (++exponent == exponent_max)
 382               break;
 383         }
 384
 385       if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
 386         *psuffix++ = ' ';
 387
 388       if (exponent)
 389         *psuffix++ = (! (opts & human_base_1024) && exponent == 1
 390                       ? 'k'
 391                       : power_letter[exponent]);
 392
 393       if (opts & human_B)
 394         {
 395           if ((opts & human_base_1024) && exponent)
 396             *psuffix++ = 'i';
 397           *psuffix++ = 'B';
 398         }
 399     }
 400
 401   *psuffix = '\0';
 402
 403   return p;
 404 }
 405
 406
 407 /* The default block size used for output.  This number may change in
 408    the future as disks get larger.  */
 409 #ifndef DEFAULT_BLOCK_SIZE
 410 # define DEFAULT_BLOCK_SIZE 1024
 411 #endif
 412
 413 static char const *const block_size_args[] = { "human-readable", "si", 0 };
 414 static int const block_size_opts[] =
 415   {
 416     human_autoscale + human_SI + human_base_1024,
 417     human_autoscale + human_SI
 418   };
 419
 420 static uintmax_t
 421 default_block_size (void)
 422 {
 423   return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
 424 }
 425
 426 static strtol_error
 427 humblock (char const *spec, uintmax_t *block_size, int *options)
 428 {
 429   int i;
 430   int opts = 0;
 431
 432   if (! spec
 433       && ! (spec = getenv ("BLOCK_SIZE"))
 434       && ! (spec = getenv ("BLOCKSIZE")))
 435     *block_size = default_block_size ();
 436   else
 437     {
 438       if (*spec == '\'')
 439         {
 440           opts |= human_group_digits;
 441           spec++;
 442         }
 443
 444       if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
 445         {
 446           opts |= block_size_opts[i];
 447           *block_size = 1;
 448         }
 449       else
 450         {
 451           char *ptr;
 452           strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
 453                                        "eEgGkKmMpPtTyYzZ0");
 454           if (e != LONGINT_OK)
 455             return e;
 456           for (; ! ('0' <= *spec && *spec <= '9'); spec++)
 457             if (spec == ptr)
 458               {
 459                 opts |= human_SI;
 460                 if (ptr[-1] == 'B')
 461                   opts |= human_B;
 462                 if (ptr[-1] != 'B' || ptr[-2] == 'i')
 463                   opts |= human_base_1024;
 464                 break;
 465               }
 466         }
 467     }
 468
 469   *options = opts;
 470   return LONGINT_OK;
 471 }
 472
 473 int
 474 human_options (char const *spec, bool report_errors, uintmax_t *block_size)
 475 {
 476   int opts;
 477   strtol_error e = humblock (spec, block_size, &opts);
 478   if (*block_size == 0)
 479     {
 480       *block_size = default_block_size ();
 481       e = LONGINT_INVALID;
 482     }
 483   if (e != LONGINT_OK && report_errors)
 484     STRTOL_FATAL_ERROR (spec, _("block size"), e);
 485   return opts;
 486 }