lib/human.c

   1 /* human.c -- print human readable file size
   2
   3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4    2005, 2006 Free Software Foundation, Inc.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  19
  20 /* Written by Paul Eggert and Larry McVoy.  */
  21
  22 #include <config.h>
  23
  24 #include "human.h"
  25
  26 #include <locale.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "gettext.h"
  32 #define _(msgid) gettext (msgid)
  33
  34 #include <argmatch.h>
  35 #include <error.h>
  36 #include <intprops.h>
  37 #include <xstrtol.h>
  38
  39 /* The maximum length of a suffix like "KiB".  */
  40 #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
  41
  42 static const char power_letter[] =
  43 {
  44   0,    /* not used */
  45   'K',  /* kibi ('k' for kilo is a special case) */
  46   'M',  /* mega or mebi */
  47   'G',  /* giga or gibi */
  48   'T',  /* tera or tebi */
  49   'P',  /* peta or pebi */
  50   'E',  /* exa or exbi */
  51   'Z',  /* zetta or 2**70 */
  52   'Y'   /* yotta or 2**80 */
  53 };
  54
  55
  56 /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
  57    possible, adjust VALUE according to the style.  */
  58
  59 static long double
  60 adjust_value (int inexact_style, long double value)
  61 {
  62   /* Do not use the floorl or ceill functions, as that would mean
  63      checking for their presence and possibly linking with the
  64      standard math library, which is a porting pain.  So leave the
  65      value alone if it is too large to easily round.  */
  66   if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
  67     {
  68       uintmax_t u = value;
  69       value = u + (inexact_style == human_ceiling && u != value);
  70     }
  71
  72   return value;
  73 }
  74
  75 /* Group the digits of NUMBER according to the grouping rules of the
  76    current locale.  NUMBER contains NUMBERLEN digits.  Modify the
  77    bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
  78    each byte inserted.  Return the starting address of the modified
  79    number.
  80
  81    To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
  82    lconv' from <locale.h>.  */
  83
  84 static char *
  85 group_number (char *number, size_t numberlen,
  86               char const *grouping, char const *thousands_sep)
  87 {
  88   register char *d;
  89   size_t grouplen = SIZE_MAX;
  90   size_t thousands_seplen = strlen (thousands_sep);
  91   size_t i = numberlen;
  92
  93   /* The maximum possible value for NUMBERLEN is the number of digits
  94      in the square of the largest uintmax_t, so double the size needed.  */
  95   char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
  96
  97   memcpy (buf, number, numberlen);
  98   d = number + numberlen;
  99
 100   for (;;)
 101     {
 102       unsigned char g = *grouping;
 103
 104       if (g)
 105         {
 106           grouplen = g < CHAR_MAX ? g : i;
 107           grouping++;
 108         }
 109
 110       if (i < grouplen)
 111         grouplen = i;
 112
 113       d -= grouplen;
 114       i -= grouplen;
 115       memcpy (d, buf + i, grouplen);
 116       if (i == 0)
 117         return d;
 118
 119       d -= thousands_seplen;
 120       memcpy (d, thousands_sep, thousands_seplen);
 121     }
 122 }
 123
 124 /* Convert N to a human readable format in BUF, using the options OPTS.
 125
 126    N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
 127    be nonnegative.
 128
 129    Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
 130    must be positive.
 131
 132    Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
 133    to determine whether to take the ceiling or floor of any result
 134    that cannot be expressed exactly.
 135
 136    If (OPTS & human_group_digits), group the thousands digits
 137    according to the locale, e.g., `1,000,000' in an American English
 138    locale.
 139
 140    If (OPTS & human_autoscale), deduce the output block size
 141    automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
 142    output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
 143    of 1000 otherwise.  For example, assuming powers of 1024, 8500
 144    would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
 145    so on.  Numbers smaller than the power aren't modified.
 146    human_autoscale is normally used together with human_SI.
 147
 148    If (OPTS & human_space_before_unit), use a space to separate the
 149    number from any suffix that is appended as described below.
 150
 151    If (OPTS & human_SI), append an SI prefix indicating which power is
 152    being used.  If in addition (OPTS & human_B), append "B" (if base
 153    1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
 154    human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
 155    power of 1024 or of 1000, depending on (OPTS &
 156    human_base_1024).  */
 157
 158 char *
 159 human_readable (uintmax_t n, char *buf, int opts,
 160                 uintmax_t from_block_size, uintmax_t to_block_size)
 161 {
 162   int inexact_style =
 163     opts & (human_round_to_nearest | human_floor | human_ceiling);
 164   unsigned int base = opts & human_base_1024 ? 1024 : 1000;
 165   uintmax_t amt;
 166   int tenths;
 167   int exponent = -1;
 168   int exponent_max = sizeof power_letter - 1;
 169   char *p;
 170   char *psuffix;
 171   char const *integerlim;
 172
 173   /* 0 means adjusted N == AMT.TENTHS;
 174      1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
 175      2 means adjusted N == AMT.TENTHS + 0.05;
 176      3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
 177   int rounding;
 178
 179   char const *decimal_point = ".";
 180   size_t decimal_pointlen = 1;
 181   char const *grouping = "";
 182   char const *thousands_sep = "";
 183   struct lconv const *l = localeconv ();
 184   size_t pointlen = strlen (l->decimal_point);
 185   if (0 < pointlen && pointlen <= MB_LEN_MAX)
 186     {
 187       decimal_point = l->decimal_point;
 188       decimal_pointlen = pointlen;
 189     }
 190   grouping = l->grouping;
 191   if (strlen (l->thousands_sep) <= MB_LEN_MAX)
 192     thousands_sep = l->thousands_sep;
 193
 194   psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
 195   p = psuffix;
 196
 197   /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
 198      units.  If this can be done exactly with integer arithmetic, do
 199      not use floating point operations.  */
 200   if (to_block_size <= from_block_size)
 201     {
 202       if (from_block_size % to_block_size == 0)
 203         {
 204           uintmax_t multiplier = from_block_size / to_block_size;
 205           amt = n * multiplier;
 206           if (amt / multiplier == n)
 207             {
 208               tenths = 0;
 209               rounding = 0;
 210               goto use_integer_arithmetic;
 211             }
 212         }
 213     }
 214   else if (from_block_size != 0 && to_block_size % from_block_size == 0)
 215     {
 216       uintmax_t divisor = to_block_size / from_block_size;
 217       uintmax_t r10 = (n % divisor) * 10;
 218       uintmax_t r2 = (r10 % divisor) * 2;
 219       amt = n / divisor;
 220       tenths = r10 / divisor;
 221       rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
 222       goto use_integer_arithmetic;
 223     }
 224
 225   {
 226     /* Either the result cannot be computed easily using uintmax_t,
 227        or from_block_size is zero.  Fall back on floating point.
 228        FIXME: This can yield answers that are slightly off.  */
 229
 230     long double dto_block_size = to_block_size;
 231     long double damt = n * (from_block_size / dto_block_size);
 232     size_t buflen;
 233     size_t nonintegerlen;
 234
 235     if (! (opts & human_autoscale))
 236       {
 237         sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
 238         buflen = strlen (buf);
 239         nonintegerlen = 0;
 240       }
 241     else
 242       {
 243         long double e = 1;
 244         exponent = 0;
 245
 246         do
 247           {
 248             e *= base;
 249             exponent++;
 250           }
 251         while (e * base <= damt && exponent < exponent_max);
 252
 253         damt /= e;
 254
 255         sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
 256         buflen = strlen (buf);
 257         nonintegerlen = decimal_pointlen + 1;
 258
 259         if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
 260             || ((opts & human_suppress_point_zero)
 261                 && buf[buflen - 1] == '0'))
 262           {
 263             sprintf (buf, "%.0Lf",
 264                      adjust_value (inexact_style, damt * 10) / 10);
 265             buflen = strlen (buf);
 266             nonintegerlen = 0;
 267           }
 268       }
 269
 270     p = psuffix - buflen;
 271     memmove (p, buf, buflen);
 272     integerlim = p + buflen - nonintegerlen;
 273   }
 274   goto do_grouping;
 275
 276  use_integer_arithmetic:
 277   {
 278     /* The computation can be done exactly, with integer arithmetic.
 279
 280        Use power of BASE notation if requested and if adjusted AMT is
 281        large enough.  */
 282
 283     if (opts & human_autoscale)
 284       {
 285         exponent = 0;
 286
 287         if (base <= amt)
 288           {
 289             do
 290               {
 291                 unsigned int r10 = (amt % base) * 10 + tenths;
 292                 unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
 293                 amt /= base;
 294                 tenths = r10 / base;
 295                 rounding = (r2 < base
 296                             ? (r2 + rounding) != 0
 297                             : 2 + (base < r2 + rounding));
 298                 exponent++;
 299               }
 300             while (base <= amt && exponent < exponent_max);
 301
 302             if (amt < 10)
 303               {
 304                 if (inexact_style == human_round_to_nearest
 305                     ? 2 < rounding + (tenths & 1)
 306                     : inexact_style == human_ceiling && 0 < rounding)
 307                   {
 308                     tenths++;
 309                     rounding = 0;
 310
 311                     if (tenths == 10)
 312                       {
 313                         amt++;
 314                         tenths = 0;
 315                       }
 316                   }
 317
 318                 if (amt < 10
 319                     && (tenths || ! (opts & human_suppress_point_zero)))
 320                   {
 321                     *--p = '0' + tenths;
 322                     p -= decimal_pointlen;
 323                     memcpy (p, decimal_point, decimal_pointlen);
 324                     tenths = rounding = 0;
 325                   }
 326               }
 327           }
 328       }
 329
 330     if (inexact_style == human_round_to_nearest
 331         ? 5 < tenths + (0 < rounding + (amt & 1))
 332         : inexact_style == human_ceiling && 0 < tenths + rounding)
 333       {
 334         amt++;
 335
 336         if ((opts & human_autoscale)
 337             && amt == base && exponent < exponent_max)
 338           {
 339             exponent++;
 340             if (! (opts & human_suppress_point_zero))
 341               {
 342                 *--p = '0';
 343                 p -= decimal_pointlen;
 344                 memcpy (p, decimal_point, decimal_pointlen);
 345               }
 346             amt = 1;
 347           }
 348       }
 349
 350     integerlim = p;
 351
 352     do
 353       {
 354         int digit = amt % 10;
 355         *--p = digit + '0';
 356       }
 357     while ((amt /= 10) != 0);
 358   }
 359
 360  do_grouping:
 361   if (opts & human_group_digits)
 362     p = group_number (p, integerlim - p, grouping, thousands_sep);
 363
 364   if (opts & human_SI)
 365     {
 366       if (exponent < 0)
 367         {
 368           uintmax_t power;
 369           exponent = 0;
 370           for (power = 1; power < to_block_size; power *= base)
 371             if (++exponent == exponent_max)
 372               break;
 373         }
 374
 375       if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
 376         *psuffix++ = ' ';
 377
 378       if (exponent)
 379         *psuffix++ = (! (opts & human_base_1024) && exponent == 1
 380                       ? 'k'
 381                       : power_letter[exponent]);
 382
 383       if (opts & human_B)
 384         {
 385           if ((opts & human_base_1024) && exponent)
 386             *psuffix++ = 'i';
 387           *psuffix++ = 'B';
 388         }
 389     }
 390
 391   *psuffix = '\0';
 392
 393   return p;
 394 }
 395
 396
 397 /* The default block size used for output.  This number may change in
 398    the future as disks get larger.  */
 399 #ifndef DEFAULT_BLOCK_SIZE
 400 # define DEFAULT_BLOCK_SIZE 1024
 401 #endif
 402
 403 static char const *const block_size_args[] = { "human-readable", "si", 0 };
 404 static int const block_size_opts[] =
 405   {
 406     human_autoscale + human_SI + human_base_1024,
 407     human_autoscale + human_SI
 408   };
 409
 410 static uintmax_t
 411 default_block_size (void)
 412 {
 413   return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
 414 }
 415
 416 static strtol_error
 417 humblock (char const *spec, uintmax_t *block_size, int *options)
 418 {
 419   int i;
 420   int opts = 0;
 421
 422   if (! spec
 423       && ! (spec = getenv ("BLOCK_SIZE"))
 424       && ! (spec = getenv ("BLOCKSIZE")))
 425     *block_size = default_block_size ();
 426   else
 427     {
 428       if (*spec == '\'')
 429         {
 430           opts |= human_group_digits;
 431           spec++;
 432         }
 433
 434       if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
 435         {
 436           opts |= block_size_opts[i];
 437           *block_size = 1;
 438         }
 439       else
 440         {
 441           char *ptr;
 442           strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
 443                                        "eEgGkKmMpPtTyYzZ0");
 444           if (e != LONGINT_OK)
 445             {
 446               *options = 0;
 447               return e;
 448             }
 449           for (; ! ('0' <= *spec && *spec <= '9'); spec++)
 450             if (spec == ptr)
 451               {
 452                 opts |= human_SI;
 453                 if (ptr[-1] == 'B')
 454                   opts |= human_B;
 455                 if (ptr[-1] != 'B' || ptr[-2] == 'i')
 456                   opts |= human_base_1024;
 457                 break;
 458               }
 459         }
 460     }
 461
 462   *options = opts;
 463   return LONGINT_OK;
 464 }
 465
 466 int
 467 human_options (char const *spec, bool report_errors, uintmax_t *block_size)
 468 {
 469   int opts;
 470   strtol_error e = humblock (spec, block_size, &opts);
 471   if (*block_size == 0)
 472     {
 473       *block_size = default_block_size ();
 474       e = LONGINT_INVALID;
 475     }
 476   if (e != LONGINT_OK && report_errors)
 477     STRTOL_FATAL_ERROR (spec, _("block size"), e);
 478   return opts;
 479 }