lib/human.c

   1 /* human.c -- print human readable file size
   2
   3    Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   4    Free Software Foundation, Inc.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software Foundation,
  18    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  19
  20 /* Written by Paul Eggert and Larry McVoy.  */
  21
  22 #if HAVE_CONFIG_H
  23 # include <config.h>
  24 #endif
  25
  26 #include "human.h"
  27
  28 #include <locale.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "gettext.h"
  34 #define _(msgid) gettext (msgid)
  35
  36 #include <argmatch.h>
  37 #include <error.h>
  38 #include <intprops.h>
  39 #include <xstrtol.h>
  40
  41 #ifndef SIZE_MAX
     /* Fallback used only when no header has defined SIZE_MAX:
        converting -1 to the unsigned type size_t yields its largest
        value.  */
  42 # define SIZE_MAX ((size_t) -1)
  43 #endif
  44 #ifndef UINTMAX_MAX
     /* Likewise, the largest uintmax_t value when no header supplies
        UINTMAX_MAX.  */
  45 # define UINTMAX_MAX ((uintmax_t) -1)
  46 #endif
  47
  48 /* The maximum length of a suffix like "KiB".  human_readable
        reserves this many bytes at the end of its buffer for the
        suffix.  */
  49 #define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
  50
  51 static const char power_letter[] =
  52 {
  53   0,    /* not used */
  54   'K',  /* kibi ('k' for kilo is a special case) */
  55   'M',  /* mega or mebi */
  56   'G',  /* giga or gibi */
  57   'T',  /* tera or tebi */
  58   'P',  /* peta or pebi */
  59   'E',  /* exa or exbi */
  60   'Z',  /* zetta or 2**70 */
  61   'Y'   /* yotta or 2**80 */
  62 };
  63
  64
  65 /* If INEXACT_STYLE is not human_round_to_nearest, and if easily
  66    possible, adjust VALUE according to the style.  */
  67
  68 static long double
  69 adjust_value (int inexact_style, long double value)
  70 {
  71   /* Do not use the floorl or ceill functions, as that would mean
  72      checking for their presence and possibly linking with the
  73      standard math library, which is a porting pain.  So leave the
  74      value alone if it is too large to easily round.  */
  75   if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
  76     {
  77       uintmax_t u = value;
  78       value = u + (inexact_style == human_ceiling && u != value);
  79     }
  80
  81   return value;
  82 }
  83
  84 /* Group the digits of NUMBER according to the grouping rules of the
  85    current locale.  NUMBER contains NUMBERLEN digits.  Modify the
  86    bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
  87    each byte inserted.  Return the starting address of the modified
  88    number.
  89
  90    To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
  91    lconv' from <locale.h>.  */
  92
  93 static char *
  94 group_number (char *number, size_t numberlen,
  95               char const *grouping, char const *thousands_sep)
  96 {
  97   register char *d;
  98   size_t grouplen = SIZE_MAX;
  99   size_t thousands_seplen = strlen (thousands_sep);
 100   size_t i = numberlen;
 101
 102   /* The maximum possible value for NUMBERLEN is the number of digits
 103      in the square of the largest uintmax_t, so double the size needed.  */
 104   char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
 105
 106   memcpy (buf, number, numberlen);
 107   d = number + numberlen;
 108
 109   for (;;)
 110     {
 111       unsigned char g = *grouping;
 112
 113       if (g)
 114         {
 115           grouplen = g < CHAR_MAX ? g : i;
 116           grouping++;
 117         }
 118
 119       if (i < grouplen)
 120         grouplen = i;
 121
 122       d -= grouplen;
 123       i -= grouplen;
 124       memcpy (d, buf + i, grouplen);
 125       if (i == 0)
 126         return d;
 127
 128       d -= thousands_seplen;
 129       memcpy (d, thousands_sep, thousands_seplen);
 130     }
 131 }
 132
 133 /* Convert N to a human readable format in BUF, using the options OPTS.
 134
 135    N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
 136    be nonnegative.
 137
 138    Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
 139    must be positive.
 140
 141    Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
 142    to determine whether to take the ceiling or floor of any result
 143    that cannot be expressed exactly.
 144
 145    If (OPTS & human_group_digits), group the thousands digits
 146    according to the locale, e.g., `1,000,000' in an American English
 147    locale.
 148
 149    If (OPTS & human_autoscale), deduce the output block size
 150    automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
 151    output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
 152    of 1000 otherwise.  For example, assuming powers of 1024, 8500
 153    would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
 154    so on.  Numbers smaller than the power aren't modified.
 155    human_autoscale is normally used together with human_SI.
 156
 157    If (OPTS & human_space_before_unit), use a space to separate the
 158    number from any suffix that is appended as described below.
 159
 160    If (OPTS & human_SI), append an SI prefix indicating which power is
 161    being used.  If in addition (OPTS & human_B), append "B" (if base
 162    1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
 163    human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
 164    power of 1024 or of 1000, depending on (OPTS &
 165    human_base_1024).

        Return a pointer to the first byte of the NUL-terminated result.
        The number is laid out so that it ends at BUF
        + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX,
        where the suffix (if any) and the terminating NUL begin, so the
        returned pointer lies inside BUF and is usually not BUF
        itself.  */
 166
 167 char *
 168 human_readable (uintmax_t n, char *buf, int opts,
 169                 uintmax_t from_block_size, uintmax_t to_block_size)
 170 {
 171   int inexact_style =
 172     opts & (human_round_to_nearest | human_floor | human_ceiling);
 173   unsigned int base = opts & human_base_1024 ? 1024 : 1000;
 174   uintmax_t amt;
 175   int tenths;
       /* EXPONENT stays negative until a power of BASE has been chosen;
          EXPONENT_MAX is the largest valid index into power_letter.  */
 176   int exponent = -1;
 177   int exponent_max = sizeof power_letter - 1;
       /* P is the start of the text built so far; PSUFFIX is where the
          suffix goes; INTEGERLIM is the end of the integer digits.  */
 178   char *p;
 179   char *psuffix;
 180   char const *integerlim;
 181
 182   /* 0 means adjusted N == AMT.TENTHS;
 183      1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
 184      2 means adjusted N == AMT.TENTHS + 0.05;
 185      3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
 186   int rounding;
 187
       /* Take the decimal point and thousands separator from the locale,
          keeping the defaults below when the locale's decimal point is
          empty or when either string is longer than MB_LEN_MAX bytes.
          The grouping rules are always taken from the locale.  */
 188   char const *decimal_point = ".";
 189   size_t decimal_pointlen = 1;
 190   char const *grouping = "";
 191   char const *thousands_sep = "";
 192   struct lconv const *l = localeconv ();
 193   size_t pointlen = strlen (l->decimal_point);
 194   if (0 < pointlen && pointlen <= MB_LEN_MAX)
 195     {
 196       decimal_point = l->decimal_point;
 197       decimal_pointlen = pointlen;
 198     }
 199   grouping = l->grouping;
 200   if (strlen (l->thousands_sep) <= MB_LEN_MAX)
 201     thousands_sep = l->thousands_sep;
 202
       /* The number is generated right to left, ending just before
          PSUFFIX.
          NOTE(review): with human_SI, human_space_before_unit, human_B
          and human_base_1024 all set and a nonzero exponent, the suffix
          code at the end of this function stores four bytes (' ', the
          power letter, 'i', 'B') plus the NUL starting at PSUFFIX, one
          more than HUMAN_READABLE_SUFFIX_LENGTH_MAX.  Confirm that
          LONGEST_HUMAN_READABLE (defined elsewhere) and callers'
          buffers leave room for that.  */
 203   psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
 204   p = psuffix;
 205
 206   /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
 207      units.  If this can be done exactly with integer arithmetic, do
 208      not use floating point operations.  */
 209   if (to_block_size <= from_block_size)
 210     {
 211       if (from_block_size % to_block_size == 0)
 212         {
 213           uintmax_t multiplier = from_block_size / to_block_size;
 214           amt = n * multiplier;
               /* Dividing back detects wraparound in the product; on
                  overflow, fall through to the floating point code.  */
 215           if (amt / multiplier == n)
 216             {
 217               tenths = 0;
 218               rounding = 0;
 219               goto use_integer_arithmetic;
 220             }
 221         }
 222     }
 223   else if (from_block_size != 0 && to_block_size % from_block_size == 0)
 224     {
           /* Exact division: AMT is the quotient, TENTHS the first
              decimal digit of the remainder, and ROUNDING classifies
              what is left beyond that digit (see ROUNDING above).  */
 225       uintmax_t divisor = to_block_size / from_block_size;
 226       uintmax_t r10 = (n % divisor) * 10;
 227       uintmax_t r2 = (r10 % divisor) * 2;
 228       amt = n / divisor;
 229       tenths = r10 / divisor;
 230       rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
 231       goto use_integer_arithmetic;
 232     }
 233
 234   {
 235     /* Either the result cannot be computed easily using uintmax_t,
 236        or from_block_size is zero.  Fall back on floating point.
 237        FIXME: This can yield answers that are slightly off.  */
 238
 239     long double dto_block_size = to_block_size;
 240     long double damt = n * (from_block_size / dto_block_size);
 241     size_t buflen;
 242     size_t nonintegerlen;
 243
 244     if (! (opts & human_autoscale))
 245       {
 246         sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
 247         buflen = strlen (buf);
 248         nonintegerlen = 0;
 249       }
 250     else
 251       {
 252         long double e = 1;
 253         exponent = 0;
 254
             /* Pick the power E of BASE to scale by.  Because this is a
                do-while loop, EXPONENT is at least 1 afterwards, even if
                DAMT is smaller than BASE.  */
 255         do
 256           {
 257             e *= base;
 258             exponent++;
 259           }
 260         while (e * base <= damt && exponent < exponent_max);
 261
 262         damt /= e;
 263
 264         sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
 265         buflen = strlen (buf);
 266         nonintegerlen = decimal_pointlen + 1;
 267
             /* Reprint without a fractional digit if the one-decimal
                text is longer than the threshold below (which allows
                one more byte unless human_base_1024 is set), or if it
                ends in '0' and human_suppress_point_zero is set.  */
 268         if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
 269             || ((opts & human_suppress_point_zero)
 270                 && buf[buflen - 1] == '0'))
 271           {
 272             sprintf (buf, "%.0Lf",
 273                      adjust_value (inexact_style, damt * 10) / 10);
 274             buflen = strlen (buf);
 275             nonintegerlen = 0;
 276           }
 277       }
 278
         /* sprintf wrote at the start of BUF; slide the text so that it
            ends at PSUFFIX.  Source and destination are both inside
            BUF, hence memmove rather than memcpy.  */
 279     p = psuffix - buflen;
 280     memmove (p, buf, buflen);
 281     integerlim = p + buflen - nonintegerlen;
 282   }
 283   goto do_grouping;
 284
 285  use_integer_arithmetic:
 286   {
 287     /* The computation can be done exactly, with integer arithmetic.
 288
 289        Use power of BASE notation if requested and if adjusted AMT is
 290        large enough.  */
 291
 292     if (opts & human_autoscale)
 293       {
 294         exponent = 0;
 295
 296         if (base <= amt)
 297           {
                 /* Divide AMT by BASE until it drops below BASE or the
                    largest exponent is reached, carrying the discarded
                    remainder into TENTHS and ROUNDING each time.  */
 298             do
 299               {
 300                 unsigned int r10 = (amt % base) * 10 + tenths;
 301                 unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
 302                 amt /= base;
 303                 tenths = r10 / base;
 304                 rounding = (r2 < base
 305                             ? (r2 + rounding) != 0
 306                             : 2 + (base < r2 + rounding));
 307                 exponent++;
 308               }
 309             while (base <= amt && exponent < exponent_max);
 310
                 /* A fractional digit is printed only when the integer
                    part is a single digit.  */
 311             if (amt < 10)
 312               {
                     /* Round TENTHS according to the style; for
                        round-to-nearest, (tenths & 1) breaks an exact
                        tie toward an even digit.  */
 313                 if (inexact_style == human_round_to_nearest
 314                     ? 2 < rounding + (tenths & 1)
 315                     : inexact_style == human_ceiling && 0 < rounding)
 316                   {
 317                     tenths++;
 318                     rounding = 0;
 319
 320                     if (tenths == 10)
 321                       {
 322                         amt++;
 323                         tenths = 0;
 324                       }
 325                   }
 326
                     /* AMT may have just become 10, in which case no
                        fractional digit is printed.  Otherwise emit the
                        digit and decimal point (right to left) and clear
                        TENTHS and ROUNDING so that the integer rounding
                        step below does nothing further.  */
 327                 if (amt < 10
 328                     && (tenths || ! (opts & human_suppress_point_zero)))
 329                   {
 330                     *--p = '0' + tenths;
 331                     p -= decimal_pointlen;
 332                     memcpy (p, decimal_point, decimal_pointlen);
 333                     tenths = rounding = 0;
 334                   }
 335               }
 336           }
 337       }
 338
         /* Round AMT to an integer using whatever remains in TENTHS and
            ROUNDING; for round-to-nearest, (amt & 1) breaks an exact
            tie toward an even AMT.  */
 339     if (inexact_style == human_round_to_nearest
 340         ? 5 < tenths + (0 < rounding + (amt & 1))
 341         : inexact_style == human_ceiling && 0 < tenths + rounding)
 342       {
 343         amt++;
 344
             /* If rounding up reached BASE, restate the value as 1 (or
                1.0) at the next higher exponent.  */
 345         if ((opts & human_autoscale)
 346             && amt == base && exponent < exponent_max)
 347           {
 348             exponent++;
 349             if (! (opts & human_suppress_point_zero))
 350               {
 351                 *--p = '0';
 352                 p -= decimal_pointlen;
 353                 memcpy (p, decimal_point, decimal_pointlen);
 354               }
 355             amt = 1;
 356           }
 357       }
 358
 359     integerlim = p;
 360
         /* Emit the decimal digits of AMT, least significant first,
            moving P toward the start of BUF.  */
 361     do
 362       {
 363         int digit = amt % 10;
 364         *--p = digit + '0';
 365       }
 366     while ((amt /= 10) != 0);
 367   }
 368
 369  do_grouping:
       /* Only the integer digits, from P up to INTEGERLIM, are
          grouped.  */
 370   if (opts & human_group_digits)
 371     p = group_number (p, integerlim - p, grouping, thousands_sep);
 372
 373   if (opts & human_SI)
 374     {
           /* EXPONENT is still negative when no autoscaling was done.
              In that case count how many times 1 must be multiplied by
              BASE to reach at least TO_BLOCK_SIZE, stopping at
              EXPONENT_MAX.  */
 375       if (exponent < 0)
 376         {
 377           uintmax_t power;
 378           exponent = 0;
 379           for (power = 1; power < to_block_size; power *= base)
 380             if (++exponent == exponent_max)
 381               break;
 382         }
 383
           /* Emit the separating space only if some suffix follows.  */
 384       if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
 385         *psuffix++ = ' ';
 386
           /* With powers of 1000, the first power is written as a
              lower-case 'k'; every other case uses power_letter.  */
 387       if (exponent)
 388         *psuffix++ = (! (opts & human_base_1024) && exponent == 1
 389                       ? 'k'
 390                       : power_letter[exponent]);
 391
 392       if (opts & human_B)
 393         {
 394           if ((opts & human_base_1024) && exponent)
 395             *psuffix++ = 'i';
 396           *psuffix++ = 'B';
 397         }
 398     }
 399
 400   *psuffix = '\0';
 401
 402   return p;
 403 }
 404
 405
 406 /* The default block size used for output.  This number may change in
 407    the future as disks get larger.  */
 408 #ifndef DEFAULT_BLOCK_SIZE
 409 # define DEFAULT_BLOCK_SIZE 1024
 410 #endif
 411
 412 static char const *const block_size_args[] = { "human-readable", "si", 0 };
 413 static int const block_size_opts[] =
 414   {
 415     human_autoscale + human_SI + human_base_1024,
 416     human_autoscale + human_SI
 417   };
 418
 419 static uintmax_t
 420 default_block_size (void)
 421 {
 422   return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
 423 }
 424
 425 static strtol_error
 426 humblock (char const *spec, uintmax_t *block_size, int *options)
 427 {
 428   int i;
 429   int opts = 0;
 430
 431   if (! spec
 432       && ! (spec = getenv ("BLOCK_SIZE"))
 433       && ! (spec = getenv ("BLOCKSIZE")))
 434     *block_size = default_block_size ();
 435   else
 436     {
 437       if (*spec == '\'')
 438         {
 439           opts |= human_group_digits;
 440           spec++;
 441         }
 442
 443       if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
 444         {
 445           opts |= block_size_opts[i];
 446           *block_size = 1;
 447         }
 448       else
 449         {
 450           char *ptr;
 451           strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
 452                                        "eEgGkKmMpPtTyYzZ0");
 453           if (e != LONGINT_OK)
 454             return e;
 455           for (; ! ('0' <= *spec && *spec <= '9'); spec++)
 456             if (spec == ptr)
 457               {
 458                 opts |= human_SI;
 459                 if (ptr[-1] == 'B')
 460                   opts |= human_B;
 461                 if (ptr[-1] != 'B' || ptr[-2] == 'i')
 462                   opts |= human_base_1024;
 463                 break;
 464               }
 465         }
 466     }
 467
 468   *options = opts;
 469   return LONGINT_OK;
 470 }
 471
 472 int
 473 human_options (char const *spec, bool report_errors, uintmax_t *block_size)
 474 {
 475   int opts;
 476   strtol_error e = humblock (spec, block_size, &opts);
 477   if (*block_size == 0)
 478     {
 479       *block_size = default_block_size ();
 480       e = LONGINT_INVALID;
 481     }
 482   if (e != LONGINT_OK && report_errors)
 483     STRTOL_FATAL_ERROR (spec, _("block size"), e);
 484   return opts;
 485 }