lib/base32.c

   1 /* base32.c -- Encode binary data using printable characters.
   2    Copyright (C) 1999-2001, 2004-2006, 2009-2011 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  17
  18 /* Adapted from Simon Josefsson's base64 code by Gijs van Tulder.
  19  *
  20  * See also RFC 4648 <http://www.ietf.org/rfc/rfc4648.txt>.
  21  *
  22  * Be careful with error checking.  Here is how you would typically
  23  * use these functions:
  24  *
  25  * bool ok = base32_decode_alloc (in, inlen, &out, &outlen);
  26  * if (!ok)
  27  *   FAIL: input was not valid base32
  28  * if (out == NULL)
  29  *   FAIL: memory allocation error
  30  * OK: data in OUT/OUTLEN
  31  *
  32  * size_t outlen = base32_encode_alloc (in, inlen, &out);
  33  * if (out == NULL && outlen == 0 && inlen != 0)
  34  *   FAIL: input too long
  35  * if (out == NULL)
  36  *   FAIL: memory allocation error
  37  * OK: data in OUT/OUTLEN.
  38  *
  39  */
  40
  41 #include <config.h>
  42
  43 /* Get prototype. */
  44 #include "base32.h"
  45
  46 /* Get malloc. */
  47 #include <stdlib.h>
  48
  49 /* Get UCHAR_MAX. */
  50 #include <limits.h>
  51
  52 #include <string.h>
  53
  54 /* C89 compliant way to cast 'char' to 'unsigned char'. */
  55 static inline unsigned char
  56 to_uchar (char ch)
  57 {
  58   return ch;
  59 }
  60
  61 /* Base32 encode IN array of size INLEN into OUT array of size OUTLEN.
  62    If OUTLEN is less than BASE32_LENGTH(INLEN), write as many bytes as
  63    possible.  If OUTLEN is larger than BASE32_LENGTH(INLEN), also zero
  64    terminate the output buffer. */
  65 void
  66 base32_encode (const char *restrict in, size_t inlen,
  67                char *restrict out, size_t outlen)
  68 {
  69   static const char b32str[32] =
  70     "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
  71
  72   while (inlen && outlen)
  73     {
  74       *out++ = b32str[(to_uchar (in[0]) >> 3) & 0x1f];
  75       if (!--outlen)
  76         break;
  77       *out++ = b32str[((to_uchar (in[0]) << 2)
  78                        + (--inlen ? to_uchar (in[1]) >> 6 : 0))
  79                       & 0x1f];
  80       if (!--outlen)
  81         break;
  82       *out++ =
  83         (inlen
  84          ? b32str[(to_uchar (in[1]) >> 1) & 0x1f]
  85          : '=');
  86       if (!--outlen)
  87         break;
  88       *out++ =
  89         (inlen
  90          ? b32str[((to_uchar (in[1]) << 4)
  91                    + (--inlen ? to_uchar (in[2]) >> 4 : 0))
  92                   & 0x1f]
  93          : '=');
  94       if (!--outlen)
  95         break;
  96       *out++ =
  97         (inlen
  98          ? b32str[((to_uchar (in[2]) << 1)
  99                    + (--inlen ? to_uchar (in[3]) >> 7 : 0))
 100                   & 0x1f]
 101          : '=');
 102       if (!--outlen)
 103         break;
 104       *out++ =
 105         (inlen
 106          ? b32str[(to_uchar (in[3]) >> 2) & 0x1f]
 107          : '=');
 108       if (!--outlen)
 109         break;
 110       *out++ =
 111         (inlen
 112          ? b32str[((to_uchar (in[3]) << 3)
 113                    + (--inlen ? to_uchar (in[4]) >> 5 : 0))
 114                   & 0x1f]
 115          : '=');
 116       if (!--outlen)
 117         break;
 118       *out++ = inlen ? b32str[to_uchar (in[4]) & 0x1f] : '=';
 119       if (!--outlen)
 120         break;
 121       if (inlen)
 122         inlen--;
 123       if (inlen)
 124         in += 5;
 125     }
 126
 127   if (outlen)
 128     *out = '\0';
 129 }
 130
 131 /* Allocate a buffer and store zero terminated base32 encoded data
 132    from array IN of size INLEN, returning BASE32_LENGTH(INLEN), i.e.,
 133    the length of the encoded data, excluding the terminating zero.  On
 134    return, the OUT variable will hold a pointer to newly allocated
 135    memory that must be deallocated by the caller.  If output string
 136    length would overflow, 0 is returned and OUT is set to NULL.  If
 137    memory allocation failed, OUT is set to NULL, and the return value
 138    indicates length of the requested memory block, i.e.,
 139    BASE32_LENGTH(inlen) + 1. */
 140 size_t
 141 base32_encode_alloc (const char *in, size_t inlen, char **out)
 142 {
 143   size_t outlen = 1 + BASE32_LENGTH (inlen);
 144
 145   /* Check for overflow in outlen computation.
 146    *
 147    * If there is no overflow, outlen >= inlen.
 148    *
 149    * TODO Is this a sufficient check?  (See the notes in base64.c.)
 150    */
 151   if (inlen > outlen)
 152     {
 153       *out = NULL;
 154       return 0;
 155     }
 156
 157   *out = malloc (outlen);
 158   if (!*out)
 159     return outlen;
 160
 161   base32_encode (in, inlen, *out, outlen);
 162
 163   return outlen - 1;
 164 }
 165
 166 /* With this approach this file works independent of the charset used
 167    (think EBCDIC).  However, it does assume that the characters in the
 168    Base32 alphabet (A-Z2-7) are encoded in 0..255.  POSIX
 169    1003.1-2001 require that char and unsigned char are 8-bit
 170    quantities, though, taking care of that problem.  But this may be a
 171    potential problem on non-POSIX C99 platforms.
 172
 173    IBM C V6 for AIX mishandles "#define B32(x) ...'x'...", so use "_"
 174    as the formal parameter rather than "x".  */
 175 #define B32(_)                                  \
 176   ((_) == 'A' ? 0                               \
 177    : (_) == 'B' ? 1                             \
 178    : (_) == 'C' ? 2                             \
 179    : (_) == 'D' ? 3                             \
 180    : (_) == 'E' ? 4                             \
 181    : (_) == 'F' ? 5                             \
 182    : (_) == 'G' ? 6                             \
 183    : (_) == 'H' ? 7                             \
 184    : (_) == 'I' ? 8                             \
 185    : (_) == 'J' ? 9                             \
 186    : (_) == 'K' ? 10                            \
 187    : (_) == 'L' ? 11                            \
 188    : (_) == 'M' ? 12                            \
 189    : (_) == 'N' ? 13                            \
 190    : (_) == 'O' ? 14                            \
 191    : (_) == 'P' ? 15                            \
 192    : (_) == 'Q' ? 16                            \
 193    : (_) == 'R' ? 17                            \
 194    : (_) == 'S' ? 18                            \
 195    : (_) == 'T' ? 19                            \
 196    : (_) == 'U' ? 20                            \
 197    : (_) == 'V' ? 21                            \
 198    : (_) == 'W' ? 22                            \
 199    : (_) == 'X' ? 23                            \
 200    : (_) == 'Y' ? 24                            \
 201    : (_) == 'Z' ? 25                            \
 202    : (_) == '2' ? 26                            \
 203    : (_) == '3' ? 27                            \
 204    : (_) == '4' ? 28                            \
 205    : (_) == '5' ? 29                            \
 206    : (_) == '6' ? 30                            \
 207    : (_) == '7' ? 31                            \
 208    : -1)
 209
 210 static const signed char b32[0x100] = {
 211   B32 (0), B32 (1), B32 (2), B32 (3),
 212   B32 (4), B32 (5), B32 (6), B32 (7),
 213   B32 (8), B32 (9), B32 (10), B32 (11),
 214   B32 (12), B32 (13), B32 (14), B32 (15),
 215   B32 (16), B32 (17), B32 (18), B32 (19),
 216   B32 (20), B32 (21), B32 (22), B32 (23),
 217   B32 (24), B32 (25), B32 (26), B32 (27),
 218   B32 (28), B32 (29), B32 (30), B32 (31),
 219   B32 (32), B32 (33), B32 (34), B32 (35),
 220   B32 (36), B32 (37), B32 (38), B32 (39),
 221   B32 (40), B32 (41), B32 (42), B32 (43),
 222   B32 (44), B32 (45), B32 (46), B32 (47),
 223   B32 (48), B32 (49), B32 (50), B32 (51),
 224   B32 (52), B32 (53), B32 (54), B32 (55),
 225   B32 (56), B32 (57), B32 (58), B32 (59),
 226   B32 (60), B32 (61), B32 (62), B32 (63),
 227   B32 (32), B32 (65), B32 (66), B32 (67),
 228   B32 (68), B32 (69), B32 (70), B32 (71),
 229   B32 (72), B32 (73), B32 (74), B32 (75),
 230   B32 (76), B32 (77), B32 (78), B32 (79),
 231   B32 (80), B32 (81), B32 (82), B32 (83),
 232   B32 (84), B32 (85), B32 (86), B32 (87),
 233   B32 (88), B32 (89), B32 (90), B32 (91),
 234   B32 (92), B32 (93), B32 (94), B32 (95),
 235   B32 (96), B32 (97), B32 (98), B32 (99),
 236   B32 (100), B32 (101), B32 (102), B32 (103),
 237   B32 (104), B32 (105), B32 (106), B32 (107),
 238   B32 (108), B32 (109), B32 (110), B32 (111),
 239   B32 (112), B32 (113), B32 (114), B32 (115),
 240   B32 (116), B32 (117), B32 (118), B32 (119),
 241   B32 (120), B32 (121), B32 (122), B32 (123),
 242   B32 (124), B32 (125), B32 (126), B32 (127),
 243   B32 (128), B32 (129), B32 (130), B32 (131),
 244   B32 (132), B32 (133), B32 (134), B32 (135),
 245   B32 (136), B32 (137), B32 (138), B32 (139),
 246   B32 (140), B32 (141), B32 (142), B32 (143),
 247   B32 (144), B32 (145), B32 (146), B32 (147),
 248   B32 (148), B32 (149), B32 (150), B32 (151),
 249   B32 (152), B32 (153), B32 (154), B32 (155),
 250   B32 (156), B32 (157), B32 (158), B32 (159),
 251   B32 (160), B32 (161), B32 (162), B32 (163),
 252   B32 (132), B32 (165), B32 (166), B32 (167),
 253   B32 (168), B32 (169), B32 (170), B32 (171),
 254   B32 (172), B32 (173), B32 (174), B32 (175),
 255   B32 (176), B32 (177), B32 (178), B32 (179),
 256   B32 (180), B32 (181), B32 (182), B32 (183),
 257   B32 (184), B32 (185), B32 (186), B32 (187),
 258   B32 (188), B32 (189), B32 (190), B32 (191),
 259   B32 (192), B32 (193), B32 (194), B32 (195),
 260   B32 (196), B32 (197), B32 (198), B32 (199),
 261   B32 (200), B32 (201), B32 (202), B32 (203),
 262   B32 (204), B32 (205), B32 (206), B32 (207),
 263   B32 (208), B32 (209), B32 (210), B32 (211),
 264   B32 (212), B32 (213), B32 (214), B32 (215),
 265   B32 (216), B32 (217), B32 (218), B32 (219),
 266   B32 (220), B32 (221), B32 (222), B32 (223),
 267   B32 (224), B32 (225), B32 (226), B32 (227),
 268   B32 (228), B32 (229), B32 (230), B32 (231),
 269   B32 (232), B32 (233), B32 (234), B32 (235),
 270   B32 (236), B32 (237), B32 (238), B32 (239),
 271   B32 (240), B32 (241), B32 (242), B32 (243),
 272   B32 (244), B32 (245), B32 (246), B32 (247),
 273   B32 (248), B32 (249), B32 (250), B32 (251),
 274   B32 (252), B32 (253), B32 (254), B32 (255)
 275 };
 276
 277 #if UCHAR_MAX == 255
 278 # define uchar_in_range(c) true
 279 #else
 280 # define uchar_in_range(c) ((c) <= 255)
 281 #endif
 282
 283 /* Return true if CH is a character from the Base32 alphabet, and
 284    false otherwise.  Note that '=' is padding and not considered to be
 285    part of the alphabet.  */
 286 bool
 287 isbase32 (char ch)
 288 {
 289   return uchar_in_range (to_uchar (ch)) && 0 <= b32[to_uchar (ch)];
 290 }
 291
 292 /* Initialize decode-context buffer, CTX.  */
 293 void
 294 base32_decode_ctx_init (struct base32_decode_context *ctx)
 295 {
 296   ctx->i = 0;
 297 }
 298
 299 /* If CTX->i is 0 or 8, there are eight or more bytes in [*IN..IN_END), and
 300    none of those eight is a newline, then return *IN.  Otherwise, copy up to
 301    4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
 302    index CTX->i and setting CTX->i to reflect the number of bytes copied,
 303    and return CTX->buf.  In either case, advance *IN to point to the byte
 304    after the last one processed, and set *N_NON_NEWLINE to the number of
 305    verified non-newline bytes accessible through the returned pointer.  */
 306 static inline char *
 307 get_8 (struct base32_decode_context *ctx,
 308        char const *restrict *in, char const *restrict in_end,
 309        size_t *n_non_newline)
 310 {
 311   if (ctx->i == 8)
 312     ctx->i = 0;
 313
 314   if (ctx->i == 0)
 315     {
 316       char const *t = *in;
 317       if (8 <= in_end - *in && memchr (t, '\n', 8) == NULL)
 318         {
 319           /* This is the common case: no newline.  */
 320           *in += 8;
 321           *n_non_newline = 8;
 322           return (char *) t;
 323         }
 324     }
 325
 326   {
 327     /* Copy non-newline bytes into BUF.  */
 328     char const *p = *in;
 329     while (p < in_end)
 330       {
 331         char c = *p++;
 332         if (c != '\n')
 333           {
 334             ctx->buf[ctx->i++] = c;
 335             if (ctx->i == 8)
 336               break;
 337           }
 338       }
 339
 340     *in = p;
 341     *n_non_newline = ctx->i;
 342     return ctx->buf;
 343   }
 344 }
 345
 346 #define return_false                            \
 347   do                                            \
 348     {                                           \
 349       *outp = out;                              \
 350       return false;                             \
 351     }                                           \
 352   while (false)
 353
 354 /* Decode eight bytes of base32-encoded data, IN, of length INLEN
 355    into the output buffer, *OUT, of size *OUTLEN bytes.  Return true if
 356    decoding is successful, false otherwise.  If *OUTLEN is too small,
 357    as many bytes as possible are written to *OUT.  On return, advance
 358    *OUT to point to the byte after the last one written, and decrement
 359    *OUTLEN to reflect the number of bytes remaining in *OUT.  */
 360 static inline bool
 361 decode_8 (char const *restrict in, size_t inlen,
 362           char *restrict *outp, size_t *outleft)
 363 {
 364   char *out = *outp;
 365   if (inlen < 8)
 366     return false;
 367
 368   if (!isbase32 (in[0]) || !isbase32 (in[1]) )
 369     return false;
 370
 371   if (*outleft)
 372     {
 373       *out++ = ((b32[to_uchar (in[0])] << 3)
 374                 | (b32[to_uchar (in[1])] >> 2));
 375       --*outleft;
 376     }
 377
 378   if (in[2] == '=')
 379     {
 380       if (in[3] != '=' || in[4] != '=' || in[5] != '='
 381           || in[6] != '=' || in[7] != '=')
 382         return_false;
 383     }
 384   else
 385     {
 386       if (!isbase32 (in[2]) || !isbase32 (in[3]))
 387         return_false;
 388
 389       if (*outleft)
 390         {
 391           *out++ = ((b32[to_uchar (in[1])] << 6)
 392                     | (b32[to_uchar (in[2])] << 1)
 393                     | (b32[to_uchar (in[3])] >> 4));
 394           --*outleft;
 395         }
 396
 397       if (in[4] == '=')
 398         {
 399           if (in[5] != '=' || in[6] != '=' || in[7] != '=')
 400             return_false;
 401         }
 402       else
 403         {
 404           if (!isbase32 (in[4]))
 405             return_false;
 406
 407           if (*outleft)
 408             {
 409               *out++ = ((b32[to_uchar (in[3])] << 4)
 410                         | (b32[to_uchar (in[4])] >> 1));
 411               --*outleft;
 412             }
 413
 414           if (in[5] == '=')
 415             {
 416               if (in[6] != '=' || in[7] != '=')
 417                 return_false;
 418             }
 419           else
 420             {
 421               if (!isbase32 (in[5]) || !isbase32 (in[6]))
 422                 return_false;
 423
 424               if (*outleft)
 425                 {
 426                   *out++ = ((b32[to_uchar (in[4])] << 7)
 427                             | (b32[to_uchar (in[5])] << 2)
 428                             | (b32[to_uchar (in[6])] >> 3));
 429                   --*outleft;
 430                 }
 431
 432               if (in[7] != '=')
 433                 {
 434                   if (!isbase32 (in[7]))
 435                     return_false;
 436
 437                   if (*outleft)
 438                     {
 439                       *out++ = ((b32[to_uchar (in[6])] << 5)
 440                                 | (b32[to_uchar (in[7])]));
 441                       --*outleft;
 442                     }
 443                 }
 444             }
 445         }
 446     }
 447
 448   *outp = out;
 449   return true;
 450 }
 451
 452 /* Decode base32-encoded input array IN of length INLEN to output array
 453    OUT that can hold *OUTLEN bytes.  The input data may be interspersed
 454    with newlines.  Return true if decoding was successful, i.e. if the
 455    input was valid base32 data, false otherwise.  If *OUTLEN is too
 456    small, as many bytes as possible will be written to OUT.  On return,
 457    *OUTLEN holds the length of decoded bytes in OUT.  Note that as soon
 458    as any non-alphabet, non-newline character is encountered, decoding
 459    is stopped and false is returned.  If INLEN is zero, then process
 460    only whatever data is stored in CTX.
 461
 462    Initially, CTX must have been initialized via base32_decode_ctx_init.
 463    Subsequent calls to this function must reuse whatever state is recorded
 464    in that buffer.  It is necessary for when a octuple of base32 input
 465    bytes spans two input buffers.
 466
 467    If CTX is NULL then newlines are treated as garbage and the input
 468    buffer is processed as a unit.  */
 469
 470 bool
 471 base32_decode_ctx (struct base32_decode_context *ctx,
 472                    const char *restrict in, size_t inlen,
 473                    char *restrict out, size_t *outlen)
 474 {
 475   size_t outleft = *outlen;
 476   bool ignore_newlines = ctx != NULL;
 477   bool flush_ctx = false;
 478   unsigned int ctx_i = 0;
 479
 480   if (ignore_newlines)
 481     {
 482       ctx_i = ctx->i;
 483       flush_ctx = inlen == 0;
 484     }
 485
 486
 487   while (true)
 488     {
 489       size_t outleft_save = outleft;
 490       if (ctx_i == 0 && !flush_ctx)
 491         {
 492           while (true)
 493             {
 494               /* Save a copy of outleft, in case we need to re-parse this
 495                  block of four bytes.  */
 496               outleft_save = outleft;
 497               if (!decode_8 (in, inlen, &out, &outleft))
 498                 break;
 499
 500               in += 8;
 501               inlen -= 8;
 502             }
 503         }
 504
 505       if (inlen == 0 && !flush_ctx)
 506         break;
 507
 508       /* Handle the common case of 72-byte wrapped lines.
 509          This also handles any other multiple-of-8-byte wrapping.  */
 510       if (inlen && *in == '\n' && ignore_newlines)
 511         {
 512           ++in;
 513           --inlen;
 514           continue;
 515         }
 516
 517       /* Restore OUT and OUTLEFT.  */
 518       out -= outleft_save - outleft;
 519       outleft = outleft_save;
 520
 521       {
 522         char const *in_end = in + inlen;
 523         char const *non_nl;
 524
 525         if (ignore_newlines)
 526           non_nl = get_8 (ctx, &in, in_end, &inlen);
 527         else
 528           non_nl = in;  /* Might have nl in this case. */
 529
 530         /* If the input is empty or consists solely of newlines (0 non-newlines),
 531            then we're done.  Likewise if there are fewer than 8 bytes when not
 532            flushing context and not treating newlines as garbage.  */
 533         if (inlen == 0 || (inlen < 8 && !flush_ctx && ignore_newlines))
 534           {
 535             inlen = 0;
 536             break;
 537           }
 538         if (!decode_8 (non_nl, inlen, &out, &outleft))
 539           break;
 540
 541         inlen = in_end - in;
 542       }
 543     }
 544
 545   *outlen -= outleft;
 546
 547   return inlen == 0;
 548 }
 549
 550 /* Allocate an output buffer in *OUT, and decode the base32 encoded
 551    data stored in IN of size INLEN to the *OUT buffer.  On return, the
 552    size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
 553    if the caller is not interested in the decoded length.  *OUT may be
 554    NULL to indicate an out of memory error, in which case *OUTLEN
 555    contains the size of the memory block needed.  The function returns
 556    true on successful decoding and memory allocation errors.  (Use the
 557    *OUT and *OUTLEN parameters to differentiate between successful
 558    decoding and memory error.)  The function returns false if the
 559    input was invalid, in which case *OUT is NULL and *OUTLEN is
 560    undefined. */
 561 bool
 562 base32_decode_alloc_ctx (struct base32_decode_context *ctx,
 563                          const char *in, size_t inlen, char **out,
 564                          size_t *outlen)
 565 {
 566   /* This may allocate a few bytes too many, depending on input,
 567      but it's not worth the extra CPU time to compute the exact size.
 568      The exact size is 5 * inlen / 8, minus one or more bytes if the
 569      input is padded with one or more "=".
 570      Dividing before multiplying avoids the possibility of overflow.  */
 571   size_t needlen = 5 * (inlen / 8) + 5;
 572
 573   *out = malloc (needlen);
 574   if (!*out)
 575     return true;
 576
 577   if (!base32_decode_ctx (ctx, in, inlen, *out, &needlen))
 578     {
 579       free (*out);
 580       *out = NULL;
 581       return false;
 582     }
 583
 584   if (outlen)
 585     *outlen = needlen;
 586
 587   return true;
 588 }