Fix out-of-memory handling of vasnprintf.
[gnulib.git] / lib / printf-parse.c
1 /* Formatted output to strings.
2    Copyright (C) 1999-2000, 2002-2003, 2006-2007 Free Software Foundation, Inc.
3
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License along
15    with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
18 /* This file can be parametrized with the following macros:
19      CHAR_T             The element type of the format string.
20      CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
21                         in the format string are ASCII.
22      DIRECTIVE          Structure denoting a format directive.
23                         Depends on CHAR_T.
24      DIRECTIVES         Structure denoting the set of format directives of a
25                         format string.  Depends on CHAR_T.
26      PRINTF_PARSE       Function that parses a format string.
27                         Depends on CHAR_T.
28      STATIC             Set to 'static' to declare the function static.
29      ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
30
31 #ifndef PRINTF_PARSE
32 # include <config.h>
33 #endif
34
35 /* Specification.  */
36 #ifndef PRINTF_PARSE
37 # include "printf-parse.h"
38 #endif
39
40 /* Default parameters.  */
41 #ifndef PRINTF_PARSE
42 # define PRINTF_PARSE printf_parse
43 # define CHAR_T char
44 # define DIRECTIVE char_directive
45 # define DIRECTIVES char_directives
46 #endif
47
48 /* Get size_t, NULL.  */
49 #include <stddef.h>
50
51 /* Get intmax_t.  */
52 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
53 # if HAVE_STDINT_H_WITH_UINTMAX
54 #  include <stdint.h>
55 # endif
56 # if HAVE_INTTYPES_H_WITH_UINTMAX
57 #  include <inttypes.h>
58 # endif
59 #else
60 # include <stdint.h>
61 #endif
62
63 /* malloc(), realloc(), free().  */
64 #include <stdlib.h>
65
66 /* errno.  */
67 #include <errno.h>
68
69 /* Checked size_t computations.  */
70 #include "xsize.h"
71
72 #if CHAR_T_ONLY_ASCII
73 /* c_isascii().  */
74 # include "c-ctype.h"
75 #endif
76
77 #ifdef STATIC
78 STATIC
79 #endif
80 int
81 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
82 {
83   const CHAR_T *cp = format;            /* pointer into format */
84   size_t arg_posn = 0;          /* number of regular arguments consumed */
85   size_t d_allocated;                   /* allocated elements of d->dir */
86   size_t a_allocated;                   /* allocated elements of a->arg */
87   size_t max_width_length = 0;
88   size_t max_precision_length = 0;
89
90   d->count = 0;
91   d_allocated = 1;
92   d->dir = (DIRECTIVE *) malloc (d_allocated * sizeof (DIRECTIVE));
93   if (d->dir == NULL)
94     /* Out of memory.  */
95     goto out_of_memory_1;
96
97   a->count = 0;
98   a_allocated = 0;
99   a->arg = NULL;
100
101 #define REGISTER_ARG(_index_,_type_) \
102   {                                                                     \
103     size_t n = (_index_);                                               \
104     if (n >= a_allocated)                                               \
105       {                                                                 \
106         size_t memory_size;                                             \
107         argument *memory;                                               \
108                                                                         \
109         a_allocated = xtimes (a_allocated, 2);                          \
110         if (a_allocated <= n)                                           \
111           a_allocated = xsum (n, 1);                                    \
112         memory_size = xtimes (a_allocated, sizeof (argument));          \
113         if (size_overflow_p (memory_size))                              \
114           /* Overflow, would lead to out of memory.  */                 \
115           goto out_of_memory;                                           \
116         memory = (argument *) (a->arg                                   \
117                                ? realloc (a->arg, memory_size)          \
118                                : malloc (memory_size));                 \
119         if (memory == NULL)                                             \
120           /* Out of memory.  */                                         \
121           goto out_of_memory;                                           \
122         a->arg = memory;                                                \
123       }                                                                 \
124     while (a->count <= n)                                               \
125       a->arg[a->count++].type = TYPE_NONE;                              \
126     if (a->arg[n].type == TYPE_NONE)                                    \
127       a->arg[n].type = (_type_);                                        \
128     else if (a->arg[n].type != (_type_))                                \
129       /* Ambiguous type for positional argument.  */                    \
130       goto error;                                                       \
131   }
132
133   while (*cp != '\0')
134     {
135       CHAR_T c = *cp++;
136       if (c == '%')
137         {
138           size_t arg_index = ARG_NONE;
139           DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
140
141           /* Initialize the next directive.  */
142           dp->dir_start = cp - 1;
143           dp->flags = 0;
144           dp->width_start = NULL;
145           dp->width_end = NULL;
146           dp->width_arg_index = ARG_NONE;
147           dp->precision_start = NULL;
148           dp->precision_end = NULL;
149           dp->precision_arg_index = ARG_NONE;
150           dp->arg_index = ARG_NONE;
151
152           /* Test for positional argument.  */
153           if (*cp >= '0' && *cp <= '9')
154             {
155               const CHAR_T *np;
156
157               for (np = cp; *np >= '0' && *np <= '9'; np++)
158                 ;
159               if (*np == '$')
160                 {
161                   size_t n = 0;
162
163                   for (np = cp; *np >= '0' && *np <= '9'; np++)
164                     n = xsum (xtimes (n, 10), *np - '0');
165                   if (n == 0)
166                     /* Positional argument 0.  */
167                     goto error;
168                   if (size_overflow_p (n))
169                     /* n too large, would lead to out of memory later.  */
170                     goto error;
171                   arg_index = n - 1;
172                   cp = np + 1;
173                 }
174             }
175
176           /* Read the flags.  */
177           for (;;)
178             {
179               if (*cp == '\'')
180                 {
181                   dp->flags |= FLAG_GROUP;
182                   cp++;
183                 }
184               else if (*cp == '-')
185                 {
186                   dp->flags |= FLAG_LEFT;
187                   cp++;
188                 }
189               else if (*cp == '+')
190                 {
191                   dp->flags |= FLAG_SHOWSIGN;
192                   cp++;
193                 }
194               else if (*cp == ' ')
195                 {
196                   dp->flags |= FLAG_SPACE;
197                   cp++;
198                 }
199               else if (*cp == '#')
200                 {
201                   dp->flags |= FLAG_ALT;
202                   cp++;
203                 }
204               else if (*cp == '0')
205                 {
206                   dp->flags |= FLAG_ZERO;
207                   cp++;
208                 }
209               else
210                 break;
211             }
212
213           /* Parse the field width.  */
214           if (*cp == '*')
215             {
216               dp->width_start = cp;
217               cp++;
218               dp->width_end = cp;
219               if (max_width_length < 1)
220                 max_width_length = 1;
221
222               /* Test for positional argument.  */
223               if (*cp >= '0' && *cp <= '9')
224                 {
225                   const CHAR_T *np;
226
227                   for (np = cp; *np >= '0' && *np <= '9'; np++)
228                     ;
229                   if (*np == '$')
230                     {
231                       size_t n = 0;
232
233                       for (np = cp; *np >= '0' && *np <= '9'; np++)
234                         n = xsum (xtimes (n, 10), *np - '0');
235                       if (n == 0)
236                         /* Positional argument 0.  */
237                         goto error;
238                       if (size_overflow_p (n))
239                         /* n too large, would lead to out of memory later.  */
240                         goto error;
241                       dp->width_arg_index = n - 1;
242                       cp = np + 1;
243                     }
244                 }
245               if (dp->width_arg_index == ARG_NONE)
246                 {
247                   dp->width_arg_index = arg_posn++;
248                   if (dp->width_arg_index == ARG_NONE)
249                     /* arg_posn wrapped around.  */
250                     goto error;
251                 }
252               REGISTER_ARG (dp->width_arg_index, TYPE_INT);
253             }
254           else if (*cp >= '0' && *cp <= '9')
255             {
256               size_t width_length;
257
258               dp->width_start = cp;
259               for (; *cp >= '0' && *cp <= '9'; cp++)
260                 ;
261               dp->width_end = cp;
262               width_length = dp->width_end - dp->width_start;
263               if (max_width_length < width_length)
264                 max_width_length = width_length;
265             }
266
267           /* Parse the precision.  */
268           if (*cp == '.')
269             {
270               cp++;
271               if (*cp == '*')
272                 {
273                   dp->precision_start = cp - 1;
274                   cp++;
275                   dp->precision_end = cp;
276                   if (max_precision_length < 2)
277                     max_precision_length = 2;
278
279                   /* Test for positional argument.  */
280                   if (*cp >= '0' && *cp <= '9')
281                     {
282                       const CHAR_T *np;
283
284                       for (np = cp; *np >= '0' && *np <= '9'; np++)
285                         ;
286                       if (*np == '$')
287                         {
288                           size_t n = 0;
289
290                           for (np = cp; *np >= '0' && *np <= '9'; np++)
291                             n = xsum (xtimes (n, 10), *np - '0');
292                           if (n == 0)
293                             /* Positional argument 0.  */
294                             goto error;
295                           if (size_overflow_p (n))
296                             /* n too large, would lead to out of memory
297                                later.  */
298                             goto error;
299                           dp->precision_arg_index = n - 1;
300                           cp = np + 1;
301                         }
302                     }
303                   if (dp->precision_arg_index == ARG_NONE)
304                     {
305                       dp->precision_arg_index = arg_posn++;
306                       if (dp->precision_arg_index == ARG_NONE)
307                         /* arg_posn wrapped around.  */
308                         goto error;
309                     }
310                   REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
311                 }
312               else
313                 {
314                   size_t precision_length;
315
316                   dp->precision_start = cp - 1;
317                   for (; *cp >= '0' && *cp <= '9'; cp++)
318                     ;
319                   dp->precision_end = cp;
320                   precision_length = dp->precision_end - dp->precision_start;
321                   if (max_precision_length < precision_length)
322                     max_precision_length = precision_length;
323                 }
324             }
325
326           {
327             arg_type type;
328
329             /* Parse argument type/size specifiers.  */
330             {
331               int flags = 0;
332
333               for (;;)
334                 {
335                   if (*cp == 'h')
336                     {
337                       flags |= (1 << (flags & 1));
338                       cp++;
339                     }
340                   else if (*cp == 'L')
341                     {
342                       flags |= 4;
343                       cp++;
344                     }
345                   else if (*cp == 'l')
346                     {
347                       flags += 8;
348                       cp++;
349                     }
350                   else if (*cp == 'j')
351                     {
352                       if (sizeof (intmax_t) > sizeof (long))
353                         {
354                           /* intmax_t = long long */
355                           flags += 16;
356                         }
357                       else if (sizeof (intmax_t) > sizeof (int))
358                         {
359                           /* intmax_t = long */
360                           flags += 8;
361                         }
362                       cp++;
363                     }
364                   else if (*cp == 'z' || *cp == 'Z')
365                     {
366                       /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
367                          because the warning facility in gcc-2.95.2 understands
368                          only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
369                       if (sizeof (size_t) > sizeof (long))
370                         {
371                           /* size_t = long long */
372                           flags += 16;
373                         }
374                       else if (sizeof (size_t) > sizeof (int))
375                         {
376                           /* size_t = long */
377                           flags += 8;
378                         }
379                       cp++;
380                     }
381                   else if (*cp == 't')
382                     {
383                       if (sizeof (ptrdiff_t) > sizeof (long))
384                         {
385                           /* ptrdiff_t = long long */
386                           flags += 16;
387                         }
388                       else if (sizeof (ptrdiff_t) > sizeof (int))
389                         {
390                           /* ptrdiff_t = long */
391                           flags += 8;
392                         }
393                       cp++;
394                     }
395                   else
396                     break;
397                 }
398
399               /* Read the conversion character.  */
400               c = *cp++;
401               switch (c)
402                 {
403                 case 'd': case 'i':
404 #if HAVE_LONG_LONG_INT
405                   /* If 'long long' exists and is larger than 'long':  */
406                   if (flags >= 16 || (flags & 4))
407                     type = TYPE_LONGLONGINT;
408                   else
409 #endif
410                   /* If 'long long' exists and is the same as 'long', we parse
411                      "lld" into TYPE_LONGINT.  */
412                   if (flags >= 8)
413                     type = TYPE_LONGINT;
414                   else if (flags & 2)
415                     type = TYPE_SCHAR;
416                   else if (flags & 1)
417                     type = TYPE_SHORT;
418                   else
419                     type = TYPE_INT;
420                   break;
421                 case 'o': case 'u': case 'x': case 'X':
422 #if HAVE_LONG_LONG_INT
423                   /* If 'long long' exists and is larger than 'long':  */
424                   if (flags >= 16 || (flags & 4))
425                     type = TYPE_ULONGLONGINT;
426                   else
427 #endif
428                   /* If 'unsigned long long' exists and is the same as
429                      'unsigned long', we parse "llu" into TYPE_ULONGINT.  */
430                   if (flags >= 8)
431                     type = TYPE_ULONGINT;
432                   else if (flags & 2)
433                     type = TYPE_UCHAR;
434                   else if (flags & 1)
435                     type = TYPE_USHORT;
436                   else
437                     type = TYPE_UINT;
438                   break;
439                 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
440                 case 'a': case 'A':
441                   if (flags >= 16 || (flags & 4))
442                     type = TYPE_LONGDOUBLE;
443                   else
444                     type = TYPE_DOUBLE;
445                   break;
446                 case 'c':
447                   if (flags >= 8)
448 #if HAVE_WINT_T
449                     type = TYPE_WIDE_CHAR;
450 #else
451                     goto error;
452 #endif
453                   else
454                     type = TYPE_CHAR;
455                   break;
456 #if HAVE_WINT_T
457                 case 'C':
458                   type = TYPE_WIDE_CHAR;
459                   c = 'c';
460                   break;
461 #endif
462                 case 's':
463                   if (flags >= 8)
464 #if HAVE_WCHAR_T
465                     type = TYPE_WIDE_STRING;
466 #else
467                     goto error;
468 #endif
469                   else
470                     type = TYPE_STRING;
471                   break;
472 #if HAVE_WCHAR_T
473                 case 'S':
474                   type = TYPE_WIDE_STRING;
475                   c = 's';
476                   break;
477 #endif
478                 case 'p':
479                   type = TYPE_POINTER;
480                   break;
481                 case 'n':
482 #if HAVE_LONG_LONG_INT
483                   /* If 'long long' exists and is larger than 'long':  */
484                   if (flags >= 16 || (flags & 4))
485                     type = TYPE_COUNT_LONGLONGINT_POINTER;
486                   else
487 #endif
488                   /* If 'long long' exists and is the same as 'long', we parse
489                      "lln" into TYPE_COUNT_LONGINT_POINTER.  */
490                   if (flags >= 8)
491                     type = TYPE_COUNT_LONGINT_POINTER;
492                   else if (flags & 2)
493                     type = TYPE_COUNT_SCHAR_POINTER;
494                   else if (flags & 1)
495                     type = TYPE_COUNT_SHORT_POINTER;
496                   else
497                     type = TYPE_COUNT_INT_POINTER;
498                   break;
499 #if ENABLE_UNISTDIO
500                 /* The unistdio extensions.  */
501                 case 'U':
502                   if (flags >= 16)
503                     type = TYPE_U32_STRING;
504                   else if (flags >= 8)
505                     type = TYPE_U16_STRING;
506                   else
507                     type = TYPE_U8_STRING;
508                   break;
509 #endif
510                 case '%':
511                   type = TYPE_NONE;
512                   break;
513                 default:
514                   /* Unknown conversion character.  */
515                   goto error;
516                 }
517             }
518
519             if (type != TYPE_NONE)
520               {
521                 dp->arg_index = arg_index;
522                 if (dp->arg_index == ARG_NONE)
523                   {
524                     dp->arg_index = arg_posn++;
525                     if (dp->arg_index == ARG_NONE)
526                       /* arg_posn wrapped around.  */
527                       goto error;
528                   }
529                 REGISTER_ARG (dp->arg_index, type);
530               }
531             dp->conversion = c;
532             dp->dir_end = cp;
533           }
534
535           d->count++;
536           if (d->count >= d_allocated)
537             {
538               size_t memory_size;
539               DIRECTIVE *memory;
540
541               d_allocated = xtimes (d_allocated, 2);
542               memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
543               if (size_overflow_p (memory_size))
544                 /* Overflow, would lead to out of memory.  */
545                 goto out_of_memory;
546               memory = (DIRECTIVE *) realloc (d->dir, memory_size);
547               if (memory == NULL)
548                 /* Out of memory.  */
549                 goto out_of_memory;
550               d->dir = memory;
551             }
552         }
553 #if CHAR_T_ONLY_ASCII
554       else if (!c_isascii (c))
555         {
556           /* Non-ASCII character.  Not supported.  */
557           goto error;
558         }
559 #endif
560     }
561   d->dir[d->count].dir_start = cp;
562
563   d->max_width_length = max_width_length;
564   d->max_precision_length = max_precision_length;
565   return 0;
566
567 error:
568   if (a->arg)
569     free (a->arg);
570   if (d->dir)
571     free (d->dir);
572   errno = EINVAL;
573   return -1;
574
575 out_of_memory:
576   if (a->arg)
577     free (a->arg);
578   if (d->dir)
579     free (d->dir);
580 out_of_memory_1:
581   errno = ENOMEM;
582   return -1;
583 }
584
585 #undef PRINTF_PARSE
586 #undef DIRECTIVES
587 #undef DIRECTIVE
588 #undef CHAR_T_ONLY_ASCII
589 #undef CHAR_T