1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C
47 # define mbstate_t int
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
49 # define iswprint(wc) isprint ((unsigned char) (wc))
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
58 # define SIZE_MAX ((size_t) -1)
61 #define INT_BITS (sizeof (int) * CHAR_BIT)
63 struct quoting_options
65 /* Basic quoting style. */
66 enum quoting_style style;
68 /* Additional flags. Behavior is altered according to these bits:
69 0x01: Elide null bytes rather than embed them unquoted.
73 /* Quote the characters indicated by this bit vector even if the
74 quoting style would not normally require them to be quoted. */
75 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
78 /* Names of quoting styles. */
79 char const *const quoting_style_args[] =
91 /* Correspondences to quoting style names. */
92 enum quoting_style const quoting_style_vals[] =
94 literal_quoting_style,
96 shell_always_quoting_style,
100 clocale_quoting_style
103 /* The default quoting options. */
104 static struct quoting_options default_quoting_options;
106 /* Allocate a new set of quoting options, with contents initially identical
107 to O if O is not null, or to the default if O is null.
108 It is the caller's responsibility to free the result. */
109 struct quoting_options *
110 clone_quoting_options (struct quoting_options *o)
113 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
119 /* Get the value of O's quoting style. If O is null, use the default. */
121 get_quoting_style (struct quoting_options *o)
123 return (o ? o : &default_quoting_options)->style;
126 /* In O (or in the default if O is null),
127 set the value of the quoting style to S. */
129 set_quoting_style (struct quoting_options *o, enum quoting_style s)
131 (o ? o : &default_quoting_options)->style = s;
134 /* In O (or in the default if O is null),
135 set the value of the quoting options for character C to I.
136 Return the old value. Currently, the only values defined for I are
137 0 (the default) and 1 (which means to quote the character even if
138 it would not otherwise be quoted). */
140 set_char_quoting (struct quoting_options *o, char c, int i)
/* Work with the unsigned value of C so that the index into the bit
   vector is never negative, even where plain char is signed. */
142 unsigned char uc = c;
/* Address of the word of quote_these_too (in O, or in the defaults if
   O is null) that holds the bit for UC. */
144 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of UC's bit within that word. */
145 int shift = uc % INT_BITS;
/* R is the bit's current value. */
146 int r = (*p >> shift) & 1;
/* Flip the bit only when the low bit of I differs from R; afterwards
   the stored bit equals I & 1. */
147 *p ^= ((i & 1) ^ r) << shift;
151 /* In O (or in the default if O is null),
152 set the value of the quoting options flag to I.
153 Return the old value. Currently, the only values defined for I are
154 0 (the default) and 1 (which means to elide null bytes from styles
155 that would otherwise output them unquoted). */
157 set_quoting_flags (struct quoting_options *o, int i)
161 o = &default_quoting_options;
167 /* MSGID approximates a quotation mark. Return its translation if it
168 has one; otherwise, return either it or "\"", depending on S. */
170 gettext_quote (char const *msgid, enum quoting_style s)
/* _() is the file's gettext wrapper macro. */
172 char const *translation = _(msgid);
/* Pointer comparison, not a string comparison: this asks whether the
   lookup handed back MSGID itself, which the function's header comment
   treats as "no translation".  The fallback is taken only for
   clocale_quoting_style. */
173 if (translation == msgid && s == clocale_quoting_style)
178 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
179 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and the
180 remaining part of O to control quoting.
181 Terminate the output with a null character, and return the written
182 size of the output, not counting the terminating null.
183 If BUFFERSIZE is too small to store the output string, return the
184 value that would have been returned had BUFFERSIZE been large enough.
185 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
187 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
188 ARGSIZE, O), except it uses QUOTING_STYLE and FLAGS instead of the
189 quoting style specified by O, and O may not be null. */
192 quotearg_buffer_restyled (char *buffer, size_t buffersize,
193 char const *arg, size_t argsize,
194 enum quoting_style quoting_style, int flags,
195 struct quoting_options const *o)
199 char const *quote_string = 0;
200 size_t quote_string_len = 0;
201 bool backslash_escapes = false;
202 bool unibyte_locale = MB_CUR_MAX == 1;
207 if (len < buffersize) \
213 switch (quoting_style)
215 case c_quoting_style:
217 backslash_escapes = true;
219 quote_string_len = 1;
222 case escape_quoting_style:
223 backslash_escapes = true;
226 case locale_quoting_style:
227 case clocale_quoting_style:
230 Get translations for open and closing quotation marks.
232 The message catalog should translate "`" to a left
233 quotation mark suitable for the locale, and similarly for
234 "'". If the catalog has no translation,
235 locale_quoting_style quotes `like this', and
236 clocale_quoting_style quotes "like this".
238 For example, an American English Unicode locale should
239 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
240 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
241 MARK). A British English Unicode locale should instead
242 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
243 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
245 If you don't know what to put here, please see
246 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
247 and use glyphs suitable for your language. */
249 char const *left = gettext_quote (N_("`"), quoting_style);
250 char const *right = gettext_quote (N_("'"), quoting_style);
251 for (quote_string = left; *quote_string; quote_string++)
252 STORE (*quote_string);
253 backslash_escapes = true;
254 quote_string = right;
255 quote_string_len = strlen (quote_string);
259 case shell_always_quoting_style:
262 quote_string_len = 1;
269 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
274 if (backslash_escapes
276 && i + quote_string_len <= argsize
277 && memcmp (arg + i, quote_string, quote_string_len) == 0)
284 if (backslash_escapes)
291 else if (flags & 0x1)
296 switch (quoting_style)
298 case shell_quoting_style:
299 goto use_shell_always_quoting_style;
301 case c_quoting_style:
302 if (i + 2 < argsize && arg[i + 1] == '?')
306 case '(': case ')': case '-': case '/':
307 case '<': case '=': case '>':
308 /* Escape the second '?' in what would otherwise be
327 case '\a': esc = 'a'; goto c_escape;
328 case '\b': esc = 'b'; goto c_escape;
329 case '\f': esc = 'f'; goto c_escape;
330 case '\n': esc = 'n'; goto c_and_shell_escape;
331 case '\r': esc = 'r'; goto c_and_shell_escape;
332 case '\t': esc = 't'; goto c_and_shell_escape;
333 case '\v': esc = 'v'; goto c_escape;
334 case '\\': esc = c; goto c_and_shell_escape;
337 if (quoting_style == shell_quoting_style)
338 goto use_shell_always_quoting_style;
340 if (backslash_escapes)
347 case '{': case '}': /* sometimes special if isolated */
348 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
356 case '!': /* special in bash */
357 case '"': case '$': case '&':
358 case '(': case ')': case '*': case ';':
360 case '=': /* sometimes special in 0th or (with "set -k") later args */
362 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
364 /* A shell special character. In theory, '$' and '`' could
365 be the first bytes of multibyte characters, which means
366 we should check them with mbrtowc, but in practice this
367 doesn't happen so it's not worth worrying about. */
368 if (quoting_style == shell_quoting_style)
369 goto use_shell_always_quoting_style;
373 switch (quoting_style)
375 case shell_quoting_style:
376 goto use_shell_always_quoting_style;
378 case shell_always_quoting_style:
389 case '%': case '+': case ',': case '-': case '.': case '/':
390 case '0': case '1': case '2': case '3': case '4': case '5':
391 case '6': case '7': case '8': case '9': case ':':
392 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
393 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
394 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
395 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
396 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
397 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
398 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
399 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
400 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
401 /* These characters don't cause problems, no matter what the
402 quoting style is. They cannot start multibyte sequences. */
406 /* If we have a multibyte sequence, copy it until we reach
407 its end, find an error, or come back to the initial shift
408 state. For C-like styles, if the sequence has
409 unprintable characters, escape the whole sequence, since
410 we can't easily escape single characters within it. */
412 /* Length of multibyte sequence found so far. */
420 printable = isprint (c) != 0;
425 memset (&mbstate, 0, sizeof mbstate);
429 if (argsize == SIZE_MAX)
430 argsize = strlen (arg);
435 size_t bytes = mbrtowc (&w, &arg[i + m],
436 argsize - (i + m), &mbstate);
439 else if (bytes == (size_t) -1)
444 else if (bytes == (size_t) -2)
447 while (i + m < argsize && arg[i + m])
453 /* Work around a bug with older shells that "see" a '\'
454 that is really the 2nd byte of a multibyte character.
455 In practice the problem is limited to ASCII
456 chars >= '@' that are shell special chars. */
457 if ('[' == 0x5b && quoting_style == shell_quoting_style)
460 for (j = 1; j < bytes; j++)
461 switch (arg[i + m + j])
463 case '[': case '\\': case '^':
465 goto use_shell_always_quoting_style;
477 while (! mbsinit (&mbstate));
480 if (1 < m || (backslash_escapes && ! printable))
482 /* Output a multibyte sequence, or an escaped
483 unprintable unibyte character. */
488 if (backslash_escapes && ! printable)
491 STORE ('0' + (c >> 6));
492 STORE ('0' + ((c >> 3) & 7));
506 if (! (backslash_escapes
507 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
517 if (i == 0 && quoting_style == shell_quoting_style)
518 goto use_shell_always_quoting_style;
521 for (; *quote_string; quote_string++)
522 STORE (*quote_string);
524 if (len < buffersize)
528 use_shell_always_quoting_style:
529 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
530 shell_always_quoting_style, flags, o);
533 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
534 argument ARG (of size ARGSIZE), using O to control quoting.
535 If O is null, use the default.
536 Terminate the output with a null character, and return the written
537 size of the output, not counting the terminating null.
538 If BUFFERSIZE is too small to store the output string, return the
539 value that would have been returned had BUFFERSIZE been large enough.
540 If ARGSIZE is SIZE_MAX, use the string length of the argument for
543 quotearg_buffer (char *buffer, size_t buffersize,
544 char const *arg, size_t argsize,
545 struct quoting_options const *o)
547 struct quoting_options const *p = o ? o : &default_quoting_options;
549 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
550 p->style, p->flags, p);
555 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
557 quotearg_alloc (char const *arg, size_t argsize,
558 struct quoting_options const *o)
/* No size is requested (NULL), so quotearg_alloc_mem turns on the
   null-byte elision flag for this call. */
560 return quotearg_alloc_mem (arg, argsize, NULL, o);
563 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
564 allocated storage containing the quoted string, and store the
565 resulting size into *SIZE, if non-NULL. The result can contain
566 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
567 NULL, and set_quoting_flags has not set the null byte elision
570 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
571 struct quoting_options const *o)
/* Use the default options when O is null. */
573 struct quoting_options const *p = o ? o : &default_quoting_options;
575 /* Elide embedded null bytes if we can't return a size. */
576 int flags = p->flags | (size ? 0 : 0x1);
/* Sizing pass: with a null buffer of size 0 nothing is stored, and the
   call only reports the length that the quoted output needs. */
577 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
579 char *buf = xcharalloc (bufsize);
/* Filling pass: produce the quoted text in the newly allocated buffer,
   with the same style and flags as the sizing pass. */
580 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, p);
587 /* A storage slot with size and pointer to a value. */
594 /* Preallocate a slot 0 buffer, so that the caller can always quote
595 one small component of a "memory exhausted" message in slot 0. */
596 static char slot0[256];
597 static unsigned int nslots = 1;
598 static struct slotvec slotvec0 = {sizeof slot0, slot0};
599 static struct slotvec *slotvec = &slotvec0;
604 struct slotvec *sv = slotvec;
606 for (i = 1; i < nslots; i++)
608 if (sv[0].val != slot0)
611 slotvec0.size = sizeof slot0;
612 slotvec0.val = slot0;
622 /* Use storage slot N to return a quoted version of argument ARG.
623 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
624 null-terminated string.
625 OPTIONS specifies the quoting options.
626 The returned value points to static storage that can be
627 reused by the next call to this function with the same value of N.
628 N must be nonnegative. N is deliberately declared with type "int"
629 to allow for future extensions (using negative values). */
631 quotearg_n_options (int n, char const *arg, size_t argsize,
632 struct quoting_options const *options)
/* Work on a local copy of the file-scope slot vector pointer. */
637 struct slotvec *sv = slotvec;
644 /* FIXME: technically, the type of n1 should be `unsigned int',
645 but that evokes an unsuppressible warning from gcc-4.0.1 and
646 older. If gcc ever provides an option to suppress that warning,
647 revert to the original type, so that the test in xalloc_oversized
648 is once again performed only at compile time. */
/* True while the vector is still the static one-element slotvec0,
   which is not heap storage. */
650 bool preallocated = (sv == &slotvec0);
/* NOTE(review): xalloc_oversized presumably detects an element count
   whose total byte size would overflow -- confirm against its
   definition. */
652 if (xalloc_oversized (n1, sizeof *sv))
/* The static vector must not be handed to xrealloc, so NULL is passed
   in that case instead of SV. */
655 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the slots added beyond the old count NSLOTS. */
658 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
/* Current buffer of slot N. */
663 size_t size = sv[n].size;
664 char *val = sv[n].val;
665 /* Elide embedded null bytes since we don't return a size. */
/* First attempt: quote into the slot's current buffer.  QSIZE is the
   length the output needs, even when it did not fit in SIZE bytes. */
666 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
668 options->flags | 0x1, options);
/* New slot size: the quoted text plus its terminating null byte. */
672 sv[n].size = size = qsize + 1;
675 sv[n].val = val = xcharalloc (size);
/* Quote again, this time into the newly allocated buffer. */
676 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
677 options->flags | 0x1, options);
/* Quote the null-terminated string ARG into storage slot N, using the
   default quoting options.  Passing SIZE_MAX as the size tells
   quotearg_n_options to treat ARG as null-terminated. */
686 quotearg_n (int n, char const *arg)
688 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Like quotearg_n, except that the size of ARG is given explicitly by
   ARGSIZE.  The default quoting options are used. */
692 quotearg_n_mem (int n, char const *arg, size_t argsize)
694 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Equivalent to quotearg_n (0, ARG): quote ARG into storage slot 0. */
698 quotearg (char const *arg)
700 return quotearg_n (0, arg);
/* Equivalent to quotearg_n_mem (0, ARG, ARGSIZE): quote ARG, whose size
   is ARGSIZE, into storage slot 0. */
704 quotearg_mem (char const *arg, size_t argsize)
706 return quotearg_n_mem (0, arg, argsize);
709 /* Return quoting options for STYLE, with no extra quoting. */
710 static struct quoting_options
711 quoting_options_from_style (enum quoting_style style)
713 struct quoting_options o;
/* "No extra quoting": clear every bit of the per-character vector, so
   that no character is marked for quoting beyond what the style
   itself requires. */
716 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG into storage slot N using
   quoting style S.  The options are built by
   quoting_options_from_style, so no extra characters are marked for
   quoting. */
721 quotearg_n_style (int n, enum quoting_style s, char const *arg)
723 struct quoting_options const o = quoting_options_from_style (s);
724 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Like quotearg_n_style, except that the size of ARG is given
   explicitly by ARGSIZE. */
728 quotearg_n_style_mem (int n, enum quoting_style s,
729 char const *arg, size_t argsize)
731 struct quoting_options const o = quoting_options_from_style (s);
732 return quotearg_n_options (n, arg, argsize, &o);
/* Equivalent to quotearg_n_style (0, S, ARG): quote ARG with style S
   into storage slot 0. */
736 quotearg_style (enum quoting_style s, char const *arg)
738 return quotearg_n_style (0, s, arg);
/* Equivalent to quotearg_n_style_mem (0, S, ARG, ARGSIZE): quote ARG,
   whose size is ARGSIZE, with style S into storage slot 0. */
742 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
744 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote ARG, whose size is ARGSIZE, into storage slot 0, using the
   default quoting options with the character CH additionally marked
   for quoting. */
748 quotearg_char_mem (char const *arg, size_t argsize, char ch)
750 struct quoting_options options;
/* Take a private copy so that the change below does not modify
   default_quoting_options itself. */
751 options = default_quoting_options;
/* 1 marks CH to be quoted even if it would not otherwise be. */
752 set_char_quoting (&options, ch, 1);
753 return quotearg_n_options (0, arg, argsize, &options);
/* Equivalent to quotearg_char_mem (ARG, SIZE_MAX, CH); the SIZE_MAX
   size means ARG is treated as a null-terminated string. */
757 quotearg_char (char const *arg, char ch)
759 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Equivalent to quotearg_char (ARG, ':'): quote ARG with colons
   additionally marked for quoting. */
763 quotearg_colon (char const *arg)
765 return quotearg_char (arg, ':');
/* Equivalent to quotearg_char_mem (ARG, ARGSIZE, ':'): like
   quotearg_colon, except that the size of ARG is given by ARGSIZE. */
769 quotearg_colon_mem (char const *arg, size_t argsize)
771 return quotearg_char_mem (arg, argsize, ':');