1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998-2002, 2004-2011 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
36 #define _(msgid) gettext (msgid)
37 #define N_(msgid) msgid
40 # define SIZE_MAX ((size_t) -1)
43 #define INT_BITS (sizeof (int) * CHAR_BIT)
45 /* The attribute __pure__ was added in gcc 2.96. */
46 #undef _GL_ATTRIBUTE_PURE
47 #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
48 # define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
50 # define _GL_ATTRIBUTE_PURE /* empty */
53 struct quoting_options
55 /* Basic quoting style. */
56 enum quoting_style style;
58 /* Additional flags. Bitwise combination of enum quoting_flags. */
61 /* Quote the characters indicated by this bit vector even if the
62 quoting style would not normally require them to be quoted. */
63 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
65 /* The left quote for custom_quoting_style. */
66 char const *left_quote;
68 /* The right quote for custom_quoting_style. */
69 char const *right_quote;
72 /* Names of quoting styles. */
73 char const *const quoting_style_args[] =
86 /* Correspondences to quoting style names. */
87 enum quoting_style const quoting_style_vals[] =
89 literal_quoting_style,
91 shell_always_quoting_style,
93 c_maybe_quoting_style,
99 /* The default quoting options. */
100 static struct quoting_options default_quoting_options;
102 /* Allocate a new set of quoting options, with contents initially identical
103 to O if O is not null, or to the default if O is null.
104 It is the caller's responsibility to free the result. */
105 struct quoting_options *
106 clone_quoting_options (struct quoting_options *o)
109 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
115 /* Get the value of O's quoting style. If O is null, use the default. */
116 enum quoting_style _GL_ATTRIBUTE_PURE
117 get_quoting_style (struct quoting_options *o)
119 return (o ? o : &default_quoting_options)->style;
122 /* In O (or in the default if O is null),
123 set the value of the quoting style to S. */
125 set_quoting_style (struct quoting_options *o, enum quoting_style s)
127 (o ? o : &default_quoting_options)->style = s;
130 /* In O (or in the default if O is null),
131 set the value of the quoting options for character C to I.
132 Return the old value. Currently, the only values defined for I are
133 0 (the default) and 1 (which means to quote the character even if
134 it would not otherwise be quoted). */
136 set_char_quoting (struct quoting_options *o, char c, int i)
/* Work with C as an unsigned char so that the index arithmetic below
   never sees a negative value where plain char is signed. */
138 unsigned char uc = c;
140 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* quote_these_too is used as a bit vector: the word is selected by
   uc / INT_BITS and the bit within that word by uc % INT_BITS. */
141 int shift = uc % INT_BITS;
/* R is the current value (0 or 1) of the bit for this character. */
142 int r = (*p >> shift) & 1;
/* Only the low bit of I is used. The XOR toggles the stored bit exactly
   when that low bit differs from R, and leaves it alone otherwise.
   NOTE(review): the shifted operand has type int, so when shift equals
   INT_BITS - 1 the result is not representable in int; an unsigned
   operand would avoid that -- confirm. */
143 *p ^= ((i & 1) ^ r) << shift;
147 /* In O (or in the default if O is null),
148 set the value of the quoting options flag to I, which can be a
149 bitwise combination of enum quoting_flags, or 0 for default
150 behavior. Return the old value. */
152 set_quoting_flags (struct quoting_options *o, int i)
156 o = &default_quoting_options;
/* Select custom_quoting_style in O and record LEFT_QUOTE and RIGHT_QUOTE
   as its delimiters. The two pointers are stored as given, not copied.
   NOTE(review): the guard that redirects O to default_quoting_options,
   and the action taken when LEFT_QUOTE or RIGHT_QUOTE is null, are not
   visible in this excerpt; presumably a null O means "use the default",
   as in the sibling setters -- confirm. */
163 set_custom_quoting (struct quoting_options *o,
164 char const *left_quote, char const *right_quote)
167 o = &default_quoting_options;
168 o->style = custom_quoting_style;
/* Both delimiters are checked for null before they are stored. */
169 if (!left_quote || !right_quote)
171 o->left_quote = left_quote;
172 o->right_quote = right_quote;
175 /* Return quoting options for STYLE, with no extra quoting.
   The result is a struct returned by value, built in the local O. */
176 static struct quoting_options
177 quoting_options_from_style (enum quoting_style style)
179 struct quoting_options o;
/* Clearing the whole per-character bit vector is what makes the result
   carry no extra quoting. */
182 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
186 /* MSGID approximates a quotation mark. Return its translation if it
187 has one; otherwise, return either it or "\"", depending on S. */
189 gettext_quote (char const *msgid, enum quoting_style s)
/* _() expands to gettext (MSGID). */
191 char const *translation = _(msgid);
/* Getting the MSGID pointer itself back is taken to mean that there is
   no translation (a pointer comparison, not a string comparison). Only
   clocale_quoting_style is special-cased in that situation. */
192 if (translation == msgid && s == clocale_quoting_style)
197 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
198 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
199 QUOTE_THESE_TOO to control quoting.
200 Terminate the output with a null character, and return the written
201 size of the output, not counting the terminating null.
202 If BUFFERSIZE is too small to store the output string, return the
203 value that would have been returned had BUFFERSIZE been large enough.
204 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
206 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
207 ARGSIZE, O), except it breaks O into its component pieces and is
208 not careful about errno. */
211 quotearg_buffer_restyled (char *buffer, size_t buffersize,
212 char const *arg, size_t argsize,
213 enum quoting_style quoting_style, int flags,
214 unsigned int const *quote_these_too,
215 char const *left_quote,
216 char const *right_quote)
220 char const *quote_string = 0;
221 size_t quote_string_len = 0;
222 bool backslash_escapes = false;
223 bool unibyte_locale = MB_CUR_MAX == 1;
224 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
229 if (len < buffersize) \
235 switch (quoting_style)
237 case c_maybe_quoting_style:
238 quoting_style = c_quoting_style;
239 elide_outer_quotes = true;
241 case c_quoting_style:
242 if (!elide_outer_quotes)
244 backslash_escapes = true;
246 quote_string_len = 1;
249 case escape_quoting_style:
250 backslash_escapes = true;
251 elide_outer_quotes = false;
254 case locale_quoting_style:
255 case clocale_quoting_style:
256 case custom_quoting_style:
258 if (quoting_style != custom_quoting_style)
261 /* Get translations for open and closing quotation marks.
263 The message catalog should translate "`" to a left
264 quotation mark suitable for the locale, and similarly for
265 "'". If the catalog has no translation,
266 locale_quoting_style quotes `like this', and
267 clocale_quoting_style quotes "like this".
269 For example, an American English Unicode locale should
270 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
271 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
272 MARK). A British English Unicode locale should instead
273 translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
274 and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
276 If you don't know what to put here, please see
277 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
278 and use glyphs suitable for your language. */
279 left_quote = gettext_quote (N_("`"), quoting_style);
280 right_quote = gettext_quote (N_("'"), quoting_style);
282 if (!elide_outer_quotes)
283 for (quote_string = left_quote; *quote_string; quote_string++)
284 STORE (*quote_string);
285 backslash_escapes = true;
286 quote_string = right_quote;
287 quote_string_len = strlen (quote_string);
291 case shell_quoting_style:
292 quoting_style = shell_always_quoting_style;
293 elide_outer_quotes = true;
295 case shell_always_quoting_style:
296 if (!elide_outer_quotes)
299 quote_string_len = 1;
302 case literal_quoting_style:
303 elide_outer_quotes = false;
310 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
314 bool is_right_quote = false;
316 if (backslash_escapes
318 && i + quote_string_len <= argsize
319 && memcmp (arg + i, quote_string, quote_string_len) == 0)
321 if (elide_outer_quotes)
322 goto force_outer_quoting_style;
323 is_right_quote = true;
330 if (backslash_escapes)
332 if (elide_outer_quotes)
333 goto force_outer_quoting_style;
335 /* If quote_string were to begin with digits, we'd need to
336 test for the end of the arg as well. However, it's
337 hard to imagine any locale that would use digits in
338 quotes, and set_custom_quoting is documented not to accept them. */
340 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
346 /* We don't have to worry that this last '0' will be
347 backslash-escaped because, again, quote_string should
348 not start with it and because quote_these_too is
349 documented as not accepting it. */
351 else if (flags & QA_ELIDE_NULL_BYTES)
356 switch (quoting_style)
358 case shell_always_quoting_style:
359 if (elide_outer_quotes)
360 goto force_outer_quoting_style;
363 case c_quoting_style:
364 if ((flags & QA_SPLIT_TRIGRAPHS)
365 && i + 2 < argsize && arg[i + 1] == '?')
369 case '(': case ')': case '-': case '/':
370 case '<': case '=': case '>':
371 /* Escape the second '?' in what would otherwise be a trigraph. */
373 if (elide_outer_quotes)
374 goto force_outer_quoting_style;
393 case '\a': esc = 'a'; goto c_escape;
394 case '\b': esc = 'b'; goto c_escape;
395 case '\f': esc = 'f'; goto c_escape;
396 case '\n': esc = 'n'; goto c_and_shell_escape;
397 case '\r': esc = 'r'; goto c_and_shell_escape;
398 case '\t': esc = 't'; goto c_and_shell_escape;
399 case '\v': esc = 'v'; goto c_escape;
401 /* No need to escape the escape if we are trying to elide
402 outer quotes and nothing else is problematic. */
403 if (backslash_escapes && elide_outer_quotes && quote_string_len)
407 if (quoting_style == shell_always_quoting_style
408 && elide_outer_quotes)
409 goto force_outer_quoting_style;
412 if (backslash_escapes)
419 case '{': case '}': /* sometimes special if isolated */
420 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
428 case '!': /* special in bash */
429 case '"': case '$': case '&':
430 case '(': case ')': case '*': case ';':
432 case '=': /* sometimes special in 0th or (with "set -k") later args */
434 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
436 /* A shell special character. In theory, '$' and '`' could
437 be the first bytes of multibyte characters, which means
438 we should check them with mbrtowc, but in practice this
439 doesn't happen so it's not worth worrying about. */
440 if (quoting_style == shell_always_quoting_style
441 && elide_outer_quotes)
442 goto force_outer_quoting_style;
446 if (quoting_style == shell_always_quoting_style)
448 if (elide_outer_quotes)
449 goto force_outer_quoting_style;
456 case '%': case '+': case ',': case '-': case '.': case '/':
457 case '0': case '1': case '2': case '3': case '4': case '5':
458 case '6': case '7': case '8': case '9': case ':':
459 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
460 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
461 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
462 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
463 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
464 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
465 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
466 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
467 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
468 /* These characters don't cause problems, no matter what the
469 quoting style is. They cannot start multibyte sequences.
470 A digit or a special letter would cause trouble if it
471 appeared at the beginning of quote_string because we'd then
472 escape by prepending a backslash. However, it's hard to
473 imagine any locale that would use digits or letters as
474 quotes, and set_custom_quoting is documented not to accept
475 them. Also, a digit or a special letter would cause
476 trouble if it appeared in quote_these_too, but that's also
477 documented as not accepting them. */
481 /* If we have a multibyte sequence, copy it until we reach
482 its end, find an error, or come back to the initial shift
483 state. For C-like styles, if the sequence has
484 unprintable characters, escape the whole sequence, since
485 we can't easily escape single characters within it. */
487 /* Length of multibyte sequence found so far. */
495 printable = isprint (c) != 0;
500 memset (&mbstate, 0, sizeof mbstate);
504 if (argsize == SIZE_MAX)
505 argsize = strlen (arg);
510 size_t bytes = mbrtowc (&w, &arg[i + m],
511 argsize - (i + m), &mbstate);
514 else if (bytes == (size_t) -1)
519 else if (bytes == (size_t) -2)
522 while (i + m < argsize && arg[i + m])
528 /* Work around a bug with older shells that "see" a '\'
529 that is really the 2nd byte of a multibyte character.
530 In practice the problem is limited to ASCII
531 chars >= '@' that are shell special chars. */
532 if ('[' == 0x5b && elide_outer_quotes
533 && quoting_style == shell_always_quoting_style)
536 for (j = 1; j < bytes; j++)
537 switch (arg[i + m + j])
539 case '[': case '\\': case '^':
541 goto force_outer_quoting_style;
553 while (! mbsinit (&mbstate));
556 if (1 < m || (backslash_escapes && ! printable))
558 /* Output a multibyte sequence, or an escaped
559 unprintable unibyte character. */
564 if (backslash_escapes && ! printable)
566 if (elide_outer_quotes)
567 goto force_outer_quoting_style;
569 STORE ('0' + (c >> 6));
570 STORE ('0' + ((c >> 3) & 7));
573 else if (is_right_quote)
576 is_right_quote = false;
589 if (! ((backslash_escapes || elide_outer_quotes)
591 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
596 if (elide_outer_quotes)
597 goto force_outer_quoting_style;
604 if (len == 0 && quoting_style == shell_always_quoting_style
605 && elide_outer_quotes)
606 goto force_outer_quoting_style;
608 if (quote_string && !elide_outer_quotes)
609 for (; *quote_string; quote_string++)
610 STORE (*quote_string);
612 if (len < buffersize)
616 force_outer_quoting_style:
617 /* Don't reuse quote_these_too, since the addition of outer quotes
618 sufficiently quotes the specified characters. */
619 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
621 flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
622 left_quote, right_quote);
625 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
626 argument ARG (of size ARGSIZE), using O to control quoting.
627 If O is null, use the default.
628 Terminate the output with a null character, and return the written
629 size of the output, not counting the terminating null.
630 If BUFFERSIZE is too small to store the output string, return the
631 value that would have been returned had BUFFERSIZE been large enough.
632 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
635 quotearg_buffer (char *buffer, size_t buffersize,
636 char const *arg, size_t argsize,
637 struct quoting_options const *o)
639 struct quoting_options const *p = o ? o : &default_quoting_options;
641 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
642 p->style, p->flags, p->quote_these_too,
643 p->left_quote, p->right_quote);
648 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O).
   No size is reported back to the caller, because the SIZE argument
   passed on is NULL. */
650 quotearg_alloc (char const *arg, size_t argsize,
651 struct quoting_options const *o)
653 return quotearg_alloc_mem (arg, argsize, NULL, o);
656 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
657 allocated storage containing the quoted string, and store the
658 resulting size into *SIZE, if non-NULL. The result can contain
659 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
660 NULL, and set_quoting_flags has not set the null byte elision flag. */
663 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
664 struct quoting_options const *o)
666 struct quoting_options const *p = o ? o : &default_quoting_options;
668 /* Elide embedded null bytes if we can't return a size. */
669 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
670 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
671 flags, p->quote_these_too,
674 char *buf = xcharalloc (bufsize);
675 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
677 p->left_quote, p->right_quote);
684 /* A storage slot with size and pointer to a value. */
691 /* Preallocate a slot 0 buffer, so that the caller can always quote
692 one small component of a "memory exhausted" message in slot 0. */
693 static char slot0[256];
694 static unsigned int nslots = 1;
695 static struct slotvec slotvec0 = {sizeof slot0, slot0};
696 static struct slotvec *slotvec = &slotvec0;
701 struct slotvec *sv = slotvec;
703 for (i = 1; i < nslots; i++)
705 if (sv[0].val != slot0)
708 slotvec0.size = sizeof slot0;
709 slotvec0.val = slot0;
719 /* Use storage slot N to return a quoted version of argument ARG.
720 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
721 null-terminated string.
722 OPTIONS specifies the quoting options.
723 The returned value points to static storage that can be
724 reused by the next call to this function with the same value of N.
725 N must be nonnegative. N is deliberately declared with type "int"
726 to allow for future extensions (using negative values). */
728 quotearg_n_options (int n, char const *arg, size_t argsize,
729 struct quoting_options const *options)
734 struct slotvec *sv = slotvec;
741 /* FIXME: technically, the type of n1 should be `unsigned int',
742 but that evokes an unsuppressible warning from gcc-4.0.1 and
743 older. If gcc ever provides an option to suppress that warning,
744 revert to the original type, so that the test in xalloc_oversized
745 is once again performed only at compile time. */
747 bool preallocated = (sv == &slotvec0);
749 if (xalloc_oversized (n1, sizeof *sv))
752 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
755 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
760 size_t size = sv[n].size;
761 char *val = sv[n].val;
762 /* Elide embedded null bytes since we don't return a size. */
763 int flags = options->flags | QA_ELIDE_NULL_BYTES;
764 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
765 options->style, flags,
766 options->quote_these_too,
768 options->right_quote);
772 sv[n].size = size = qsize + 1;
775 sv[n].val = val = xcharalloc (size);
776 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
777 flags, options->quote_these_too,
779 options->right_quote);
/* Quote ARG using storage slot N and the default quoting options.
   ARGSIZE is passed as SIZE_MAX, so ARG is treated as a null-terminated
   string. Returns the result of quotearg_n_options. */
788 quotearg_n (int n, char const *arg)
790 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG using storage slot N and the default
   quoting options. Returns the result of quotearg_n_options. */
794 quotearg_n_mem (int n, char const *arg, size_t argsize)
796 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Quote ARG with the default quoting options; shorthand for
   quotearg_n with slot 0. */
800 quotearg (char const *arg)
802 return quotearg_n (0, arg);
/* Quote the ARGSIZE bytes at ARG with the default quoting options;
   shorthand for quotearg_n_mem with slot 0. */
806 quotearg_mem (char const *arg, size_t argsize)
808 return quotearg_n_mem (0, arg, argsize);
/* Quote ARG using storage slot N and quoting style S.
   The options are built from S by quoting_options_from_style in a local
   and passed by address. ARGSIZE is passed as SIZE_MAX, so ARG is
   treated as a null-terminated string. */
812 quotearg_n_style (int n, enum quoting_style s, char const *arg)
814 struct quoting_options const o = quoting_options_from_style (s);
815 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG using storage slot N and quoting
   style S. The options are built from S by quoting_options_from_style
   in a local and passed by address. */
819 quotearg_n_style_mem (int n, enum quoting_style s,
820 char const *arg, size_t argsize)
822 struct quoting_options const o = quoting_options_from_style (s);
823 return quotearg_n_options (n, arg, argsize, &o);
/* Quote ARG with quoting style S; shorthand for quotearg_n_style
   with slot 0. */
827 quotearg_style (enum quoting_style s, char const *arg)
829 return quotearg_n_style (0, s, arg);
/* Quote the ARGSIZE bytes at ARG with quoting style S; shorthand for
   quotearg_n_style_mem with slot 0. */
833 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
835 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG in slot 0 using the default quoting
   options, with character CH additionally marked for quoting. */
839 quotearg_char_mem (char const *arg, size_t argsize, char ch)
841 struct quoting_options options;
/* Work on a copy so that the shared defaults are not modified. */
842 options = default_quoting_options;
/* 1 means: quote CH even if it would not otherwise be quoted. */
843 set_char_quoting (&options, ch, 1);
844 return quotearg_n_options (0, arg, argsize, &options);
/* Like quotearg_char_mem, but ARGSIZE is passed as SIZE_MAX, so ARG is
   treated as a null-terminated string. */
848 quotearg_char (char const *arg, char ch)
850 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Quote ARG via quotearg_char with ':' as the additionally quoted
   character. */
854 quotearg_colon (char const *arg)
856 return quotearg_char (arg, ':');
/* Quote the ARGSIZE bytes at ARG via quotearg_char_mem with ':' as the
   additionally quoted character. */
860 quotearg_colon_mem (char const *arg, size_t argsize)
862 return quotearg_char_mem (arg, argsize, ':');
/* Quote ARG using storage slot N with custom delimiters LEFT_QUOTE and
   RIGHT_QUOTE, by forwarding to quotearg_n_custom_mem.
   NOTE(review): the final argument of the forwarded call is not visible
   in this excerpt; presumably SIZE_MAX, as in the other wrappers that
   take no size -- confirm. */
866 quotearg_n_custom (int n, char const *left_quote,
867 char const *right_quote, char const *arg)
869 return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
/* Quote the ARGSIZE bytes at ARG using storage slot N with custom
   delimiters LEFT_QUOTE and RIGHT_QUOTE. */
874 quotearg_n_custom_mem (int n, char const *left_quote,
875 char const *right_quote,
876 char const *arg, size_t argsize)
/* Start from a copy of the defaults so the shared defaults are not
   modified, then switch the copy to the custom style. */
878 struct quoting_options o = default_quoting_options;
879 set_custom_quoting (&o, left_quote, right_quote);
880 return quotearg_n_options (n, arg, argsize, &o);
/* Quote ARG with custom delimiters LEFT_QUOTE and RIGHT_QUOTE;
   shorthand for quotearg_n_custom with slot 0. */
884 quotearg_custom (char const *left_quote, char const *right_quote,
887 return quotearg_n_custom (0, left_quote, right_quote, arg);
891 quotearg_custom_mem (char const *left_quote, char const *right_quote,
892 char const *arg, size_t argsize)
894 return quotearg_n_custom_mem (0, left_quote, right_quote, arg,