1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C syntax. */
47 # define mbstate_t int
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
49 # define iswprint(wc) isprint ((unsigned char) (wc))
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
58 # define SIZE_MAX ((size_t) -1)
61 #define INT_BITS (sizeof (int) * CHAR_BIT)
63 struct quoting_options
65 /* Basic quoting style. */
66 enum quoting_style style;
68 /* Additional flags. Bitwise combination of enum quoting_flags. */
71 /* Quote the characters indicated by this bit vector even if the
72 quoting style would not normally require them to be quoted. */
73 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
76 /* Names of quoting styles. */
77 char const *const quoting_style_args[] =
90 /* Correspondences to quoting style names. */
91 enum quoting_style const quoting_style_vals[] =
93 literal_quoting_style,
95 shell_always_quoting_style,
97 c_maybe_quoting_style,
100 clocale_quoting_style
103 /* The default quoting options. */
104 static struct quoting_options default_quoting_options;
106 /* Allocate a new set of quoting options, with contents initially identical
107 to O if O is not null, or to the default if O is null.
108 It is the caller's responsibility to free the result. */
109 struct quoting_options *
110 clone_quoting_options (struct quoting_options *o)
113 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
119 /* Get the value of O's quoting style. If O is null, use the default. */
121 get_quoting_style (struct quoting_options *o)
123 return (o ? o : &default_quoting_options)->style;
126 /* In O (or in the default if O is null),
127 set the value of the quoting style to S. */
129 set_quoting_style (struct quoting_options *o, enum quoting_style s)
131 (o ? o : &default_quoting_options)->style = s;
134 /* In O (or in the default if O is null),
135 set the value of the quoting options for character C to I.
136 Return the old value. Currently, the only values defined for I are
137 0 (the default) and 1 (which means to quote the character even if
138 it would not otherwise be quoted). */
140 set_char_quoting (struct quoting_options *o, char c, int i)
142 unsigned char uc = c;
144 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
145 int shift = uc % INT_BITS;
146 int r = (*p >> shift) & 1;
147 *p ^= ((i & 1) ^ r) << shift;
151 /* In O (or in the default if O is null),
152 set the value of the quoting options flag to I, which can be a
153 bitwise combination of enum quoting_flags, or 0 for default
154 behavior. Return the old value. */
156 set_quoting_flags (struct quoting_options *o, int i)
160 o = &default_quoting_options;
166 /* MSGID approximates a quotation mark. Return its translation if it
167 has one; otherwise, return either it or "\"", depending on S. */
169 gettext_quote (char const *msgid, enum quoting_style s)
171 char const *translation = _(msgid);
172 if (translation == msgid && s == clocale_quoting_style)
177 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
178 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and the
179 remaining part of O to control quoting.
180 Terminate the output with a null character, and return the written
181 size of the output, not counting the terminating null.
182 If BUFFERSIZE is too small to store the output string, return the
183 value that would have been returned had BUFFERSIZE been large enough.
184 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
186 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
187 ARGSIZE, O), except it uses QUOTING_STYLE and FLAGS instead of the
188 quoting style specified by O, and O may not be null. */
191 quotearg_buffer_restyled (char *buffer, size_t buffersize,
192 char const *arg, size_t argsize,
193 enum quoting_style quoting_style, int flags,
194 struct quoting_options const *o)
198 char const *quote_string = 0;
199 size_t quote_string_len = 0;
200 bool backslash_escapes = false;
201 bool unibyte_locale = MB_CUR_MAX == 1;
202 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
207 if (len < buffersize) \
213 switch (quoting_style)
215 case c_maybe_quoting_style:
216 quoting_style = c_quoting_style;
217 elide_outer_quotes = true;
219 case c_quoting_style:
220 if (!elide_outer_quotes)
222 backslash_escapes = true;
224 quote_string_len = 1;
227 case escape_quoting_style:
228 backslash_escapes = true;
229 elide_outer_quotes = false;
232 case locale_quoting_style:
233 case clocale_quoting_style:
236 /* Get translations for opening and closing quotation marks.
238 The message catalog should translate "`" to a left
239 quotation mark suitable for the locale, and similarly for
240 "'". If the catalog has no translation,
241 locale_quoting_style quotes `like this', and
242 clocale_quoting_style quotes "like this".
244 For example, an American English Unicode locale should
245 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
246 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
247 MARK). A British English Unicode locale should instead
248 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
249 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
251 If you don't know what to put here, please see
252 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
253 and use glyphs suitable for your language. */
255 char const *left = gettext_quote (N_("`"), quoting_style);
256 char const *right = gettext_quote (N_("'"), quoting_style);
257 if (!elide_outer_quotes)
258 for (quote_string = left; *quote_string; quote_string++)
259 STORE (*quote_string);
260 backslash_escapes = true;
261 quote_string = right;
262 quote_string_len = strlen (quote_string);
266 case shell_quoting_style:
267 quoting_style = shell_always_quoting_style;
268 elide_outer_quotes = true;
270 case shell_always_quoting_style:
271 if (!elide_outer_quotes)
274 quote_string_len = 1;
277 case literal_quoting_style:
278 elide_outer_quotes = false;
285 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
290 if (backslash_escapes
292 && i + quote_string_len <= argsize
293 && memcmp (arg + i, quote_string, quote_string_len) == 0)
295 if (elide_outer_quotes)
296 goto force_outer_quoting_style;
304 if (backslash_escapes)
306 if (elide_outer_quotes)
307 goto force_outer_quoting_style;
313 else if (flags & QA_ELIDE_NULL_BYTES)
318 switch (quoting_style)
320 case shell_always_quoting_style:
321 if (elide_outer_quotes)
322 goto force_outer_quoting_style;
325 case c_quoting_style:
326 if (i + 2 < argsize && arg[i + 1] == '?')
330 case '(': case ')': case '-': case '/':
331 case '<': case '=': case '>':
332 /* Escape the second '?' in what would otherwise be a trigraph. */
334 if (elide_outer_quotes)
335 goto force_outer_quoting_style;
353 case '\a': esc = 'a'; goto c_escape;
354 case '\b': esc = 'b'; goto c_escape;
355 case '\f': esc = 'f'; goto c_escape;
356 case '\n': esc = 'n'; goto c_and_shell_escape;
357 case '\r': esc = 'r'; goto c_and_shell_escape;
358 case '\t': esc = 't'; goto c_and_shell_escape;
359 case '\v': esc = 'v'; goto c_escape;
360 case '\\': esc = c; goto c_and_shell_escape;
363 if (quoting_style == shell_always_quoting_style
364 && elide_outer_quotes)
365 goto force_outer_quoting_style;
368 if (backslash_escapes)
375 case '{': case '}': /* sometimes special if isolated */
376 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
384 case '!': /* special in bash */
385 case '"': case '$': case '&':
386 case '(': case ')': case '*': case ';':
388 case '=': /* sometimes special in 0th or (with "set -k") later args */
390 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
392 /* A shell special character. In theory, '$' and '`' could
393 be the first bytes of multibyte characters, which means
394 we should check them with mbrtowc, but in practice this
395 doesn't happen so it's not worth worrying about. */
396 if (quoting_style == shell_always_quoting_style
397 && elide_outer_quotes)
398 goto force_outer_quoting_style;
402 if (quoting_style == shell_always_quoting_style)
404 if (elide_outer_quotes)
405 goto force_outer_quoting_style;
412 case '%': case '+': case ',': case '-': case '.': case '/':
413 case '0': case '1': case '2': case '3': case '4': case '5':
414 case '6': case '7': case '8': case '9': case ':':
415 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
416 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
417 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
418 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
419 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
420 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
421 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
422 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
423 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
424 /* These characters don't cause problems, no matter what the
425 quoting style is. They cannot start multibyte sequences. */
429 /* If we have a multibyte sequence, copy it until we reach
430 its end, find an error, or come back to the initial shift
431 state. For C-like styles, if the sequence has
432 unprintable characters, escape the whole sequence, since
433 we can't easily escape single characters within it. */
435 /* Length of multibyte sequence found so far. */
443 printable = isprint (c) != 0;
448 memset (&mbstate, 0, sizeof mbstate);
452 if (argsize == SIZE_MAX)
453 argsize = strlen (arg);
458 size_t bytes = mbrtowc (&w, &arg[i + m],
459 argsize - (i + m), &mbstate);
462 else if (bytes == (size_t) -1)
467 else if (bytes == (size_t) -2)
470 while (i + m < argsize && arg[i + m])
476 /* Work around a bug with older shells that "see" a '\'
477 that is really the 2nd byte of a multibyte character.
478 In practice the problem is limited to ASCII
479 chars >= '@' that are shell special chars. */
480 if ('[' == 0x5b && elide_outer_quotes
481 && quoting_style == shell_always_quoting_style)
484 for (j = 1; j < bytes; j++)
485 switch (arg[i + m + j])
487 case '[': case '\\': case '^':
489 goto force_outer_quoting_style;
501 while (! mbsinit (&mbstate));
504 if (1 < m || (backslash_escapes && ! printable))
506 /* Output a multibyte sequence, or an escaped
507 unprintable unibyte character. */
512 if (backslash_escapes && ! printable)
514 if (elide_outer_quotes)
515 goto force_outer_quoting_style;
517 STORE ('0' + (c >> 6));
518 STORE ('0' + ((c >> 3) & 7));
532 if (! (backslash_escapes
533 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
537 if (elide_outer_quotes)
538 goto force_outer_quoting_style;
545 if (i == 0 && quoting_style == shell_always_quoting_style
546 && elide_outer_quotes)
547 goto force_outer_quoting_style;
549 if (quote_string && !elide_outer_quotes)
550 for (; *quote_string; quote_string++)
551 STORE (*quote_string);
553 if (len < buffersize)
557 force_outer_quoting_style:
558 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
560 flags & ~QA_ELIDE_OUTER_QUOTES, o);
563 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
564 argument ARG (of size ARGSIZE), using O to control quoting.
565 If O is null, use the default.
566 Terminate the output with a null character, and return the written
567 size of the output, not counting the terminating null.
568 If BUFFERSIZE is too small to store the output string, return the
569 value that would have been returned had BUFFERSIZE been large enough.
570 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
573 quotearg_buffer (char *buffer, size_t buffersize,
574 char const *arg, size_t argsize,
575 struct quoting_options const *o)
577 struct quoting_options const *p = o ? o : &default_quoting_options;
579 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
580 p->style, p->flags, p);
585 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
587 quotearg_alloc (char const *arg, size_t argsize,
588 struct quoting_options const *o)
590 return quotearg_alloc_mem (arg, argsize, NULL, o);
593 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
594 allocated storage containing the quoted string, and store the
595 resulting size into *SIZE, if non-NULL. The result can contain
596 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
597 NULL, and set_quoting_flags has not set the null byte elision flag. */
600 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
601 struct quoting_options const *o)
603 struct quoting_options const *p = o ? o : &default_quoting_options;
605 /* Elide embedded null bytes if we can't return a size. */
606 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
607 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
609 char *buf = xcharalloc (bufsize);
610 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, p);
617 /* A storage slot with size and pointer to a value. */
624 /* Preallocate a slot 0 buffer, so that the caller can always quote
625 one small component of a "memory exhausted" message in slot 0. */
626 static char slot0[256];
627 static unsigned int nslots = 1;
628 static struct slotvec slotvec0 = {sizeof slot0, slot0};
629 static struct slotvec *slotvec = &slotvec0;
634 struct slotvec *sv = slotvec;
636 for (i = 1; i < nslots; i++)
638 if (sv[0].val != slot0)
641 slotvec0.size = sizeof slot0;
642 slotvec0.val = slot0;
652 /* Use storage slot N to return a quoted version of argument ARG.
653 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
654 null-terminated string.
655 OPTIONS specifies the quoting options.
656 The returned value points to static storage that can be
657 reused by the next call to this function with the same value of N.
658 N must be nonnegative. N is deliberately declared with type "int"
659 to allow for future extensions (using negative values). */
661 quotearg_n_options (int n, char const *arg, size_t argsize,
662 struct quoting_options const *options)
667 struct slotvec *sv = slotvec;
674 /* FIXME: technically, the type of n1 should be `unsigned int',
675 but that evokes an unsuppressible warning from gcc-4.0.1 and
676 older. If gcc ever provides an option to suppress that warning,
677 revert to the original type, so that the test in xalloc_oversized
678 is once again performed only at compile time. */
680 bool preallocated = (sv == &slotvec0);
682 if (xalloc_oversized (n1, sizeof *sv))
685 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
688 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
693 size_t size = sv[n].size;
694 char *val = sv[n].val;
695 /* Elide embedded null bytes since we don't return a size. */
696 int flags = options->flags | QA_ELIDE_NULL_BYTES;
697 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
698 options->style, flags, options);
702 sv[n].size = size = qsize + 1;
705 sv[n].val = val = xcharalloc (size);
706 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
716 quotearg_n (int n, char const *arg)
718 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
722 quotearg_n_mem (int n, char const *arg, size_t argsize)
724 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
728 quotearg (char const *arg)
730 return quotearg_n (0, arg);
734 quotearg_mem (char const *arg, size_t argsize)
736 return quotearg_n_mem (0, arg, argsize);
739 /* Return quoting options for STYLE, with no extra quoting. */
740 static struct quoting_options
741 quoting_options_from_style (enum quoting_style style)
743 struct quoting_options o;
746 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
751 quotearg_n_style (int n, enum quoting_style s, char const *arg)
753 struct quoting_options const o = quoting_options_from_style (s);
754 return quotearg_n_options (n, arg, SIZE_MAX, &o);
758 quotearg_n_style_mem (int n, enum quoting_style s,
759 char const *arg, size_t argsize)
761 struct quoting_options const o = quoting_options_from_style (s);
762 return quotearg_n_options (n, arg, argsize, &o);
766 quotearg_style (enum quoting_style s, char const *arg)
768 return quotearg_n_style (0, s, arg);
772 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
774 return quotearg_n_style_mem (0, s, arg, argsize);
778 quotearg_char_mem (char const *arg, size_t argsize, char ch)
780 struct quoting_options options;
781 options = default_quoting_options;
782 set_char_quoting (&options, ch, 1);
783 return quotearg_n_options (0, arg, argsize, &options);
787 quotearg_char (char const *arg, char ch)
789 return quotearg_char_mem (arg, SIZE_MAX, ch);
793 quotearg_colon (char const *arg)
795 return quotearg_char (arg, ':');
799 quotearg_colon_mem (char const *arg, size_t argsize)
801 return quotearg_char_mem (arg, argsize, ':');