1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand for the message-translation calls used in this file:
   _() looks MSGID up through gettext, while N_() expands to its
   argument unchanged and so performs no lookup itself. */
36 #define _(msgid) gettext (msgid)
37 #define N_(msgid) msgid
41 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
49 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
50 other macros are defined only for documentation and to satisfy C
syntax. */
/* Single-byte stand-ins for the multibyte API. With these, the
   conversion state is a plain int. */
55 # define mbstate_t int
/* Copies the byte *S into *PWC and evaluates to 1 if that byte is
   nonzero, 0 otherwise; the N and PS arguments are ignored. */
56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* Classify through isprint; the cast keeps the argument within
   unsigned char range. */
57 # define iswprint(wc) isprint ((unsigned char) (wc))
/* When mbsinit is not a macro and HAVE_MBSINIT is not set
   (presumably by the build configuration), report every conversion
   state as an initial state. */
61 #if !defined mbsinit && !HAVE_MBSINIT
62 # define mbsinit(ps) 1
/* Largest value representable in size_t. */
68 # define SIZE_MAX ((size_t) -1)
/* Number of bits in the object representation of an int. */
71 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Options that control how an argument is quoted: a basic style plus
   a per-character set of bytes that must always be quoted. */
73 struct quoting_options
75 /* Basic quoting style. */
76 enum quoting_style style;
78 /* Quote the characters indicated by this bit vector even if the
79 quoting style would not normally require them to be quoted. */
/* The vector holds one bit per unsigned char value, packed INT_BITS
   to a word: the flag for character C is bit C % INT_BITS of element
   C / INT_BITS. */
80 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
83 /* Names of quoting styles. */
/* NOTE(review): the initializer is not shown here; presumably the
   entries are listed in the same order as quoting_style_vals --
   confirm. */
84 char const *const quoting_style_args[] =
96 /* Correspondences to quoting style names. */
/* Entry K is meant to be the enum value for the name stored at
   quoting_style_args[K], so the two tables have to be kept in the
   same order. */
97 enum quoting_style const quoting_style_vals[] =
99 literal_quoting_style,
101 shell_always_quoting_style,
103 escape_quoting_style,
104 locale_quoting_style,
105 clocale_quoting_style
108 /* The default quoting options. */
/* Used whenever a caller passes a null options pointer. Being a
   static object with no initializer, it starts out all zero: no
   extra characters are flagged in quote_these_too, and style is
   whichever enumerator has the value 0. */
109 static struct quoting_options default_quoting_options;
111 /* Allocate a new set of quoting options, with contents initially identical
112 to O if O is not null, or to the default if O is null.
113 It is the caller's responsibility to free the result. */
114 struct quoting_options *
115 clone_quoting_options (struct quoting_options *o)
/* Duplicate either O or the default options through xmemdup.
   NOTE(review): the size argument of this call and the return
   statement are not shown here -- confirm that the copy covers the
   whole struct. */
118 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
124 /* Get the value of O's quoting style. If O is null, use the default. */
/* O is only read. The result is the style field of O, or of
   default_quoting_options when O is null. */
126 get_quoting_style (struct quoting_options *o)
128 return (o ? o : &default_quoting_options)->style;
131 /* In O (or in the default if O is null),
132 set the value of the quoting style to S. */
/* Passing a null O modifies default_quoting_options, so it affects
   every later call that falls back to the default options. */
134 set_quoting_style (struct quoting_options *o, enum quoting_style s)
136 (o ? o : &default_quoting_options)->style = s;
139 /* In O (or in the default if O is null),
140 set the value of the quoting options for character C to I.
141 Return the old value. Currently, the only values defined for I are
142 0 (the default) and 1 (which means to quote the character even if
143 it would not otherwise be quoted). */
145 set_char_quoting (struct quoting_options *o, char c, int i)
147 unsigned char uc = c;
149 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
150 int shift = uc % INT_BITS;
151 int r = (*p >> shift) & 1;
152 *p ^= ((i & 1) ^ r) << shift;
156 /* MSGID approximates a quotation mark. Return its translation if it
157 has one; otherwise, return either it or "\"", depending on S. */
159 gettext_quote (char const *msgid, enum quoting_style s)
161 char const *translation = _(msgid);
/* A result that is the very same pointer as MSGID is taken to mean
   that no translation was found; only clocale_quoting_style gets
   special treatment in that case. */
162 if (translation == msgid && s == clocale_quoting_style)
167 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
168 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
169 non-quoting-style part of O to control quoting.
170 Terminate the output with a null character, and return the written
171 size of the output, not counting the terminating null.
172 If BUFFERSIZE is too small to store the output string, return the
173 value that would have been returned had BUFFERSIZE been large enough.
174 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
176 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
177 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
178 style specified by O, and O may not be null. */
/* Reading guide for the body below.

   Setup: the first switch on QUOTING_STYLE chooses the closing quote
   (QUOTE_STRING, QUOTE_STRING_LEN) and sets BACKSLASH_ESCAPES for the
   styles that escape with a backslash. For the two locale styles the
   opening quote is emitted at once through STORE and the closing
   quote is kept for the end of the output.

   Main loop: it runs until the terminating null when ARGSIZE is
   SIZE_MAX, and for ARGSIZE bytes otherwise. The case lists decide
   which bytes need an escape or force a change of style. The
   multibyte section calls mbrtowc so that a multibyte character is
   handled as a unit; at that point ARGSIZE is replaced by
   strlen (ARG) if it was still SIZE_MAX. From mbrtowc, (size_t) -1
   reports an invalid sequence and (size_t) -2 an incomplete one.

   Restart: under shell_quoting_style, meeting something that needs
   quoting, or reaching the i == 0 test with no input consumed, jumps
   to use_shell_always_quoting_style, which redoes the whole
   conversion with shell_always_quoting_style.

   NOTE(review): in the closing-quote test, I + QUOTE_STRING_LEN <=
   ARGSIZE is trivially true while ARGSIZE is still SIZE_MAX, so the
   memcmp may examine bytes beyond the terminating null when the
   closing quote is longer than one byte -- confirm and bound it.

   NOTE(review): 1 << (C % INT_BITS) shifts a signed int; when the
   remainder is INT_BITS - 1 the result is not representable, which
   is undefined behavior. Consider an unsigned operand. */
181 quotearg_buffer_restyled (char *buffer, size_t buffersize,
182 char const *arg, size_t argsize,
183 enum quoting_style quoting_style,
184 struct quoting_options const *o)
188 char const *quote_string = 0;
189 size_t quote_string_len = 0;
190 bool backslash_escapes = false;
191 bool unibyte_locale = MB_CUR_MAX == 1;
196 if (len < buffersize) \
202 switch (quoting_style)
204 case c_quoting_style:
206 backslash_escapes = true;
208 quote_string_len = 1;
211 case escape_quoting_style:
212 backslash_escapes = true;
215 case locale_quoting_style:
216 case clocale_quoting_style:
219 Get translations for open and closing quotation marks.
221 The message catalog should translate "`" to a left
222 quotation mark suitable for the locale, and similarly for
223 "'". If the catalog has no translation,
224 locale_quoting_style quotes `like this', and
225 clocale_quoting_style quotes "like this".
227 For example, an American English Unicode locale should
228 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
229 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
230 MARK). A British English Unicode locale should instead
231 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
232 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
234 If you don't know what to put here, please see
235 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
236 and use glyphs suitable for your language. */
238 char const *left = gettext_quote (N_("`"), quoting_style);
239 char const *right = gettext_quote (N_("'"), quoting_style);
240 for (quote_string = left; *quote_string; quote_string++)
241 STORE (*quote_string);
242 backslash_escapes = true;
243 quote_string = right;
244 quote_string_len = strlen (quote_string);
248 case shell_always_quoting_style:
251 quote_string_len = 1;
258 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
263 if (backslash_escapes
265 && i + quote_string_len <= argsize
266 && memcmp (arg + i, quote_string, quote_string_len) == 0)
273 if (backslash_escapes)
283 switch (quoting_style)
285 case shell_quoting_style:
286 goto use_shell_always_quoting_style;
288 case c_quoting_style:
289 if (i + 2 < argsize && arg[i + 1] == '?')
293 case '(': case ')': case '-': case '/':
294 case '<': case '=': case '>':
295 /* Escape the second '?' in what would otherwise be
314 case '\a': esc = 'a'; goto c_escape;
315 case '\b': esc = 'b'; goto c_escape;
316 case '\f': esc = 'f'; goto c_escape;
317 case '\n': esc = 'n'; goto c_and_shell_escape;
318 case '\r': esc = 'r'; goto c_and_shell_escape;
319 case '\t': esc = 't'; goto c_and_shell_escape;
320 case '\v': esc = 'v'; goto c_escape;
321 case '\\': esc = c; goto c_and_shell_escape;
324 if (quoting_style == shell_quoting_style)
325 goto use_shell_always_quoting_style;
327 if (backslash_escapes)
334 case '{': case '}': /* sometimes special if isolated */
335 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
343 case '!': /* special in bash */
344 case '"': case '$': case '&':
345 case '(': case ')': case '*': case ';':
347 case '=': /* sometimes special in 0th or (with "set -k") later args */
349 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
351 /* A shell special character. In theory, '$' and '`' could
352 be the first bytes of multibyte characters, which means
353 we should check them with mbrtowc, but in practice this
354 doesn't happen so it's not worth worrying about. */
355 if (quoting_style == shell_quoting_style)
356 goto use_shell_always_quoting_style;
360 switch (quoting_style)
362 case shell_quoting_style:
363 goto use_shell_always_quoting_style;
365 case shell_always_quoting_style:
376 case '%': case '+': case ',': case '-': case '.': case '/':
377 case '0': case '1': case '2': case '3': case '4': case '5':
378 case '6': case '7': case '8': case '9': case ':':
379 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
380 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
381 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
382 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
383 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
384 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
385 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
386 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
387 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
388 /* These characters don't cause problems, no matter what the
389 quoting style is. They cannot start multibyte sequences. */
393 /* If we have a multibyte sequence, copy it until we reach
394 its end, find an error, or come back to the initial shift
395 state. For C-like styles, if the sequence has
396 unprintable characters, escape the whole sequence, since
397 we can't easily escape single characters within it. */
399 /* Length of multibyte sequence found so far. */
407 printable = isprint (c) != 0;
412 memset (&mbstate, 0, sizeof mbstate);
416 if (argsize == SIZE_MAX)
417 argsize = strlen (arg);
422 size_t bytes = mbrtowc (&w, &arg[i + m],
423 argsize - (i + m), &mbstate);
426 else if (bytes == (size_t) -1)
431 else if (bytes == (size_t) -2)
434 while (i + m < argsize && arg[i + m])
440 /* Work around a bug with older shells that "see" a '\'
441 that is really the 2nd byte of a multibyte character.
442 In practice the problem is limited to ASCII
443 chars >= '@' that are shell special chars. */
444 if ('[' == 0x5b && quoting_style == shell_quoting_style)
447 for (j = 1; j < bytes; j++)
448 switch (arg[i + m + j])
450 case '[': case '\\': case '^':
452 goto use_shell_always_quoting_style;
464 while (! mbsinit (&mbstate));
467 if (1 < m || (backslash_escapes && ! printable))
469 /* Output a multibyte sequence, or an escaped
470 unprintable unibyte character. */
475 if (backslash_escapes && ! printable)
478 STORE ('0' + (c >> 6));
479 STORE ('0' + ((c >> 3) & 7));
493 if (! (backslash_escapes
494 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
504 if (i == 0 && quoting_style == shell_quoting_style)
505 goto use_shell_always_quoting_style;
508 for (; *quote_string; quote_string++)
509 STORE (*quote_string);
511 if (len < buffersize)
515 use_shell_always_quoting_style:
516 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
517 shell_always_quoting_style, o);
520 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
521 argument ARG (of size ARGSIZE), using O to control quoting.
522 If O is null, use the default.
523 Terminate the output with a null character, and return the written
524 size of the output, not counting the terminating null.
525 If BUFFERSIZE is too small to store the output string, return the
526 value that would have been returned had BUFFERSIZE been large enough.
527 If ARGSIZE is SIZE_MAX, use the string length of the argument for
ARGSIZE. */
530 quotearg_buffer (char *buffer, size_t buffersize,
531 char const *arg, size_t argsize,
532 struct quoting_options const *o)
/* Fall back to the default options when O is null. */
534 struct quoting_options const *p = o ? o : &default_quoting_options;
/* Delegate to quotearg_buffer_restyled. NOTE(review): the remaining
   arguments of this call (presumably the style of P and P itself)
   and the code that follows it are not shown here -- confirm. */
536 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
542 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
543 allocated storage containing the quoted string. */
545 quotearg_alloc (char const *arg, size_t argsize,
546 struct quoting_options const *o)
/* Two passes: the first call, with no buffer, only measures the
   quoted length; one more byte is added for the terminating null. */
549 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
550 char *buf = xcharalloc (bufsize);
/* The second call produces the text in the new buffer. */
551 quotearg_buffer (buf, bufsize, arg, argsize, o);
556 /* A storage slot with size and pointer to a value. */
563 /* Preallocate a slot 0 buffer, so that the caller can always quote
564 one small component of a "memory exhausted" message in slot 0. */
565 static char slot0[256];
/* Number of entries in the slot vector; initially just the
   preallocated one. */
566 static unsigned int nslots = 1;
/* The initial one-entry vector: it describes slot0. */
567 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* Current slot vector. It starts at the static slotvec0 and is
   replaced by storage from xrealloc in quotearg_n_options when more
   slots are needed. */
568 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the header of this routine is not shown here. The
   visible statements walk slots 1 .. nslots - 1, treat slot 0
   specially when its buffer is no longer the static slot0 array, and
   put slotvec0 back to its initial size and buffer. What is done to
   each slot (presumably freeing its value) is not visible --
   confirm. */
573 struct slotvec *sv = slotvec;
575 for (i = 1; i < nslots; i++)
577 if (sv[0].val != slot0)
580 slotvec0.size = sizeof slot0;
581 slotvec0.val = slot0;
591 /* Use storage slot N to return a quoted version of argument ARG.
592 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
593 null-terminated string.
594 OPTIONS specifies the quoting options.
595 The returned value points to static storage that can be
596 reused by the next call to this function with the same value of N.
597 N must be nonnegative. N is deliberately declared with type "int"
598 to allow for future extensions (using negative values). */
600 quotearg_n_options (int n, char const *arg, size_t argsize,
601 struct quoting_options const *options)
606 struct slotvec *sv = slotvec;
613 /* FIXME: technically, the type of n1 should be `unsigned int',
614 but that evokes an unsuppressible warning from gcc-4.0.1 and
615 older. If gcc ever provides an option to suppress that warning,
616 revert to the original type, so that the test in xalloc_oversized
617 is once again performed only at compile time. */
/* The initial vector is the static slotvec0; it must not be handed
   to xrealloc, so a null pointer is passed instead in that case. */
619 bool preallocated = (sv == &slotvec0);
/* Presumably rejects a slot count whose total byte size would
   overflow -- the action taken is not shown here. */
621 if (xalloc_oversized (n1, sizeof *sv))
624 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the newly added slots so that each starts with size 0 and a
   null value. */
627 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
/* First attempt: quote into whatever buffer slot N currently has. */
632 size_t size = sv[n].size;
633 char *val = sv[n].val;
634 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* QSIZE is the length the output needs; the slot is resized to hold
   it plus the terminating null, and the quoting is done again. The
   condition guarding this path is not shown here. */
638 sv[n].size = size = qsize + 1;
641 sv[n].val = val = xcharalloc (size);
642 quotearg_buffer (val, size, arg, argsize, options);
/* Quote the null-terminated string ARG into storage slot N, using
   the default quoting options. */
651 quotearg_n (int n, char const *arg)
653 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Like quotearg_n, always using storage slot 0. */
657 quotearg (char const *arg)
659 return quotearg_n (0, arg);
662 /* Return quoting options for STYLE, with no extra quoting. */
663 static struct quoting_options
664 quoting_options_from_style (enum quoting_style style)
666 struct quoting_options o;
/* Clear the whole bit vector so that no character is flagged for
   extra quoting. */
668 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG into storage slot N using
   style S and no extra quoting. The options are built in a local
   object and passed by address. */
673 quotearg_n_style (int n, enum quoting_style s, char const *arg)
675 struct quoting_options const o = quoting_options_from_style (s);
676 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Like quotearg_n_style, except that ARG is ARGSIZE bytes long
   rather than being measured as a null-terminated string. */
680 quotearg_n_style_mem (int n, enum quoting_style s,
681 char const *arg, size_t argsize)
683 struct quoting_options const o = quoting_options_from_style (s);
684 return quotearg_n_options (n, arg, argsize, &o);
/* Like quotearg_n_style, always using storage slot 0. */
688 quotearg_style (enum quoting_style s, char const *arg)
690 return quotearg_n_style (0, s, arg);
/* Quote the null-terminated string ARG into storage slot 0, using
   the default options with character CH additionally forced to be
   quoted. */
694 quotearg_char (char const *arg, char ch)
696 struct quoting_options options;
/* Work on a copy so that the shared defaults are left untouched. */
697 options = default_quoting_options;
698 set_char_quoting (&options, ch, 1);
699 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Like quotearg_char, with a colon as the extra character to quote. */
703 quotearg_colon (char const *arg)
705 return quotearg_char (arg, ':');