1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C
47 # define mbstate_t int
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
49 # define iswprint(wc) isprint ((unsigned char) (wc))
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
58 # define SIZE_MAX ((size_t) -1)
61 #define INT_BITS (sizeof (int) * CHAR_BIT)
63 struct quoting_options
65 /* Basic quoting style. */
66 enum quoting_style style;
68 /* Quote the characters indicated by this bit vector even if the
69 quoting style would not normally require them to be quoted. */
70 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
73 /* Names of quoting styles. */
74 char const *const quoting_style_args[] =
86 /* Correspondences to quoting style names. */
87 enum quoting_style const quoting_style_vals[] =
89 literal_quoting_style,
91 shell_always_quoting_style,
98 /* The default quoting options. */
99 static struct quoting_options default_quoting_options;
101 /* Allocate a new set of quoting options, with contents initially identical
102 to O if O is not null, or to the default if O is null.
103 It is the caller's responsibility to free the result. */
104 struct quoting_options *
105 clone_quoting_options (struct quoting_options *o)
108 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
114 /* Get the value of O's quoting style. If O is null, use the default. */
116 get_quoting_style (struct quoting_options *o)
118 return (o ? o : &default_quoting_options)->style;
121 /* In O (or in the default if O is null),
122 set the value of the quoting style to S. */
124 set_quoting_style (struct quoting_options *o, enum quoting_style s)
126 (o ? o : &default_quoting_options)->style = s;
129 /* In O (or in the default if O is null),
130 set the value of the quoting options for character C to I.
131 Return the old value. Currently, the only values defined for I are
132 0 (the default) and 1 (which means to quote the character even if
133 it would not otherwise be quoted). */
135 set_char_quoting (struct quoting_options *o, char c, int i)
137 unsigned char uc = c;
139 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
140 int shift = uc % INT_BITS;
141 int r = (*p >> shift) & 1;
142 *p ^= ((i & 1) ^ r) << shift;
146 /* MSGID approximates a quotation mark. Return its translation if it
147 has one; otherwise, return "\"" if S is clocale_quoting_style, else MSGID. */
149 gettext_quote (char const *msgid, enum quoting_style s)
151 char const *translation = _(msgid);
152 if (translation == msgid && s == clocale_quoting_style)
157 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
158 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
159 non-quoting-style part of O to control quoting.
160 Terminate the output with a null character, and return the written
161 size of the output, not counting the terminating null.
162 If BUFFERSIZE is too small to store the output string, return the
163 value that would have been returned had BUFFERSIZE been large enough.
164 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
166 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
167 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
168 style specified by O, and O may not be null. */
171 quotearg_buffer_restyled (char *buffer, size_t buffersize,
172 char const *arg, size_t argsize,
173 enum quoting_style quoting_style,
174 struct quoting_options const *o)
178 char const *quote_string = 0;
179 size_t quote_string_len = 0;
180 bool backslash_escapes = false;
181 bool unibyte_locale = MB_CUR_MAX == 1;
186 if (len < buffersize) \
192 switch (quoting_style)
194 case c_quoting_style:
196 backslash_escapes = true;
198 quote_string_len = 1;
201 case escape_quoting_style:
202 backslash_escapes = true;
205 case locale_quoting_style:
206 case clocale_quoting_style:
209 Get translations for opening and closing quotation marks.
211 The message catalog should translate "`" to a left
212 quotation mark suitable for the locale, and similarly for
213 "'". If the catalog has no translation,
214 locale_quoting_style quotes `like this', and
215 clocale_quoting_style quotes "like this".
217 For example, an American English Unicode locale should
218 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
219 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
220 MARK). A British English Unicode locale should instead
221 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
222 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
224 If you don't know what to put here, please see
225 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
226 and use glyphs suitable for your language. */
228 char const *left = gettext_quote (N_("`"), quoting_style);
229 char const *right = gettext_quote (N_("'"), quoting_style);
230 for (quote_string = left; *quote_string; quote_string++)
231 STORE (*quote_string);
232 backslash_escapes = true;
233 quote_string = right;
234 quote_string_len = strlen (quote_string);
238 case shell_always_quoting_style:
241 quote_string_len = 1;
248 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
253 if (backslash_escapes
255 && i + quote_string_len <= argsize
256 && memcmp (arg + i, quote_string, quote_string_len) == 0)
263 if (backslash_escapes)
273 switch (quoting_style)
275 case shell_quoting_style:
276 goto use_shell_always_quoting_style;
278 case c_quoting_style:
279 if (i + 2 < argsize && arg[i + 1] == '?')
283 case '(': case ')': case '-': case '/':
284 case '<': case '=': case '>':
285 /* Escape the second '?' in what would otherwise be
304 case '\a': esc = 'a'; goto c_escape;
305 case '\b': esc = 'b'; goto c_escape;
306 case '\f': esc = 'f'; goto c_escape;
307 case '\n': esc = 'n'; goto c_and_shell_escape;
308 case '\r': esc = 'r'; goto c_and_shell_escape;
309 case '\t': esc = 't'; goto c_and_shell_escape;
310 case '\v': esc = 'v'; goto c_escape;
311 case '\\': esc = c; goto c_and_shell_escape;
314 if (quoting_style == shell_quoting_style)
315 goto use_shell_always_quoting_style;
317 if (backslash_escapes)
324 case '{': case '}': /* sometimes special if isolated */
325 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
333 case '!': /* special in bash */
334 case '"': case '$': case '&':
335 case '(': case ')': case '*': case ';':
337 case '=': /* sometimes special in 0th or (with "set -k") later args */
339 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
341 /* A shell special character. In theory, '$' and '`' could
342 be the first bytes of multibyte characters, which means
343 we should check them with mbrtowc, but in practice this
344 doesn't happen so it's not worth worrying about. */
345 if (quoting_style == shell_quoting_style)
346 goto use_shell_always_quoting_style;
350 switch (quoting_style)
352 case shell_quoting_style:
353 goto use_shell_always_quoting_style;
355 case shell_always_quoting_style:
366 case '%': case '+': case ',': case '-': case '.': case '/':
367 case '0': case '1': case '2': case '3': case '4': case '5':
368 case '6': case '7': case '8': case '9': case ':':
369 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
370 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
371 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
372 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
373 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
374 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
375 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
376 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
377 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
378 /* These characters don't cause problems, no matter what the
379 quoting style is. They cannot start multibyte sequences. */
383 /* If we have a multibyte sequence, copy it until we reach
384 its end, find an error, or come back to the initial shift
385 state. For C-like styles, if the sequence has
386 unprintable characters, escape the whole sequence, since
387 we can't easily escape single characters within it. */
389 /* Length of multibyte sequence found so far. */
397 printable = isprint (c) != 0;
402 memset (&mbstate, 0, sizeof mbstate);
406 if (argsize == SIZE_MAX)
407 argsize = strlen (arg);
412 size_t bytes = mbrtowc (&w, &arg[i + m],
413 argsize - (i + m), &mbstate);
416 else if (bytes == (size_t) -1)
421 else if (bytes == (size_t) -2)
424 while (i + m < argsize && arg[i + m])
430 /* Work around a bug with older shells that "see" a '\'
431 that is really the 2nd byte of a multibyte character.
432 In practice the problem is limited to ASCII
433 chars >= '@' that are shell special chars. */
434 if ('[' == 0x5b && quoting_style == shell_quoting_style)
437 for (j = 1; j < bytes; j++)
438 switch (arg[i + m + j])
440 case '[': case '\\': case '^':
442 goto use_shell_always_quoting_style;
454 while (! mbsinit (&mbstate));
457 if (1 < m || (backslash_escapes && ! printable))
459 /* Output a multibyte sequence, or an escaped
460 unprintable unibyte character. */
465 if (backslash_escapes && ! printable)
468 STORE ('0' + (c >> 6));
469 STORE ('0' + ((c >> 3) & 7));
483 if (! (backslash_escapes
484 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
494 if (i == 0 && quoting_style == shell_quoting_style)
495 goto use_shell_always_quoting_style;
498 for (; *quote_string; quote_string++)
499 STORE (*quote_string);
501 if (len < buffersize)
505 use_shell_always_quoting_style:
506 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
507 shell_always_quoting_style, o);
510 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
511 argument ARG (of size ARGSIZE), using O to control quoting.
512 If O is null, use the default.
513 Terminate the output with a null character, and return the written
514 size of the output, not counting the terminating null.
515 If BUFFERSIZE is too small to store the output string, return the
516 value that would have been returned had BUFFERSIZE been large enough.
517 If ARGSIZE is SIZE_MAX, use the string length of the argument for
520 quotearg_buffer (char *buffer, size_t buffersize,
521 char const *arg, size_t argsize,
522 struct quoting_options const *o)
524 struct quoting_options const *p = o ? o : &default_quoting_options;
526 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
532 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
534 quotearg_alloc (char const *arg, size_t argsize,
535 struct quoting_options const *o)
537 return quotearg_alloc_mem (arg, argsize, NULL, o);
540 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
541 allocated storage containing the quoted string, and store the
542 resulting size into *SIZE, if non-NULL. If SIZE is NULL, then
543 either ARGSIZE should be SIZE_MAX, or O should escape or elide any
544 embedded null bytes. */
546 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
547 struct quoting_options const *o)
550 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
551 char *buf = xcharalloc (bufsize);
552 quotearg_buffer (buf, bufsize, arg, argsize, o);
559 /* A storage slot with size and pointer to a value. */
566 /* Preallocate a slot 0 buffer, so that the caller can always quote
567 one small component of a "memory exhausted" message in slot 0. */
568 static char slot0[256];
569 static unsigned int nslots = 1;
570 static struct slotvec slotvec0 = {sizeof slot0, slot0};
571 static struct slotvec *slotvec = &slotvec0;
576 struct slotvec *sv = slotvec;
578 for (i = 1; i < nslots; i++)
580 if (sv[0].val != slot0)
583 slotvec0.size = sizeof slot0;
584 slotvec0.val = slot0;
594 /* Use storage slot N to return a quoted version of argument ARG.
595 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
596 null-terminated string.
597 OPTIONS specifies the quoting options.
598 The returned value points to static storage that can be
599 reused by the next call to this function with the same value of N.
600 N must be nonnegative. N is deliberately declared with type "int"
601 to allow for future extensions (using negative values). */
603 quotearg_n_options (int n, char const *arg, size_t argsize,
604 struct quoting_options const *options)
609 struct slotvec *sv = slotvec;
616 /* FIXME: technically, the type of n1 should be `unsigned int',
617 but that evokes an unsuppressible warning from gcc-4.0.1 and
618 older. If gcc ever provides an option to suppress that warning,
619 revert to the original type, so that the test in xalloc_oversized
620 is once again performed only at compile time. */
622 bool preallocated = (sv == &slotvec0);
624 if (xalloc_oversized (n1, sizeof *sv))
627 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
630 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
635 size_t size = sv[n].size;
636 char *val = sv[n].val;
637 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
641 sv[n].size = size = qsize + 1;
644 sv[n].val = val = xcharalloc (size);
645 quotearg_buffer (val, size, arg, argsize, options);
654 quotearg_n (int n, char const *arg)
656 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
660 quotearg_n_mem (int n, char const *arg, size_t argsize)
662 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
666 quotearg (char const *arg)
668 return quotearg_n (0, arg);
672 quotearg_mem (char const *arg, size_t argsize)
674 return quotearg_n_mem (0, arg, argsize);
677 /* Return quoting options for STYLE, with no extra quoting. */
678 static struct quoting_options
679 quoting_options_from_style (enum quoting_style style)
681 struct quoting_options o;
683 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
688 quotearg_n_style (int n, enum quoting_style s, char const *arg)
690 struct quoting_options const o = quoting_options_from_style (s);
691 return quotearg_n_options (n, arg, SIZE_MAX, &o);
695 quotearg_n_style_mem (int n, enum quoting_style s,
696 char const *arg, size_t argsize)
698 struct quoting_options const o = quoting_options_from_style (s);
699 return quotearg_n_options (n, arg, argsize, &o);
703 quotearg_style (enum quoting_style s, char const *arg)
705 return quotearg_n_style (0, s, arg);
709 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
711 return quotearg_n_style_mem (0, s, arg, argsize);
715 quotearg_char_mem (char const *arg, size_t argsize, char ch)
717 struct quoting_options options;
718 options = default_quoting_options;
719 set_char_quoting (&options, ch, 1);
720 return quotearg_n_options (0, arg, argsize, &options);
724 quotearg_char (char const *arg, char ch)
726 return quotearg_char_mem (arg, SIZE_MAX, ch);
730 quotearg_colon (char const *arg)
732 return quotearg_char (arg, ':');
736 quotearg_colon_mem (char const *arg, size_t argsize)
738 return quotearg_char_mem (arg, argsize, ':');