1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4 Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Translate MSGID at run time by passing it to gettext.  */
37 #define _(msgid) gettext (msgid)
/* Expand to MSGID unchanged.  No lookup happens here, so the result
   still has to go through _() to be translated, as gettext_quote
   does with the strings it is handed.  */
38 #define N_(msgid) msgid
41 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
42 other macros are defined only for documentation and to satisfy C syntax. */
/* Single-byte stand-ins for the multibyte API.  They are presumably
   selected by a feature test on the enclosing conditional -- TODO
   confirm which one.  */
47 # define mbstate_t int
/* Copy one byte from S into *PWC and yield 1 if that byte is nonzero,
   0 if it is the null byte.  N and PS are ignored.  */
48 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* The cast to unsigned char keeps the isprint argument nonnegative.  */
49 # define iswprint(wc) isprint ((unsigned char) (wc))
/* When no mbsinit is available, treat every conversion state as the
   initial state.  */
53 #if !defined mbsinit && !HAVE_MBSINIT
54 # define mbsinit(ps) 1
/* Largest size_t value.  This file also uses it as the ARGSIZE
   sentinel meaning that ARG is null-terminated.  */
58 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int, i.e. how many per-character flags fit in
   one element of quote_these_too.  */
61 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Options controlling how an argument is quoted: a base style plus a
   per-character set of additional characters to quote.  */
63 struct quoting_options
65 /* Basic quoting style. */
66 enum quoting_style style;
68 /* Quote the characters indicated by this bit vector even if the
69 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value: the flag for character UC is bit
   UC % INT_BITS of element UC / INT_BITS, which is why the array has
   (UCHAR_MAX / INT_BITS) + 1 elements.  */
70 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
73 /* Names of quoting styles. */
/* quoting_style_vals is documented as the list of values corresponding
   to these names, so the two arrays presumably have to stay in the
   same order -- confirm when adding a style.  */
74 char const *const quoting_style_args[] =
86 /* Correspondences to quoting style names. */
/* Enum value for each name in quoting_style_args; presumably index I
   here corresponds to index I there -- confirm when adding a style.  */
87 enum quoting_style const quoting_style_vals[] =
89 literal_quoting_style,
91 shell_always_quoting_style,
98 /* The default quoting options. */
/* Used whenever a caller passes a null options pointer.  Because it
   has static storage duration it starts out zero-initialized: style is
   the enumerator whose value is 0 and no extra character is flagged
   for quoting.  */
99 static struct quoting_options default_quoting_options;
101 /* Allocate a new set of quoting options, with contents initially identical
102 to O if O is not null, or to the default if O is null.
103 It is the caller's responsibility to free the result. */
104 struct quoting_options *
105 clone_quoting_options (struct quoting_options *o)
/* Copy O, or the file-wide defaults when O is null, into new storage
   obtained from xmemdup.  */
108 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
114 /* Get the value of O's quoting style. If O is null, use the default. */
116 get_quoting_style (struct quoting_options *o)
/* A null O selects default_quoting_options.  */
118 return (o ? o : &default_quoting_options)->style;
121 /* In O (or in the default if O is null),
122 set the value of the quoting style to S. */
124 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O the write goes to the shared default_quoting_options,
   so it affects every later call that falls back to the defaults.  */
126 (o ? o : &default_quoting_options)->style = s;
129 /* In O (or in the default if O is null),
130 set the value of the quoting options for character C to I.
131 Return the old value. Currently, the only values defined for I are
132 0 (the default) and 1 (which means to quote the character even if
133 it would not otherwise be quoted). */
135 set_char_quoting (struct quoting_options *o, char c, int i)
/* Convert to unsigned char so that a negative char cannot produce a
   negative array index or shift count.  */
137 unsigned char uc = c;
/* Address of the bit-vector element that holds the flag for UC; a
   null O selects the defaults.  */
139 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
140 int shift = uc % INT_BITS;
/* R is the flag currently stored for UC.  */
141 int r = (*p >> shift) & 1;
/* Flip the stored bit only when it differs from the low bit of I,
   which leaves it equal to I & 1.
   NOTE(review): the left operand of << has type int and SHIFT can be
   INT_BITS - 1, so a 1 may be shifted into the sign bit; an unsigned
   operand would avoid relying on that -- confirm before changing.  */
142 *p ^= ((i & 1) ^ r) << shift;
146 /* MSGID approximates a quotation mark. Return its translation if it
147 has one; otherwise, return either it or "\"", depending on S. */
149 gettext_quote (char const *msgid, enum quoting_style s)
151 char const *translation = _(msgid);
/* Pointer comparison, not strcmp: it detects that the lookup handed
   back MSGID itself, which is taken to mean that no translation
   exists.  Only clocale_quoting_style gets a substitute in that case;
   any other style keeps MSGID, as the function comment describes.  */
152 if (translation == msgid && s == clocale_quoting_style)
157 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
158 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
159 non-quoting-style part of O to control quoting.
160 Terminate the output with a null character, and return the written
161 size of the output, not counting the terminating null.
162 If BUFFERSIZE is too small to store the output string, return the
163 value that would have been returned had BUFFERSIZE been large enough.
164 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
166 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
167 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
168 style specified by O, and O may not be null. */
171 quotearg_buffer_restyled (char *buffer, size_t buffersize,
172 char const *arg, size_t argsize,
173 enum quoting_style quoting_style,
174 struct quoting_options const *o)
178 char const *quote_string = 0;
179 size_t quote_string_len = 0;
180 bool backslash_escapes = false;
181 bool unibyte_locale = MB_CUR_MAX == 1;
186 if (len < buffersize) \
192 switch (quoting_style)
194 case c_quoting_style:
196 backslash_escapes = true;
198 quote_string_len = 1;
201 case escape_quoting_style:
202 backslash_escapes = true;
205 case locale_quoting_style:
206 case clocale_quoting_style:
209 /* Get translations for open and closing quotation marks.
211 The message catalog should translate "`" to a left
212 quotation mark suitable for the locale, and similarly for
213 "'". If the catalog has no translation,
214 locale_quoting_style quotes `like this', and
215 clocale_quoting_style quotes "like this".
217 For example, an American English Unicode locale should
218 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
219 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
220 MARK). A British English Unicode locale should instead
221 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
222 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
224 If you don't know what to put here, please see
225 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
226 and use glyphs suitable for your language. */
228 char const *left = gettext_quote (N_("`"), quoting_style);
229 char const *right = gettext_quote (N_("'"), quoting_style);
230 for (quote_string = left; *quote_string; quote_string++)
231 STORE (*quote_string);
232 backslash_escapes = true;
233 quote_string = right;
234 quote_string_len = strlen (quote_string);
238 case shell_always_quoting_style:
241 quote_string_len = 1;
248 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
253 if (backslash_escapes
255 && i + quote_string_len <= argsize
256 && memcmp (arg + i, quote_string, quote_string_len) == 0)
263 if (backslash_escapes)
273 switch (quoting_style)
275 case shell_quoting_style:
276 goto use_shell_always_quoting_style;
278 case c_quoting_style:
279 if (i + 2 < argsize && arg[i + 1] == '?')
283 case '(': case ')': case '-': case '/':
284 case '<': case '=': case '>':
285 /* Escape the second '?' in what would otherwise be a trigraph. */
304 case '\a': esc = 'a'; goto c_escape;
305 case '\b': esc = 'b'; goto c_escape;
306 case '\f': esc = 'f'; goto c_escape;
307 case '\n': esc = 'n'; goto c_and_shell_escape;
308 case '\r': esc = 'r'; goto c_and_shell_escape;
309 case '\t': esc = 't'; goto c_and_shell_escape;
310 case '\v': esc = 'v'; goto c_escape;
311 case '\\': esc = c; goto c_and_shell_escape;
314 if (quoting_style == shell_quoting_style)
315 goto use_shell_always_quoting_style;
317 if (backslash_escapes)
324 case '{': case '}': /* sometimes special if isolated */
325 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
333 case '!': /* special in bash */
334 case '"': case '$': case '&':
335 case '(': case ')': case '*': case ';':
337 case '=': /* sometimes special in 0th or (with "set -k") later args */
339 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
341 /* A shell special character. In theory, '$' and '`' could
342 be the first bytes of multibyte characters, which means
343 we should check them with mbrtowc, but in practice this
344 doesn't happen so it's not worth worrying about. */
345 if (quoting_style == shell_quoting_style)
346 goto use_shell_always_quoting_style;
350 switch (quoting_style)
352 case shell_quoting_style:
353 goto use_shell_always_quoting_style;
355 case shell_always_quoting_style:
366 case '%': case '+': case ',': case '-': case '.': case '/':
367 case '0': case '1': case '2': case '3': case '4': case '5':
368 case '6': case '7': case '8': case '9': case ':':
369 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
370 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
371 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
372 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
373 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
374 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
375 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
376 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
377 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
378 /* These characters don't cause problems, no matter what the
379 quoting style is. They cannot start multibyte sequences. */
383 /* If we have a multibyte sequence, copy it until we reach
384 its end, find an error, or come back to the initial shift
385 state. For C-like styles, if the sequence has
386 unprintable characters, escape the whole sequence, since
387 we can't easily escape single characters within it. */
389 /* Length of multibyte sequence found so far. */
397 printable = isprint (c) != 0;
402 memset (&mbstate, 0, sizeof mbstate);
406 if (argsize == SIZE_MAX)
407 argsize = strlen (arg);
412 size_t bytes = mbrtowc (&w, &arg[i + m],
413 argsize - (i + m), &mbstate);
416 else if (bytes == (size_t) -1)
421 else if (bytes == (size_t) -2)
424 while (i + m < argsize && arg[i + m])
430 /* Work around a bug with older shells that "see" a '\'
431 that is really the 2nd byte of a multibyte character.
432 In practice the problem is limited to ASCII
433 chars >= '@' that are shell special chars. */
434 if ('[' == 0x5b && quoting_style == shell_quoting_style)
437 for (j = 1; j < bytes; j++)
438 switch (arg[i + m + j])
440 case '[': case '\\': case '^':
442 goto use_shell_always_quoting_style;
454 while (! mbsinit (&mbstate));
457 if (1 < m || (backslash_escapes && ! printable))
459 /* Output a multibyte sequence, or an escaped
460 unprintable unibyte character. */
465 if (backslash_escapes && ! printable)
468 STORE ('0' + (c >> 6));
469 STORE ('0' + ((c >> 3) & 7));
483 if (! (backslash_escapes
484 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
494 if (i == 0 && quoting_style == shell_quoting_style)
495 goto use_shell_always_quoting_style;
498 for (; *quote_string; quote_string++)
499 STORE (*quote_string);
501 if (len < buffersize)
505 use_shell_always_quoting_style:
506 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
507 shell_always_quoting_style, o);
510 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
511 argument ARG (of size ARGSIZE), using O to control quoting.
512 If O is null, use the default.
513 Terminate the output with a null character, and return the written
514 size of the output, not counting the terminating null.
515 If BUFFERSIZE is too small to store the output string, return the
516 value that would have been returned had BUFFERSIZE been large enough.
517 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
520 quotearg_buffer (char *buffer, size_t buffersize,
521 char const *arg, size_t argsize,
522 struct quoting_options const *o)
/* Resolve a null O here, because quotearg_buffer_restyled requires a
   non-null options pointer.  */
524 struct quoting_options const *p = o ? o : &default_quoting_options;
/* The quoting itself is delegated to quotearg_buffer_restyled.  */
526 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
532 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
533 allocated storage containing the quoted string. */
535 quotearg_alloc (char const *arg, size_t argsize,
536 struct quoting_options const *o)
/* Sizing pass: given a zero-size buffer, quotearg_buffer stores
   nothing and returns the length the quoted text needs; add 1 for
   the terminating null.  */
539 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
540 char *buf = xcharalloc (bufsize);
/* Second pass: write the quoted text into the exactly sized buffer.  */
541 quotearg_buffer (buf, bufsize, arg, argsize, o);
546 /* A storage slot with size and pointer to a value. */
553 /* Preallocate a slot 0 buffer, so that the caller can always quote
554 one small component of a "memory exhausted" message in slot 0. */
555 static char slot0[256];
/* Number of entries in the slot vector; it starts at 1 because slot 0
   is preallocated.  */
556 static unsigned int nslots = 1;
/* The initial one-entry vector: slot 0, backed by the static slot0
   buffer.  */
557 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The current slot vector.  It points at slotvec0 until
   quotearg_n_options replaces it with storage from xrealloc.  */
558 static struct slotvec *slotvec = &slotvec0;
563 struct slotvec *sv = slotvec;
565 for (i = 1; i < nslots; i++)
567 if (sv[0].val != slot0)
570 slotvec0.size = sizeof slot0;
571 slotvec0.val = slot0;
581 /* Use storage slot N to return a quoted version of argument ARG.
582 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
583 null-terminated string.
584 OPTIONS specifies the quoting options.
585 The returned value points to static storage that can be
586 reused by the next call to this function with the same value of N.
587 N must be nonnegative. N is deliberately declared with type "int"
588 to allow for future extensions (using negative values). */
590 quotearg_n_options (int n, char const *arg, size_t argsize,
591 struct quoting_options const *options)
596 struct slotvec *sv = slotvec;
603 /* FIXME: technically, the type of n1 should be `unsigned int',
604 but that evokes an unsuppressible warning from gcc-4.0.1 and
605 older. If gcc ever provides an option to suppress that warning,
606 revert to the original type, so that the test in xalloc_oversized
607 is once again performed only at compile time. */
/* True while the vector is still the static slotvec0, which must not
   be handed to xrealloc.  */
609 bool preallocated = (sv == &slotvec0);
/* Reject a slot count whose byte size would overflow -- presumably
   what xalloc_oversized tests; TODO confirm.  */
611 if (xalloc_oversized (n1, sizeof *sv))
/* Passing NULL makes xrealloc allocate fresh storage rather than try
   to resize the static vector.  */
614 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero-fill the entries from NSLOTS up to N1, i.e. the ones just
   added.  */
617 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
622 size_t size = sv[n].size;
623 char *val = sv[n].val;
/* First attempt: quote into the buffer slot N already owns.  QSIZE is
   the length the result needs, whether or not it fit.  */
624 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* Replacement buffer: size it for the text plus the terminating null,
   allocate it, and quote again into the new storage.  */
628 sv[n].size = size = qsize + 1;
631 sv[n].val = val = xcharalloc (size);
632 quotearg_buffer (val, size, arg, argsize, options);
641 quotearg_n (int n, char const *arg)
/* Quote the null-terminated ARG (SIZE_MAX is the null-terminated
   sentinel) into slot N using the default options.  */
643 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
647 quotearg (char const *arg)
/* Always uses slot 0, so the storage returned by one call can be
   reused by the next.  */
649 return quotearg_n (0, arg);
652 /* Return quoting options for STYLE, with no extra quoting. */
653 static struct quoting_options
654 quoting_options_from_style (enum quoting_style style)
656 struct quoting_options o;
/* Clear every per-character flag, so nothing beyond what STYLE itself
   requires gets quoted.  */
658 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
663 quotearg_n_style (int n, enum quoting_style s, char const *arg)
/* O lives only for the duration of this call.  That is fine as long
   as quotearg_n_options does not retain the pointer; the code visible
   there only forwards it to quotearg_buffer.  */
665 struct quoting_options const o = quoting_options_from_style (s);
666 return quotearg_n_options (n, arg, SIZE_MAX, &o);
670 quotearg_n_style_mem (int n, enum quoting_style s,
671 char const *arg, size_t argsize)
/* Same as quotearg_n_style, except that ARG is ARGSIZE bytes long
   instead of null-terminated (unless ARGSIZE is SIZE_MAX), so it can
   contain embedded null bytes.  */
673 struct quoting_options const o = quoting_options_from_style (s);
674 return quotearg_n_options (n, arg, argsize, &o);
678 quotearg_style (enum quoting_style s, char const *arg)
/* Slot 0 variant of quotearg_n_style.  */
680 return quotearg_n_style (0, s, arg);
684 quotearg_char (char const *arg, char ch)
686 struct quoting_options options;
/* Work on a private copy so that the shared defaults stay untouched.  */
687 options = default_quoting_options;
/* Flag CH for quoting in the copy only.  */
688 set_char_quoting (&options, ch, 1);
/* Quote the null-terminated ARG into slot 0 with the modified copy.  */
689 return quotearg_n_options (0, arg, SIZE_MAX, &options);
693 quotearg_colon (char const *arg)
/* Default quoting, with the colon character additionally flagged for
   quoting; the result lands in slot 0 via quotearg_char.  */
695 return quotearg_char (arg, ':');