1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand for gettext: look up the translation of MSGID.  */
37 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged.  The visible uses in this file hand the
   result to gettext_quote, which applies _() itself.  */
38 #define N_(msgid) msgid
42 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
50 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
51 other macros are defined only for documentation and to satisfy C syntax. */
/* Single-byte stand-in for mbrtowc: copy the byte at S into *PWC and
   yield 1 for a nonzero byte, 0 for a null byte.  N and PS are not
   used.  */
55 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* Classify WC with isprint after narrowing it to unsigned char.  */
56 # define iswprint(wc) isprint ((unsigned char) (wc))
/* Fallback used when mbsinit is neither a macro nor reported as
   available: it always evaluates to 1, so a loop that repeats while
   mbsinit is false runs exactly once.  */
60 #if !defined mbsinit && !HAVE_MBSINIT
61 # define mbsinit(ps) 1
/* Fallback used when iswprint is neither a macro nor reported as
   available: it evaluates to 1 for every argument.  */
68 # if !defined iswprint && !HAVE_ISWPRINT
69 # define iswprint(wc) 1
/* The largest value a size_t can hold.  */
74 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int; used to address single bits in the
   quote_these_too vector.  */
77 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Settings that control quoting: a base style plus a bit vector of
   additional characters to quote.  */
79 struct quoting_options
81 /* Basic quoting style. */
82 enum quoting_style style;
84 /* Quote the characters indicated by this bit vector even if the
85 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value: character C is bit C % INT_BITS of
   element C / INT_BITS.  */
86 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
89 /* Names of quoting styles. */
/* NOTE(review): the initializer is not visible in this excerpt.
   Presumably each name sits at the same index as its value in
   quoting_style_vals -- confirm against the full file.  */
90 char const *const quoting_style_args[] =
102 /* Correspondences to quoting style names. */
/* Only part of the initializer is visible in this excerpt.
   NOTE(review): presumably entry K here is the value for name K in
   quoting_style_args -- confirm against the full file.  */
103 enum quoting_style const quoting_style_vals[] =
105 literal_quoting_style,
107 shell_always_quoting_style,
109 escape_quoting_style,
110 locale_quoting_style,
111 clocale_quoting_style
114 /* The default quoting options. */
/* The functions in this file fall back to this object whenever they
   are handed a null options pointer.  It has static storage and no
   initializer, so it starts out zero-filled.  */
115 static struct quoting_options default_quoting_options;
117 /* Allocate a new set of quoting options, with contents initially identical
118 to O if O is not null, or to the default if O is null.
119 It is the caller's responsibility to free the result. */
120 struct quoting_options *
121 clone_quoting_options (struct quoting_options *o)
/* The copy lives in storage obtained from xmalloc.  */
124 struct quoting_options *p = xmalloc (sizeof *p);
/* Whole-structure assignment: the style and the complete
   quote_these_too vector are both duplicated.  */
125 *p = *(o ? o : &default_quoting_options);
130 /* Get the value of O's quoting style. If O is null, use the default. */
132 get_quoting_style (struct quoting_options *o)
/* Read-only access: a null O selects default_quoting_options, and
   nothing is modified.  */
134 return (o ? o : &default_quoting_options)->style;
137 /* In O (or in the default if O is null),
138 set the value of the quoting style to S. */
140 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* Only the style member is written; quote_these_too is left alone.
   With a null O the change lands in the file-level
   default_quoting_options and is therefore seen by every later use of
   the defaults.  */
142 (o ? o : &default_quoting_options)->style = s;
145 /* In O (or in the default if O is null),
146 set the value of the quoting options for character C to I.
147 Return the old value. Currently, the only values defined for I are
148 0 (the default) and 1 (which means to quote the character even if
149 it would not otherwise be quoted). */
151 set_char_quoting (struct quoting_options *o, char c, int i)
/* Go through unsigned char so that the element index and the bit
   number below are never negative, whatever the signedness of plain
   char.  */
153 unsigned char uc = c;
/* P points at the vector element that holds the bit for UC.  */
154 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of that bit inside *P.  */
155 int shift = uc % INT_BITS;
/* R is the current value of the bit, 0 or 1.  */
156 int r = (*p >> shift) & 1;
/* Flip the bit only when the low bit of I differs from R; the other
   bits of I are ignored.
   NOTE(review): when SHIFT equals INT_BITS - 1 and the bit has to be
   flipped, this shifts a 1 into the sign bit of an int, which ISO C
   leaves undefined; the right shift above likewise acts on a possibly
   negative int.  An unsigned element type for quote_these_too would
   avoid both -- confirm before changing the structure.  */
157 *p ^= ((i & 1) ^ r) << shift;
161 /* MSGID approximates a quotation mark. Return its translation if it
162 has one; otherwise, return either it or "\"", depending on S. */
164 gettext_quote (char const *msgid, enum quoting_style s)
/* _() expands to a gettext call.  */
166 char const *translation = _(msgid);
/* This is a pointer comparison, not a string comparison: it holds
   when the lookup handed back MSGID itself (presumably meaning that
   no translation exists) and the style is clocale_quoting_style.
   The statement guarded by the test is not visible in this excerpt.  */
167 if (translation == msgid && s == clocale_quoting_style)
172 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
173 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
174 non-quoting-style part of O to control quoting.
175 Terminate the output with a null character, and return the written
176 size of the output, not counting the terminating null.
177 If BUFFERSIZE is too small to store the output string, return the
178 value that would have been returned had BUFFERSIZE been large enough.
179 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
181 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
182 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
183 style specified by O, and O may not be null. */
186 quotearg_buffer_restyled (char *buffer, size_t buffersize,
187 char const *arg, size_t argsize,
188 enum quoting_style quoting_style,
189 struct quoting_options const *o)
/* Closing quote text that is emitted by the loop near the end of the
   function, and its length in bytes.  */
193 char const *quote_string = 0;
194 size_t quote_string_len = 0;
/* Nonzero once the chosen style writes special bytes as backslash
   escapes.  */
195 int backslash_escapes = 0;
/* Nonzero when the current locale has single-byte characters only.  */
196 int unibyte_locale = MB_CUR_MAX == 1;
201 if (len < buffersize) \
207 switch (quoting_style)
/* Per-style setup of BACKSLASH_ESCAPES and of the closing quote.  */
209 case c_quoting_style:
211 backslash_escapes = 1;
213 quote_string_len = 1;
216 case escape_quoting_style:
217 backslash_escapes = 1;
220 case locale_quoting_style:
221 case clocale_quoting_style:
223 /* Get translations for open and closing quotation marks.
225 The message catalog should translate "`" to a left
226 quotation mark suitable for the locale, and similarly for
227 "'". If the catalog has no translation,
228 locale_quoting_style quotes `like this', and
229 clocale_quoting_style quotes "like this".
231 For example, an American English Unicode locale should
232 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
233 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
234 MARK). A British English Unicode locale should instead
235 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
236 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
238 char const *left = gettext_quote (N_("`"), quoting_style);
239 char const *right = gettext_quote (N_("'"), quoting_style);
/* The opening quote is written out immediately, byte by byte.  */
240 for (quote_string = left; *quote_string; quote_string++)
241 STORE (*quote_string);
242 backslash_escapes = 1;
/* The closing quote is only remembered here; it is written after the
   argument has been processed.  */
243 quote_string = right;
244 quote_string_len = strlen (quote_string);
248 case shell_always_quoting_style:
251 quote_string_len = 1;
/* Visit ARG one byte at a time.  With ARGSIZE equal to SIZE_MAX the
   walk ends at the null byte, otherwise after ARGSIZE bytes.  */
258 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
/* Part of the test for the closing quote text occurring in ARG at
   position I; one line of the condition is not visible in this
   excerpt.  */
263 if (backslash_escapes
265 && i + quote_string_len <= argsize
266 && memcmp (arg + i, quote_string, quote_string_len) == 0)
273 if (backslash_escapes)
283 switch (quoting_style)
285 case shell_quoting_style:
286 goto use_shell_always_quoting_style;
288 case c_quoting_style:
/* Holds when the byte after the current one is a question mark and at
   least one further byte lies before ARGSIZE.  */
289 if (i + 2 < argsize && arg[i + 1] == '?')
293 case '(': case ')': case '-': case '/':
294 case '<': case '=': case '>':
295 /* Escape the second '?' in what would otherwise be
311 case '\a': esc = 'a'; goto c_escape;
312 case '\b': esc = 'b'; goto c_escape;
313 case '\f': esc = 'f'; goto c_escape;
314 case '\n': esc = 'n'; goto c_and_shell_escape;
315 case '\r': esc = 'r'; goto c_and_shell_escape;
316 case '\t': esc = 't'; goto c_and_shell_escape;
317 case '\v': esc = 'v'; goto c_escape;
318 case '\\': esc = c; goto c_and_shell_escape;
321 if (quoting_style == shell_quoting_style)
322 goto use_shell_always_quoting_style;
324 if (backslash_escapes)
331 case '{': case '}': /* sometimes special if isolated */
/* Holds unless ARG is exactly one byte long.  */
332 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
340 case '!': /* special in bash */
341 case '"': case '$': case '&':
342 case '(': case ')': case '*': case ';':
344 case '=': /* sometimes special in 0th or (with "set -k") later args */
346 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
348 /* A shell special character. In theory, '$' and '`' could
349 be the first bytes of multibyte characters, which means
350 we should check them with mbrtowc, but in practice this
351 doesn't happen so it's not worth worrying about. */
352 if (quoting_style == shell_quoting_style)
353 goto use_shell_always_quoting_style;
357 switch (quoting_style)
359 case shell_quoting_style:
360 goto use_shell_always_quoting_style;
362 case shell_always_quoting_style:
373 case '%': case '+': case ',': case '-': case '.': case '/':
374 case '0': case '1': case '2': case '3': case '4': case '5':
375 case '6': case '7': case '8': case '9': case ':':
376 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
377 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
378 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
379 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
380 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
381 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
382 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
383 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
384 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
385 /* These characters don't cause problems, no matter what the
386 quoting style is. They cannot start multibyte sequences. */
390 /* If we have a multibyte sequence, copy it until we reach
391 its end, find an error, or come back to the initial shift
392 state. For C-like styles, if the sequence has
393 unprintable characters, escape the whole sequence, since
394 we can't easily escape single characters within it. */
396 /* Length of multibyte sequence found so far. */
/* NOTE(review): presumably reached only when UNIBYTE_LOCALE is set;
   the enclosing test is not visible in this excerpt.  */
404 printable = isprint (c);
/* Begin decoding from the initial conversion state.  */
409 memset (&mbstate, 0, sizeof mbstate);
/* mbrtowc needs a real byte count, so the SIZE_MAX sentinel is
   replaced by the actual string length here.  */
413 if (argsize == SIZE_MAX)
414 argsize = strlen (arg);
/* Decode the next character, starting M bytes past position I.  */
419 size_t bytes = mbrtowc (&w, &arg[i + m],
420 argsize - (i + m), &mbstate);
/* (size_t) -1 from mbrtowc: the bytes do not form a valid character.  */
423 else if (bytes == (size_t) -1)
/* (size_t) -2 from mbrtowc: the sequence is incomplete at the end of
   the input.  */
428 else if (bytes == (size_t) -2)
/* Continue while bytes remain before ARGSIZE and the next byte is not
   a null byte; the loop body is not visible in this excerpt.  */
431 while (i + m < argsize && arg[i + m])
437 /* Work around a bug with older shells that "see" a '\'
438 that is really the 2nd byte of a multibyte character.
439 In practice the problem is limited to ASCII
440 chars >= '@' that are shell special chars. */
/* The first comparison is a constant test that holds only when the
   execution character set encodes the left bracket as 0x5b.  */
441 if ('[' == 0x5b && quoting_style == shell_quoting_style)
/* Examine every byte of this character except the first.  */
444 for (j = 1; j < bytes; j++)
445 switch (arg[i + m + j])
447 case '[': case '\\': case '^':
449 goto use_shell_always_quoting_style;
/* Keep decoding until the conversion state is initial again.  */
458 while (! mbsinit (&mbstate));
461 if (1 < m || (backslash_escapes && ! printable))
463 /* Output a multibyte sequence, or an escaped
464 unprintable unibyte character. */
469 if (backslash_escapes && ! printable)
/* Octal digits of C: first the bits above the low six, then the
   middle three.  */
472 STORE ('0' + (c >> 6));
473 STORE ('0' + ((c >> 3) & 7));
/* Look up the quote_these_too bit for C.
   NOTE(review): for C % INT_BITS equal to INT_BITS - 1 the
   expression 1 << ... shifts into the sign bit of an int, which ISO C
   leaves undefined -- confirm.  */
487 if (! (backslash_escapes
488 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* With shell_quoting_style and I still 0 at this point, switch to the
   always-quoted form.  */
498 if (i == 0 && quoting_style == shell_quoting_style)
499 goto use_shell_always_quoting_style;
/* Emit whatever QUOTE_STRING still points at, byte by byte.  */
502 for (; *quote_string; quote_string++)
503 STORE (*quote_string);
/* Presumably guards the store of the terminating null byte; the
   guarded statement is not visible in this excerpt.  */
505 if (len < buffersize)
/* Start over with shell_always_quoting_style.  The recursive call is
   given the same BUFFER and BUFFERSIZE, so it writes from the start of
   the buffer again.  */
509 use_shell_always_quoting_style:
510 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
511 shell_always_quoting_style, o);
514 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
515 argument ARG (of size ARGSIZE), using O to control quoting.
516 If O is null, use the default.
517 Terminate the output with a null character, and return the written
518 size of the output, not counting the terminating null.
519 If BUFFERSIZE is too small to store the output string, return the
520 value that would have been returned had BUFFERSIZE been large enough.
521 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
524 quotearg_buffer (char *buffer, size_t buffersize,
525 char const *arg, size_t argsize,
526 struct quoting_options const *o)
/* P is O itself, or the file-level defaults when O is null.  */
528 struct quoting_options const *p = o ? o : &default_quoting_options;
/* The work is delegated to quotearg_buffer_restyled; the tail of the
   argument list is not visible in this excerpt.  */
530 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
536 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
537 allocated storage containing the quoted string. */
539 quotearg_alloc (char const *arg, size_t argsize,
540 struct quoting_options const *o)
/* First pass: with a null buffer of size 0 the call only reports how
   long the quoted text is; one byte is added for the terminating
   null.  */
543 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
/* The result buffer comes from xmalloc.  */
544 char *buf = xmalloc (bufsize);
/* Second pass: produce the quoted text in the new buffer.  */
545 quotearg_buffer (buf, bufsize, arg, argsize, o);
550 /* Use storage slot N to return a quoted version of argument ARG.
551 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
552 null-terminated string.
553 OPTIONS specifies the quoting options.
554 The returned value points to static storage that can be
555 reused by the next call to this function with the same value of N.
556 N must be nonnegative. N is deliberately declared with type "int"
557 to allow for future extensions (using negative values). */
559 quotearg_n_options (int n, char const *arg, size_t argsize,
560 struct quoting_options const *options)
564 /* Preallocate a slot 0 buffer, so that the caller can always quote
565 one small component of a "memory exhausted" message in slot 0. */
566 static char slot0[256];
/* Number of entries currently in SLOTVEC.  */
567 static unsigned int nslots = 1;
/* Initial one-entry vector; its single slot is backed by SLOT0.  */
574 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* Current slot vector.  It starts out as SLOTVEC0 and is replaced by
   heap storage when more slots are needed.  */
575 static struct slotvec *slotvec = &slotvec0;
/* N1 is N0 plus one; N0 is set on a line that is not visible in this
   excerpt.  */
582 unsigned int n1 = n0 + 1;
/* Presumably rejects an N1 for which N1 * sizeof *slotvec would
   overflow; the guarded statement is not visible here.  */
584 if (xalloc_oversized (n1, sizeof *slotvec))
/* SLOTVEC0 is a static object, not heap storage, so the first growth
   starts from a fresh xmalloc block instead of going through
   xrealloc.  */
587 if (slotvec == &slotvec0)
589 slotvec = xmalloc (sizeof *slotvec);
592 slotvec = xrealloc (slotvec, n1 * sizeof *slotvec);
/* Zero the entries that were just added.  */
593 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
598 size_t size = slotvec[n].size;
599 char *val = slotvec[n].val;
/* Try the buffer the slot already has; QSIZE is the length of the
   complete quoted text whether or not it fit.  */
600 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* Presumably reached when the slot buffer was too small (the test is
   not visible here): record the new size, which includes the
   terminating null byte.  */
604 slotvec[n].size = size = qsize + 1;
/* Give the slot a buffer of the new size and quote into it again.  */
607 slotvec[n].val = val = xmalloc (size);
608 quotearg_buffer (val, size, arg, argsize, options);
/* Quote ARG into storage slot N using default_quoting_options.  */
617 quotearg_n (int n, char const *arg)
/* SIZE_MAX as the size marks ARG as a null-terminated string.  */
619 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote ARG with the default options, using storage slot 0.  */
623 quotearg (char const *arg)
625 return quotearg_n (0, arg);
628 /* Return quoting options for STYLE, with no extra quoting. */
629 static struct quoting_options
630 quoting_options_from_style (enum quoting_style style)
/* Built in a local object; the declared return type is the structure
   itself, not a pointer to it.  */
632 struct quoting_options o;
/* Clear every bit of the extra-quoting vector.  */
634 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG into storage slot N using
   style S with no extra quoted characters.  */
639 quotearg_n_style (int n, enum quoting_style s, char const *arg)
/* Temporary options object that lives only for this call.  */
641 struct quoting_options const o = quoting_options_from_style (s);
/* SIZE_MAX as the size marks ARG as a null-terminated string.  */
642 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Like quotearg_n_style, but ARGSIZE is passed on as the size of ARG
   instead of SIZE_MAX.  */
646 quotearg_n_style_mem (int n, enum quoting_style s,
647 char const *arg, size_t argsize)
/* Temporary options object that lives only for this call.  */
649 struct quoting_options const o = quoting_options_from_style (s);
650 return quotearg_n_options (n, arg, argsize, &o);
/* Quote ARG using style S and storage slot 0.  */
654 quotearg_style (enum quoting_style s, char const *arg)
656 return quotearg_n_style (0, s, arg);
/* Quote ARG with the default options plus forced quoting of the
   character CH, using storage slot 0.  */
660 quotearg_char (char const *arg, char ch)
662 struct quoting_options options;
/* Work on a private copy so that default_quoting_options itself is
   not modified.  */
663 options = default_quoting_options;
/* Set the extra-quoting bit for CH in the copy.  */
664 set_char_quoting (&options, ch, 1);
665 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Quote ARG as quotearg_char does, with a colon as the extra
   character.  */
669 quotearg_colon (char const *arg)
671 return quotearg_char (arg, ':');