1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
27 #include <sys/types.h>
34 #define _(msgid) gettext (msgid)
35 #define N_(msgid) msgid
44 # define SIZE_MAX ((size_t) -1)
47 # define UCHAR_MAX ((unsigned char) -1)
50 # define UINT_MAX ((unsigned int) -1)
53 #if HAVE_C_BACKSLASH_A
54 # define ALERT_CHAR '\a'
56 # define ALERT_CHAR '\7'
69 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
77 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
78 other macros are defined only for documentation and to satisfy C
syntax. */
82 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
83 # define mbsinit(ps) 1
84 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
91 # if !defined iswprint && !HAVE_ISWPRINT
92 # define iswprint(wc) 1
96 #define INT_BITS (sizeof (int) * CHAR_BIT)
98 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
99 # define IN_CTYPE_DOMAIN(c) 1
101 # define IN_CTYPE_DOMAIN(c) isascii(c)
104 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
106 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
/* Quoting configuration: a base style plus a per-byte set of characters
   that must be quoted in addition to what the style requires.  */
108 struct quoting_options
110 /* Basic quoting style. */
111 enum quoting_style style;
113 /* Quote the characters indicated by this bit vector even if the
114 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value, packed INT_BITS bits to an int: the
   bit for byte UC is bit number UC % INT_BITS of element UC / INT_BITS.  */
115 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
118 /* Names of quoting styles. */
/* NOTE(review): the initializer list is not visible in this view.
   Presumably its entries line up index-for-index with
   quoting_style_vals -- confirm.  */
119 char const *const quoting_style_args[] =
131 /* Correspondences to quoting style names. */
/* NOTE(review): only five enumerators are visible in this view, while
   other code in this file also refers to shell_quoting_style and
   c_quoting_style.  Check that every entry here has a matching name in
   quoting_style_args.  */
132 enum quoting_style const quoting_style_vals[] =
134 literal_quoting_style,
136 shell_always_quoting_style,
138 escape_quoting_style,
139 locale_quoting_style,
140 clocale_quoting_style
143 /* The default quoting options. */
/* Used by the functions in this file whenever the caller passes a null
   options pointer.  The object has static storage duration and no
   initializer, so it starts out all zero: the style is whichever
   enumerator has value 0, and no extra characters are quoted.  */
144 static struct quoting_options default_quoting_options;
146 /* Allocate a new set of quoting options, with contents initially identical
147 to O if O is not null, or to the default if O is null.
148 It is the caller's responsibility to free the result. */
/* The copy is a plain structure assignment.  The structure holds only an
   enum and an int array, so the result shares no storage with O or with
   the default options.
   NOTE(review): xmalloc is declared elsewhere; its result is dereferenced
   without a check, so presumably it never returns null -- confirm.  */
149 struct quoting_options *
150 clone_quoting_options (struct quoting_options *o)
152 struct quoting_options *p
153 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
154 *p = *(o ? o : &default_quoting_options);
158 /* Get the value of O's quoting style. If O is null, use the default. */
/* O is only read here; "the default" is the file-level
   default_quoting_options object.  */
160 get_quoting_style (struct quoting_options *o)
162 return (o ? o : &default_quoting_options)->style;
165 /* In O (or in the default if O is null),
166 set the value of the quoting style to S. */
/* With a null O this writes to the file-level default_quoting_options,
   so it changes the style seen by every later call in this file that
   falls back to the default.  */
168 set_quoting_style (struct quoting_options *o, enum quoting_style s)
170 (o ? o : &default_quoting_options)->style = s;
173 /* In O (or in the default if O is null),
174 set the value of the quoting options for character C to I.
175 Return the old value. Currently, the only values defined for I are
176 0 (the default) and 1 (which means to quote the character even if
177 it would not otherwise be quoted). */
/* Only the low bit of I is used.
   NOTE(review): the return statement is not visible in this view; R below
   holds the old bit that the header comment says is returned.  */
179 set_char_quoting (struct quoting_options *o, char c, int i)
/* Go through unsigned char so that a negative char value still yields a
   valid index into the bit vector.  */
181 unsigned char uc = c;
/* P is the array element holding C's bit; SHIFT is the bit's position
   within that element.  */
182 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
183 int shift = uc % INT_BITS;
/* R is the bit's current value.  */
184 int r = (*p >> shift) & 1;
/* Flip the stored bit only when the requested value differs from R.
   NOTE(review): when SHIFT is INT_BITS - 1 this shifts a 1 into the sign
   bit of an int, which the C standard leaves undefined.  An unsigned
   element type for quote_these_too would avoid that -- confirm and fix
   together with the structure definition.  */
185 *p ^= ((i & 1) ^ r) << shift;
189 /* MSGID approximates a quotation mark. Return its translation if it
190 has one; otherwise, return either it or "\"", depending on S. */
/* "Has no translation" is detected by pointer identity: the lookup handing
   back MSGID itself is taken to mean the catalog has no entry for it.
   NOTE(review): the statement controlled by the if below and the return
   are not visible in this view; per the header comment, the
   clocale_quoting_style case presumably substitutes "\"" -- confirm.  */
192 gettext_quote (char const *msgid, enum quoting_style s)
194 char const *translation = _(msgid);
195 if (translation == msgid && s == clocale_quoting_style)
200 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
201 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
202 non-quoting-style part of O to control quoting.
203 Terminate the output with a null character, and return the written
204 size of the output, not counting the terminating null.
205 If BUFFERSIZE is too small to store the output string, return the
206 value that would have been returned had BUFFERSIZE been large enough.
207 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
209 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
210 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
211 style specified by O, and O may not be null. */
/* NOTE(review): this view of the function has gaps (declarations, braces
   and many statements are not shown), so the notes added below describe
   only what the visible lines establish.  In particular, the trigraph
   comment inside the c_quoting_style case is cut off before its closing
   delimiter.  */
214 quotearg_buffer_restyled (char *buffer, size_t buffersize,
215 char const *arg, size_t argsize,
216 enum quoting_style quoting_style,
217 struct quoting_options const *o)
/* quote_string and quote_string_len describe the text emitted after the
   argument (the closing quote); backslash_escapes is set for the styles
   that use C-like backslash escapes; unibyte_locale records whether the
   current locale has single-byte characters only.  */
221 char const *quote_string = 0;
222 size_t quote_string_len = 0;
223 int backslash_escapes = 0;
224 int unibyte_locale = MB_CUR_MAX == 1;
/* Fragment of the STORE helper: a byte is written only while it still
   fits, that is, while len < buffersize.
   NOTE(review): presumably len advances regardless, which is what lets
   the function report the size it would have needed -- confirm against
   the full definition.  */
229 if (len < buffersize) \
235 switch (quoting_style)
/* First dispatch on the style: set up the quote string and decide whether
   backslash escapes are used.  */
237 case c_quoting_style:
239 backslash_escapes = 1;
241 quote_string_len = 1;
244 case escape_quoting_style:
245 backslash_escapes = 1;
248 case locale_quoting_style:
249 case clocale_quoting_style:
251 /* Get translations for open and closing quotation marks.
253 The message catalog should translate "`" to a left
254 quotation mark suitable for the locale, and similarly for
255 "'". If the catalog has no translation,
256 locale_quoting_style quotes `like this', and
257 clocale_quoting_style quotes "like this".
259 For example, an American English Unicode locale should
260 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
261 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
262 MARK). A British English Unicode locale should instead
263 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
264 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
266 char const *left = gettext_quote (N_("`"), quoting_style);
267 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote right away; the closing quote is remembered in
   quote_string and emitted after the argument has been processed.  */
268 for (quote_string = left; *quote_string; quote_string++)
269 STORE (*quote_string);
270 backslash_escapes = 1;
271 quote_string = right;
272 quote_string_len = strlen (quote_string);
276 case shell_always_quoting_style:
279 quote_string_len = 1;
/* Scan ARG one byte at a time.  When ARGSIZE is (size_t) -1 the scan stops
   at the terminating null byte; otherwise it stops after ARGSIZE bytes.  */
286 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* Detect the closing quote string occurring inside ARG at position I.
   NOTE(review): the action taken on a match is not visible here;
   presumably the match is backslash-escaped -- confirm.  */
291 if (backslash_escapes
293 && i + quote_string_len <= argsize
294 && memcmp (arg + i, quote_string, quote_string_len) == 0)
301 if (backslash_escapes)
311 switch (quoting_style)
313 case shell_quoting_style:
314 goto use_shell_always_quoting_style;
/* C style: when the next byte is '?' and the one after it is one of the
   characters listed below, the output would otherwise contain a trigraph.
   NOTE(review): presumably this is reached for a '?' in ARG; the
   enclosing case label is not visible -- confirm.  */
316 case c_quoting_style:
317 if (i + 2 < argsize && arg[i + 1] == '?')
321 case '(': case ')': case '-': case '/':
322 case '<': case '=': case '>':
323 /* Escape the second '?' in what would otherwise be
339 case ALERT_CHAR: esc = 'a'; goto c_escape;
340 case '\b': esc = 'b'; goto c_escape;
341 case '\f': esc = 'f'; goto c_escape;
342 case '\n': esc = 'n'; goto c_and_shell_escape;
343 case '\r': esc = 'r'; goto c_and_shell_escape;
344 case '\t': esc = 't'; goto c_and_shell_escape;
345 case '\v': esc = 'v'; goto c_escape;
346 case '\\': esc = c; goto c_and_shell_escape;
349 if (quoting_style == shell_quoting_style)
350 goto use_shell_always_quoting_style;
352 if (backslash_escapes)
364 case '!': /* special in bash */
365 case '"': case '$': case '&':
366 case '(': case ')': case '*': case ';':
367 case '<': case '>': case '[':
368 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
370 /* A shell special character. In theory, '$' and '`' could
371 be the first bytes of multibyte characters, which means
372 we should check them with mbrtowc, but in practice this
373 doesn't happen so it's not worth worrying about. */
374 if (quoting_style == shell_quoting_style)
375 goto use_shell_always_quoting_style;
379 switch (quoting_style)
381 case shell_quoting_style:
382 goto use_shell_always_quoting_style;
384 case shell_always_quoting_style:
395 case '%': case '+': case ',': case '-': case '.': case '/':
396 case '0': case '1': case '2': case '3': case '4': case '5':
397 case '6': case '7': case '8': case '9': case ':': case '=':
398 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
399 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
400 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
401 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
402 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
403 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
404 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
405 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
406 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
408 /* These characters don't cause problems, no matter what the
409 quoting style is. They cannot start multibyte sequences. */
413 /* If we have a multibyte sequence, copy it until we reach
414 its end, find an error, or come back to the initial shift
415 state. For C-like styles, if the sequence has
416 unprintable characters, escape the whole sequence, since
417 we can't easily escape single characters within it. */
419 /* Length of multibyte sequence found so far. */
427 printable = ISPRINT (c);
/* An all-zero mbstate_t is the initial conversion state for mbrtowc.  */
432 memset (&mbstate, 0, sizeof mbstate);
/* Replace the (size_t) -1 sentinel by the real length so that the
   remaining-byte count passed to mbrtowc below is meaningful.  */
436 if (argsize == (size_t) -1)
437 argsize = strlen (arg);
442 size_t bytes = mbrtowc (&w, &arg[i + m],
443 argsize - (i + m), &mbstate);
/* mbrtowc returns (size_t) -1 for an invalid sequence and (size_t) -2
   when the input ends in the middle of a multibyte character.  */
446 else if (bytes == (size_t) -1)
451 else if (bytes == (size_t) -2)
454 while (i + m < argsize && arg[i + m])
465 while (! mbsinit (&mbstate));
468 if (1 < m || (backslash_escapes && ! printable))
470 /* Output a multibyte sequence, or an escaped
471 unprintable unibyte character. */
476 if (backslash_escapes && ! printable)
/* Leading digits of an octal escape for byte C.
   NOTE(review): the backslash and the final digit are presumably emitted
   on lines not shown here -- confirm.  */
479 STORE ('0' + (c >> 6));
480 STORE ('0' + ((c >> 3) & 7));
/* Test C's bit in the caller-supplied "quote these too" bit vector.
   NOTE(review): 1 << (c % INT_BITS) shifts into the sign bit of an int
   when the remainder is INT_BITS - 1, which the C standard leaves
   undefined.  */
494 if (! (backslash_escapes
495 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Append the closing quote text.
   NOTE(review): quote_string starts out null, so a guard presumably
   precedes this loop on a line not shown -- confirm.  */
506 for (; *quote_string; quote_string++)
507 STORE (*quote_string);
/* Room check for the terminating null byte; the store itself is on a line
   not shown here.  */
509 if (len < buffersize)
/* shell_quoting_style jumps here as soon as it meets something that needs
   quoting; the whole argument is then redone from the start with
   shell_always_quoting_style.  */
513 use_shell_always_quoting_style:
514 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
515 shell_always_quoting_style, o);
518 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
519 argument ARG (of size ARGSIZE), using O to control quoting.
520 If O is null, use the default.
521 Terminate the output with a null character, and return the written
522 size of the output, not counting the terminating null.
523 If BUFFERSIZE is too small to store the output string, return the
524 value that would have been returned had BUFFERSIZE been large enough.
525 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
/* Thin wrapper: resolves a null O to default_quoting_options and hands the
   work to quotearg_buffer_restyled.
   NOTE(review): the tail of the call is not visible in this view;
   presumably it passes the style stored in P followed by P itself --
   confirm.  */
527 quotearg_buffer (char *buffer, size_t buffersize,
528 char const *arg, size_t argsize,
529 struct quoting_options const *o)
531 struct quoting_options const *p = o ? o : &default_quoting_options;
532 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
536 /* Use storage slot N to return a quoted version of argument ARG.
537 ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
538 OPTIONS specifies the quoting options.
539 The returned value points to static storage that can be
540 reused by the next call to this function with the same value of N.
541 N must be nonnegative. N is deliberately declared with type "int"
542 to allow for future extensions (using negative values). */
/* NOTE(review): several lines of this function are not visible in this
   view (the slotvec structure declaration, the checks that guard each
   step, and the final return); the notes below cover the visible lines
   only.  */
544 quotearg_n_options (int n, char const *arg, size_t argsize,
545 struct quoting_options const *options)
547 /* Preallocate a slot 0 buffer, so that the caller can always quote
548 one small component of a "memory exhausted" message in slot 0. */
549 static char slot0[256];
550 static unsigned int nslots = 1;
/* The slot table starts out as a single static entry describing slot0, so
   slot 0 is usable without any allocation.  */
557 static struct slotvec slotvec0 = {sizeof slot0, slot0};
558 static struct slotvec *slotvec = &slotvec0;
/* Growing the table: N1 is the new slot count and S its size in bytes.  */
565 unsigned int n1 = n0 + 1;
566 size_t s = n1 * sizeof *slotvec;
/* Overflow check on the multiplication above: dividing S back must give
   N1.  The first operand restricts the test to platforms where size_t is
   narrow enough for the product to wrap.  */
568 if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
569 && n1 != s / sizeof *slotvec)
/* The static one-entry table cannot be passed to xrealloc, so it is first
   replaced by a heap block of the same size.
   NOTE(review): presumably slotvec0 is copied into the new block on a
   line not shown -- confirm.  */
572 if (slotvec == &slotvec0)
574 slotvec = (struct slotvec *) xmalloc (sizeof *slotvec);
577 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero the newly added entries so that they start with size 0 and a null
   buffer pointer.  */
578 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
/* First attempt: quote into the slot's current buffer and learn the size
   the result needs.  */
583 size_t size = slotvec[n].size;
584 char *val = slotvec[n].val;
585 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* Second attempt after enlarging the slot to QSIZE + 1 bytes (room for the
   terminating null).  slot0 is static storage, so a null pointer is passed
   to xrealloc in its place to obtain a fresh block.  */
589 slotvec[n].size = size = qsize + 1;
590 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
591 quotearg_buffer (val, size, arg, argsize, options);
/* Quote the null-terminated string ARG with the default quoting options,
   using storage slot N.  The result is whatever quotearg_n_options
   returns: storage that the next call with the same N may reuse.  */
599 quotearg_n (int n, char const *arg)
601 return quotearg_n_options (n, arg, (size_t) -1, &default_quoting_options);
/* Quote the null-terminated string ARG with the default quoting options,
   using storage slot 0.  */
605 quotearg (char const *arg)
607 return quotearg_n (0, arg);
610 /* Return quoting options for STYLE, with no extra quoting. */
/* The result is returned by value.
   NOTE(review): the assignment of STYLE to the style member and the
   return statement are not visible in this view; only the clearing of
   the extra-quoting bit vector is shown.  */
611 static struct quoting_options
612 quoting_options_from_style (enum quoting_style style)
614 struct quoting_options o;
616 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quote the null-terminated string ARG using style S with no extra
   quoting, in storage slot N.  The options object is a local temporary
   whose address is passed down to quotearg_n_options.  */
621 quotearg_n_style (int n, enum quoting_style s, char const *arg)
623 struct quoting_options const o = quoting_options_from_style (s);
624 return quotearg_n_options (n, arg, (size_t) -1, &o);
/* Like quotearg_n_style, but ARG is a buffer of ARGSIZE bytes; ARGSIZE is
   passed through to quotearg_n_options unchanged.  */
628 quotearg_n_style_mem (int n, enum quoting_style s,
629 char const *arg, size_t argsize)
631 struct quoting_options const o = quoting_options_from_style (s);
632 return quotearg_n_options (n, arg, argsize, &o);
/* Quote the null-terminated string ARG using style S, in storage slot 0.  */
636 quotearg_style (enum quoting_style s, char const *arg)
638 return quotearg_n_style (0, s, arg);
/* Quote the null-terminated string ARG like quotearg does, but also force
   quoting of the character CH.  Uses storage slot 0.  */
642 quotearg_char (char const *arg, char ch)
/* Work on a private copy so that the shared default options are left
   untouched.  */
644 struct quoting_options options;
645 options = default_quoting_options;
646 set_char_quoting (&options, ch, 1);
647 return quotearg_n_options (0, arg, (size_t) -1, &options);
/* Quote the null-terminated string ARG with the default options, also
   forcing ':' to be quoted.  Uses storage slot 0 via quotearg_char.  */
651 quotearg_colon (char const *arg)
653 return quotearg_char (arg, ':');