1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
27 #include <sys/types.h>
/* Summary of the portability macros defined below.
   NOTE(review): most of the #if, #else and #endif lines that guard
   these definitions are not visible in this listing, so the exact
   condition under which each alternative applies should be confirmed.

   _ (text)            translate TEXT through gettext.
   UCHAR_MAX           largest unsigned char value, obtained by
                       converting -1 to unsigned char.
   ALERT_CHAR          the alert character, written with the backslash-a
                       escape when HAVE_C_BACKSLASH_A is set; the octal
                       7 spelling is presumably the alternative branch.
   mbrtowc, mbsinit, iswprint
                       single-byte stand-ins used when multibyte
                       processing is disabled: mbrtowc copies one byte
                       and yields 1 for a nonzero byte and 0 for a null
                       byte, mbsinit always yields 1, and iswprint
                       defers to ISPRINT.  A further iswprint fallback
                       that always yields 1 is defined when iswprint is
                       not a macro and HAVE_ISWPRINT is unset.
   INT_BITS            number of bits in an int.
   IN_CTYPE_DOMAIN (c) either 1 or isascii (c), selected by the
                       STDC_HEADERS / isascii / HAVE_ISASCII test.
   ISPRINT (c)         isprint (c), evaluated only when C is in the
                       ctype domain.  */
35 # define _(text) gettext (text)
48 # define UCHAR_MAX ((unsigned char) -1)
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
54 # define ALERT_CHAR '\7'
67 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
75 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
76 other macros are defined only for documentation and to satisfy C
80 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
81 # define mbsinit(ps) 1
82 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
89 # if !defined iswprint && !HAVE_ISWPRINT
90 # define iswprint(wc) 1
94 #define INT_BITS (sizeof (int) * CHAR_BIT)
96 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
97 # define IN_CTYPE_DOMAIN(c) 1
99 # define IN_CTYPE_DOMAIN(c) isascii(c)
102 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
104 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
/* Options that control how an argument is quoted.
   quote_these_too is used as a bit set with one bit per unsigned char
   value: character C is represented by bit (C % INT_BITS) of element
   (C / INT_BITS), which is how set_char_quoting writes it and how
   quotearg_buffer_restyled tests it.  The array length
   (UCHAR_MAX / INT_BITS) + 1 provides at least UCHAR_MAX + 1 bits.  */
106 struct quoting_options
108 /* Basic quoting style. */
109 enum quoting_style style;
111 /* Quote the characters indicated by this bit vector even if the
112 quoting style would not normally require them to be quoted. */
113 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
116 /* Names of quoting styles. */
/* Table of style name strings; both the pointers and the strings they
   point to are const.
   NOTE(review): the initializer is not visible in this listing.  The
   entries presumably pair up by index with quoting_style_vals -- confirm
   that the two tables are kept in the same order.  */
117 char const *const quoting_style_args[] =
129 /* Correspondences to quoting style names. */
/* Table of enum quoting_style values that correspond to the style
   names.
   NOTE(review): only some of the initializer entries are visible in
   this listing; presumably entry I here is the value for name I in
   quoting_style_args -- confirm the order against the full source.  */
130 enum quoting_style const quoting_style_vals[] =
132 literal_quoting_style,
134 shell_always_quoting_style,
136 escape_quoting_style,
137 locale_quoting_style,
138 clocale_quoting_style
141 /* The default quoting options. */
/* File-wide defaults, used by the functions in this file whenever they
   are passed a null options pointer.  Having static storage duration and
   no initializer, it starts out all zero: the style is whichever
   enumerator has the value 0 and no extra characters are marked for
   quoting.  set_quoting_style and set_char_quoting modify it in place
   when called with a null pointer.  */
142 static struct quoting_options default_quoting_options;
144 /* Allocate a new set of quoting options, with contents initially identical
145 to O if O is not null, or to the default if O is null.
146 It is the caller's responsibility to free the result. */
/* Implementation: obtains storage for one struct quoting_options from
   xmalloc and fills it by structure assignment from O, or from
   default_quoting_options when O is null.  O itself is only read.
   NOTE(review): xmalloc is defined outside this file; it is presumably
   an allocator that does not return on failure, since the result is
   dereferenced without a check -- confirm.  The statement that returns
   P is not visible in this listing.  */
147 struct quoting_options *
148 clone_quoting_options (struct quoting_options *o)
150 struct quoting_options *p
151 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
152 *p = *(o ? o : &default_quoting_options);
156 /* Get the value of O's quoting style. If O is null, use the default. */
/* Returns the style member of O, or of default_quoting_options when O
   is null.  The options object is only read.  */
158 get_quoting_style (struct quoting_options *o)
160 return (o ? o : &default_quoting_options)->style;
163 /* In O (or in the default if O is null),
164 set the value of the quoting style to S. */
/* Stores S in the style member of O.  When O is null the store goes to
   the shared default_quoting_options object, so it affects every later
   call in this file that falls back to the defaults.  */
166 set_quoting_style (struct quoting_options *o, enum quoting_style s)
168 (o ? o : &default_quoting_options)->style = s;
171 /* In O (or in the default if O is null),
172 set the value of the quoting options for character C to I.
173 Return the old value. Currently, the only values defined for I are
174 0 (the default) and 1 (which means to quote the character even if
175 it would not otherwise be quoted). */
/* Implementation: C is first converted to unsigned char so that a
   negative char still yields a valid index.  P points at the array
   element holding the bit for C and SHIFT is the bit position within
   it.  R is the current value of that bit; the final statement flips
   the bit exactly when the low bit of I differs from R, which leaves
   the bit equal to (I & 1).  Only the low bit of I is used.
   NOTE(review): the array elements are plain int and SHIFT can be as
   large as INT_BITS - 1, so the left shift can move a 1 into the sign
   bit, which ISO C leaves undefined; likewise the right shift of a
   negative element is implementation-defined.  An unsigned element type
   would avoid both -- confirm before changing the struct.  The statement
   that returns R is not visible in this listing.  */
177 set_char_quoting (struct quoting_options *o, char c, int i)
179 unsigned char uc = c;
180 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
181 int shift = uc % INT_BITS;
182 int r = (*p >> shift) & 1;
183 *p ^= ((i & 1) ^ r) << shift;
187 /* MSGID approximates a quotation mark. Return its translation if it
188 has one; otherwise, return either it or "\"", depending on S. */
/* Implementation: looks MSGID up through the _ macro.  The lookup is
   treated as having found no translation when it hands back the very
   same pointer, which is why the comparison is on pointers rather than
   on string contents.
   NOTE(review): only the test for the untranslated clocale case is
   visible in this listing; the statement it controls (presumably the
   one substituting the double-quote string) and the return statement
   are not.  */
190 gettext_quote (char const *msgid, enum quoting_style s)
192 char const *translation = _(msgid);
193 if (translation == msgid && s == clocale_quoting_style)
198 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
199 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
200 non-quoting-style part of O to control quoting.
201 Terminate the output with a null character, and return the written
202 size of the output, not counting the terminating null.
203 If BUFFERSIZE is too small to store the output string, return the
204 value that would have been returned had BUFFERSIZE been large enough.
205 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
207 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
208 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
209 style specified by O, and O must not be null. */
/* Implementation notes.
   NOTE(review): this listing omits many of the function's lines, so
   these notes describe only what is visible and hedge the rest.
   - LEN tracks the length of the output.  The STORE macro, whose bounds
     test is visible below, appears to write a byte only while LEN is
     still below BUFFERSIZE, which would be how the contract for a
     too-small buffer is met -- confirm against the full macro.
   - The first switch configures the style: whether backslash escapes
     are used, and which closing quote string (QUOTE_STRING, of length
     QUOTE_STRING_LEN) is appended at the end.
   - The main loop then walks ARG one byte, or one multibyte sequence,
     at a time and selects the escape for it.
   - When shell_quoting_style meets input that needs quoting, control
     jumps to use_shell_always_quoting_style, which redoes the whole
     conversion from the start with shell_always_quoting_style.
   - O is dereferenced without a null check, so callers must pass a
     valid options object.  */
212 quotearg_buffer_restyled (char *buffer, size_t buffersize,
213 char const *arg, size_t argsize,
214 enum quoting_style quoting_style,
215 struct quoting_options const *o)
219 char const *quote_string = 0;
220 size_t quote_string_len = 0;
221 int backslash_escapes = 0;
222 int unibyte_locale = MB_CUR_MAX == 1;
/* Part of the STORE macro: the write is guarded by the buffer size.  */
227 if (len < buffersize) \
233 switch (quoting_style)
235 case c_quoting_style:
237 backslash_escapes = 1;
239 quote_string_len = 1;
242 case escape_quoting_style:
243 backslash_escapes = 1;
246 case locale_quoting_style:
247 case clocale_quoting_style:
249 /* Get translations for open and closing quotation marks.
251 The message catalog should translate "`" to a left
252 quotation mark suitable for the locale, and similarly for
253 "'". If the catalog has no translation,
254 locale_quoting_style quotes `like this', and
255 clocale_quoting_style quotes "like this".
257 For example, an American English Unicode locale should
258 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
259 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
260 MARK). A British English Unicode locale should instead
261 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
262 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
264 char const *left = gettext_quote (N_("`"), quoting_style);
265 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote now; QUOTE_STRING is borrowed as the cursor
   and is then pointed at the closing quote for use at the end.  */
266 for (quote_string = left; *quote_string; quote_string++)
267 STORE (*quote_string);
268 backslash_escapes = 1;
269 quote_string = right;
270 quote_string_len = strlen (quote_string);
274 case shell_always_quoting_style:
277 quote_string_len = 1;
/* Main loop.  An ARGSIZE of (size_t) -1 means ARG is null-terminated;
   otherwise exactly ARGSIZE bytes are processed.  */
284 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* Does the closing quote string occur in ARG at offset I?
   NOTE(review): the statement controlled by this test is not visible
   here; presumably it emits a backslash so that the embedded quote
   cannot end the quoted text early -- confirm.  */
289 if (backslash_escapes
291 && i + quote_string_len <= argsize
292 && memcmp (arg + i, quote_string, quote_string_len) == 0)
299 switch (quoting_style)
301 case shell_quoting_style:
302 goto use_shell_always_quoting_style;
/* C style only: look ahead for a second question mark followed by one
   of the characters listed below.  NOTE(review): presumably this guards
   against emitting a trigraph -- confirm against the full comment.  */
304 case c_quoting_style:
305 if (i + 2 < argsize && arg[i + 1] == '?')
309 case '(': case ')': case '-': case '/':
310 case '<': case '=': case '>':
311 /* Escape the second '?' in what would otherwise be
327 case ALERT_CHAR: esc = 'a'; goto c_escape;
328 case '\b': esc = 'b'; goto c_escape;
329 case '\f': esc = 'f'; goto c_escape;
330 case '\n': esc = 'n'; goto c_and_shell_escape;
331 case '\r': esc = 'r'; goto c_and_shell_escape;
332 case '\t': esc = 't'; goto c_and_shell_escape;
333 case '\v': esc = 'v'; goto c_escape;
334 case '\\': esc = c; goto c_and_shell_escape;
337 if (quoting_style == shell_quoting_style)
338 goto use_shell_always_quoting_style;
340 if (backslash_escapes)
352 case '!': /* special in bash */
353 case '"': case '$': case '&':
354 case '(': case ')': case '*': case ';':
355 case '<': case '>': case '[':
356 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
358 /* A shell special character. In theory, '$' and '`' could
359 be the first bytes of multibyte characters, which means
360 we should check them with mbrtowc, but in practice this
361 doesn't happen so it's not worth worrying about. */
362 if (quoting_style == shell_quoting_style)
363 goto use_shell_always_quoting_style;
367 switch (quoting_style)
369 case shell_quoting_style:
370 goto use_shell_always_quoting_style;
372 case shell_always_quoting_style:
383 case '%': case '+': case ',': case '-': case '.': case '/':
384 case '0': case '1': case '2': case '3': case '4': case '5':
385 case '6': case '7': case '8': case '9': case ':': case '=':
386 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
387 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
388 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
389 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
390 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
391 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
392 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
393 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
394 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
396 /* These characters don't cause problems, no matter what the
397 quoting style is. They cannot start multibyte sequences. */
401 /* If we have a multibyte sequence, copy it until we reach
402 its end, find an error, or come back to the initial shift
403 state. For C-like styles, if the sequence has
404 unprintable characters, escape the whole sequence, since
405 we can't easily escape single characters within it. */
407 /* Length of multibyte sequence found so far. */
415 printable = ISPRINT (c);
/* Begin decoding from the initial conversion state.  */
420 memset (&mbstate, 0, sizeof mbstate);
/* mbrtowc is given an explicit byte count below, so a (size_t) -1
   ARGSIZE is resolved to the real string length at this point.  */
424 if (argsize == (size_t) -1)
425 argsize = strlen (arg);
430 size_t bytes = mbrtowc (&w, &arg[i + m],
431 argsize - (i + m), &mbstate);
/* Per the mbrtowc specification, (size_t) -1 reports an invalid
   sequence and (size_t) -2 an incomplete one; in the incomplete case
   the loop below advances M over the remaining nonnull bytes of ARG.  */
434 else if (bytes == (size_t) -1)
439 else if (bytes == (size_t) -2)
442 while (i + m < argsize && arg[i + m])
453 while (! mbsinit (&mbstate));
456 if (1 < m || (backslash_escapes && ! printable))
458 /* Output a multibyte sequence, or an escaped
459 unprintable unibyte character. */
464 if (backslash_escapes && ! printable)
/* The two most significant octal digits of byte C.  NOTE(review): the
   backslash before them and the last digit are presumably emitted by
   statements not visible in this listing.  */
467 STORE ('0' + (c >> 6));
468 STORE ('0' + ((c >> 3) & 7));
/* Test the per-character override bit for C in O's bit set.  */
482 if (! (backslash_escapes
483 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* After the loop: append the closing quote, if the style has one.  */
494 for (; *quote_string; quote_string++)
495 STORE (*quote_string);
/* NOTE(review): presumably the terminating null is stored only when it
   fits; the controlled statement and the return of LEN are not visible
   in this listing.  */
497 if (len < buffersize)
/* Restart the conversion with the always-quote shell style; output
   already placed in BUFFER is overwritten from the beginning.  */
501 use_shell_always_quoting_style:
502 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
503 shell_always_quoting_style, o);
506 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
507 argument ARG (of size ARGSIZE), using O to control quoting.
508 If O is null, use the default.
509 Terminate the output with a null character, and return the written
510 size of the output, not counting the terminating null.
511 If BUFFERSIZE is too small to store the output string, return the
512 value that would have been returned had BUFFERSIZE been large enough.
513 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
/* Implementation: replaces a null O with default_quoting_options and
   delegates to quotearg_buffer_restyled, which dereferences its options
   pointer and therefore needs a non-null one.
   NOTE(review): the final arguments of the call are on a line that is
   not visible in this listing; presumably they are the style taken
   from P and P itself -- confirm.  */
515 quotearg_buffer (char *buffer, size_t buffersize,
516 char const *arg, size_t argsize,
517 struct quoting_options const *o)
519 struct quoting_options const *p = o ? o : &default_quoting_options;
520 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
524 /* Use storage slot N to return a quoted version of the string ARG.
525 OPTIONS specifies the quoting options.
526 The returned value points to static storage that can be
527 reused by the next call to this function with the same value of N.
528 N must be nonnegative. N is deliberately declared with type "int"
529 to allow for future extensions (using negative values). */
/* Implementation notes: the slot table starts out as the single static
   entry slotvec0, whose buffer is the static array slot0, and NSLOTS
   records how many slots the table currently has.  All of this state
   is static, so it persists across calls.
   NOTE(review): several lines of this function are not visible in this
   listing, including the definition of struct slotvec, the test that
   decides when the table must grow, the test that decides when a
   slot's buffer is too small, and the return statement.  */
531 quotearg_n_options (int n, char const *arg,
532 struct quoting_options const *options)
534 /* Preallocate a slot 0 buffer, so that the caller can always quote
535 one small component of a "memory exhausted" message in slot 0. */
536 static char slot0[256];
537 static unsigned int nslots = 1;
543 static struct slotvec slotvec0 = {sizeof slot0, slot0};
544 static struct slotvec *slotvec = &slotvec0;
/* S is the byte size of a table of N1 slots.  The test that follows
   rejects N1 == 0 and a product that overflowed size_t, since dividing
   S back by the element size would then not give N1 again.  */
549 size_t s = n1 * sizeof (struct slotvec);
550 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
/* The table is still the static slotvec0, which is not heap storage;
   presumably it is replaced by a heap copy first so that the xrealloc
   below is handed a pointer it may resize -- confirm.  */
552 if (slotvec == &slotvec0)
554 slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
557 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero every newly added slot, giving it size 0 and a null buffer.  */
558 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
/* First attempt: quote into the slot's current buffer.  QSIZE is the
   length the result needs, whether or not it fitted.  */
563 size_t size = slotvec[n].size;
564 char *val = slotvec[n].val;
565 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Resize the buffer to QSIZE + 1, leaving room for the terminating
   null, and quote again.  slot0 is a static array and must not be
   passed to xrealloc, hence the null pointer in its place.  */
569 slotvec[n].size = size = qsize + 1;
570 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
571 quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Quote ARG into storage slot N using default_quoting_options, by
   forwarding to quotearg_n_options.
   NOTE(review): N is unsigned int here, while quotearg_n_options takes
   an int and documents that N must be nonnegative; a value above
   INT_MAX would be converted on the call -- confirm that no caller
   passes one.  */
579 quotearg_n (unsigned int n, char const *arg)
581 return quotearg_n_options (n, arg, &default_quoting_options);
/* Quote ARG with the default options, using storage slot 0.  This is
   quotearg_n with N fixed at 0, so the result lives in storage that a
   later call using slot 0 may reuse.  */
585 quotearg (char const *arg)
587 return quotearg_n (0, arg);
/* Quote ARG into storage slot N using a temporary options object whose
   extra-character bit set is cleared, so no characters are quoted
   beyond what the style itself requires.  The default options are
   neither read nor modified.
   NOTE(review): the statement that stores S into the temporary
   object's style member is not visible in this listing; presumably it
   precedes the memset -- confirm.  */
591 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
593 struct quoting_options o;
595 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
596 return quotearg_n_options (n, arg, &o);
/* Quote ARG with style S, using storage slot 0.  This is
   quotearg_n_style with N fixed at 0.  */
600 quotearg_style (enum quoting_style s, char const *arg)
602 return quotearg_n_style (0, s, arg);
/* Quote ARG like the default options would, but additionally force the
   character CH to be quoted.  The defaults are copied into a local
   object and only that copy is changed, so default_quoting_options is
   left untouched.  The result is produced in storage slot 0.  */
606 quotearg_char (char const *arg, char ch)
608 struct quoting_options options;
609 options = default_quoting_options;
610 set_char_quoting (&options, ch, 1);
611 return quotearg_n_options (0, arg, &options);
/* Quote ARG with the default options, additionally forcing colons to
   be quoted.  This is quotearg_char with CH fixed at the colon
   character.  */
615 quotearg_colon (char const *arg)
617 return quotearg_char (arg, ':');