1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
24 #include <sys/types.h>
/* Shorthand used to look up the translation of TEXT through gettext.  */
32 # define _(text) gettext (text)
/* Largest value of unsigned char, obtained by converting -1 to that
   type.  NOTE(review): presumably a fallback for systems whose headers
   lack UCHAR_MAX; the enclosing conditional is not visible here.  */
45 # define UCHAR_MAX ((unsigned char) -1)
/* ALERT_CHAR is the alert (bell) character: spelled with the backslash-a
   escape when HAVE_C_BACKSLASH_A is set, and as octal 7 in the other
   definition below.  */
48 #if HAVE_C_BACKSLASH_A
49 # define ALERT_CHAR '\a'
51 # define ALERT_CHAR '\7'
/* Call the real mbrtowc (the parenthesized name suppresses this macro)
   with a null pointer in place of PS, and let mbsinit always report the
   initial conversion state.  */
69 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
70 # define mbsinit(ps) 1
73 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
74 other macros are defined only for documentation and to satisfy C syntax. */
78 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
79 # define mbsinit(ps) 1
80 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
87 # if !defined iswprint && !HAVE_ISWPRINT
88 # define iswprint(wc) 1
92 #define INT_BITS (sizeof (int) * CHAR_BIT)
94 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
95 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* Summary of the macros in this group: the unibyte mbrtowc replacement
   copies one byte and yields 1 for a nonzero byte and 0 for a null byte;
   INT_BITS is the number of bits in an int and is used to index the
   quote_these_too bit vector; ISASCII here forwards to isascii.
   NOTE(review): the other branches of these conditionals are not
   visible in this extract.  */
99 # define ISASCII(c) isascii (c)
101 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* ISPRINT (c) holds only when C passes both ISASCII and isprint.  It
   backs the unibyte iswprint replacement and the printable test in the
   quoting loop.  */
103 #define ISPRINT(c) (ISASCII (c) && isprint (c))
105 struct quoting_options
107 /* Basic quoting style. */
108 enum quoting_style style;
110 /* Quote the characters indicated by this bit vector even if the
111 quoting style would not normally require them to be quoted. */
112 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
115 /* Names of quoting styles. */
/* NOTE(review): only the declarator is visible in this extract; the
   initializer list is missing and has to be restored from the complete
   file.  Presumably entry K names the style held at index K of
   quoting_style_vals -- confirm.  */
116 char const *const quoting_style_args[] =
128 /* Correspondences to quoting style names. */
/* NOTE(review): the embedded listing numbers skip 132 and 134, so two
   enumerators appear to be missing from this list.  shell_quoting_style
   and c_quoting_style are both used elsewhere in this file and are
   absent here, but their positions cannot be confirmed from this
   extract.  The braces of the initializer are missing as well.  */
129 enum quoting_style const quoting_style_vals[] =
131 literal_quoting_style,
133 shell_always_quoting_style,
135 escape_quoting_style,
136 locale_quoting_style,
137 clocale_quoting_style
140 /* The default quoting options. */
/* Used by the functions in this file whenever the caller passes a null
   options pointer.  Being a static object without an initializer, it
   starts out all zero: the style is whichever enumerator has the value
   0, and no extra characters are flagged in quote_these_too.  */
141 static struct quoting_options default_quoting_options;
143 /* Allocate a new set of quoting options, with contents initially identical
144 to O if O is not null, or to the default if O is null.
145 It is the caller's responsibility to free the result. */
146 struct quoting_options *
147 clone_quoting_options (struct quoting_options *o)
149 struct quoting_options *p
150 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
151 *p = *(o ? o : &default_quoting_options);
155 /* Get the value of O's quoting style. If O is null, use the default. */
157 get_quoting_style (struct quoting_options *o)
159 return (o ? o : &default_quoting_options)->style;
162 /* In O (or in the default if O is null),
163 set the value of the quoting style to S. */
165 set_quoting_style (struct quoting_options *o, enum quoting_style s)
167 (o ? o : &default_quoting_options)->style = s;
170 /* In O (or in the default if O is null),
171 set the value of the quoting options for character C to I.
172 Return the old value. Currently, the only values defined for I are
173 0 (the default) and 1 (which means to quote the character even if
174 it would not otherwise be quoted). */
176 set_char_quoting (struct quoting_options *o, char c, int i)
178 unsigned char uc = c;
179 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
180 int shift = uc % INT_BITS;
181 int r = (*p >> shift) & 1;
182 *p ^= ((i & 1) ^ r) << shift;
186 /* MSGID approximates a quotation mark. Return its translation if it
187 has one; otherwise, return either it or "\"", depending on S. */
189 gettext_quote (char const *msgid, enum quoting_style s)
191 char const *translation = _(msgid);
192 if (translation == msgid && s == clocale_quoting_style)
197 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
198 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
199 non-quoting-style part of O to control quoting.
200 Terminate the output with a null character, and return the written
201 size of the output, not counting the terminating null.
202 If BUFFERSIZE is too small to store the output string, return the
203 value that would have been returned had BUFFERSIZE been large enough.
204 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
206 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
207 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
208 style specified by O, and O may not be null. */
/* NOTE(review): this listing is an extract.  A number of original lines
   (return type, braces, break statements, some declarations and case
   labels) are not present, so the notes added below describe only what
   the visible lines establish.  */
211 quotearg_buffer_restyled (char *buffer, size_t buffersize,
212 char const *arg, size_t argsize,
213 enum quoting_style quoting_style,
214 struct quoting_options const *o)
/* quote_string and quote_string_len describe the quote text that is
   matched against ARG inside the loop and emitted after the loop.
   backslash_escapes is set by the styles that use backslash escapes.
   unibyte_locale caches whether MB_CUR_MAX is 1.  */
218 char const *quote_string = 0;
219 size_t quote_string_len = 0;
220 int backslash_escapes = 0;
221 int unibyte_locale = MB_CUR_MAX == 1;
/* The next line ends in a backslash, so it is one line of a multi-line
   macro body.  NOTE(review): presumably part of the STORE macro used
   throughout this function, whose define line is not visible -- confirm.  */
226 if (len < buffersize) \
232 switch (quoting_style)
234 case c_quoting_style:
236 backslash_escapes = 1;
238 quote_string_len = 1;
241 case escape_quoting_style:
242 backslash_escapes = 1;
245 case locale_quoting_style:
246 case clocale_quoting_style:
248 /* Get translations for open and closing quotation marks.
250 The message catalog should translate "`" to a left
251 quotation mark suitable for the locale, and similarly for
252 "'". If the catalog has no translation,
253 locale_quoting_style quotes `like this', and
254 clocale_quoting_style quotes "like this".
256 For example, an American English Unicode locale should
257 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
258 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
259 MARK). A British English Unicode locale should instead
260 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
261 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
263 char const *left = gettext_quote (N_("`"), quoting_style);
264 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote right away, then keep the closing quote and
   its length for matching inside ARG and for output at the end.  */
265 for (quote_string = left; *quote_string; quote_string++)
266 STORE (*quote_string);
267 backslash_escapes = 1;
268 quote_string = right;
269 quote_string_len = strlen (quote_string);
273 case shell_always_quoting_style:
276 quote_string_len = 1;
/* Visit each byte of ARG.  When ARGSIZE is (size_t) -1 the loop stops at
   the terminating null byte; otherwise it stops after ARGSIZE bytes.  */
283 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* With backslash escapes enabled, test whether the closing quote string
   occurs at position I of ARG.
   NOTE(review): while ARGSIZE is still (size_t) -1 the length test below
   always passes, so for a quote string longer than one byte memcmp may
   examine bytes past the terminating null of ARG -- confirm.  */
288 if (backslash_escapes
290 && i + quote_string_len <= argsize
291 && memcmp (arg + i, quote_string, quote_string_len) == 0)
298 switch (quoting_style)
300 case shell_quoting_style:
301 goto use_shell_always_quoting_style;
303 case c_quoting_style:
/* Trigraph guard: applies only when the next byte is a question mark
   and at least one more byte follows it within ARGSIZE.
   NOTE(review): the comment that follows the case labels below is cut
   off in this extract.  */
304 if (i + 2 < argsize && arg[i + 1] == '?')
308 case '(': case ')': case '-': case '/':
309 case '<': case '=': case '>':
310 /* Escape the second '?' in what would otherwise be
326 case ALERT_CHAR: esc = 'a'; goto c_escape;
327 case '\b': esc = 'b'; goto c_escape;
328 case '\f': esc = 'f'; goto c_escape;
329 case '\n': esc = 'n'; goto c_and_shell_escape;
330 case '\r': esc = 'r'; goto c_and_shell_escape;
331 case '\t': esc = 't'; goto c_and_shell_escape;
332 case '\v': esc = 'v'; goto c_escape;
333 case '\\': esc = c; goto c_and_shell_escape;
336 if (quoting_style == shell_quoting_style)
337 goto use_shell_always_quoting_style;
339 if (backslash_escapes)
351 case '!': /* special in bash */
352 case '"': case '$': case '&':
353 case '(': case ')': case '*': case ';':
354 case '<': case '>': case '[':
355 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
357 /* A shell special character. In theory, '$' and '`' could
358 be the first bytes of multibyte characters, which means
359 we should check them with mbrtowc, but in practice this
360 doesn't happen so it's not worth worrying about. */
361 if (quoting_style == shell_quoting_style)
362 goto use_shell_always_quoting_style;
366 switch (quoting_style)
368 case shell_quoting_style:
369 goto use_shell_always_quoting_style;
371 case shell_always_quoting_style:
382 case '%': case '+': case ',': case '-': case '.': case '/':
383 case '0': case '1': case '2': case '3': case '4': case '5':
384 case '6': case '7': case '8': case '9': case ':': case '=':
385 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
386 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
387 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
388 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
389 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
390 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
391 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
392 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
393 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
395 /* These characters don't cause problems, no matter what the
396 quoting style is. They cannot start multibyte sequences. */
400 /* If we have a multibyte sequence, copy it until we reach
401 its end, find an error, or come back to the initial shift
402 state. For C-like styles, if the sequence has
403 unprintable characters, escape the whole sequence, since
404 we can't easily escape single characters within it. */
406 /* Length of multibyte sequence found so far. */
414 printable = ISPRINT (c);
/* Start the multibyte conversion from a zeroed state object.  */
419 memset (&mbstate, 0, sizeof mbstate);
/* The scan below passes the remaining byte count to mbrtowc, so the
   (size_t) -1 sentinel is replaced by the real length of ARG first.  */
423 if (argsize == (size_t) -1)
424 argsize = strlen (arg);
429 size_t bytes = mbrtowc (&w, &arg[i + m],
430 argsize - (i + m), &mbstate);
/* (size_t) -1 and (size_t) -2 are the mbrtowc results for an invalid
   sequence and for an incomplete one, respectively.  */
433 else if (bytes == (size_t) -1)
438 else if (bytes == (size_t) -2)
441 while (i + m < argsize && arg[i + m])
452 while (! mbsinit (&mbstate));
455 if (1 < m || (backslash_escapes && ! printable))
457 /* Output a multibyte sequence, or an escaped
458 unprintable unibyte character. */
463 if (backslash_escapes && ! printable)
/* Store the two most significant octal digits of the byte C.  */
466 STORE ('0' + (c >> 6));
467 STORE ('0' + ((c >> 3) & 7));
/* Characters flagged in the quote_these_too bit vector of O matter only
   when backslash escapes are enabled.
   NOTE(review): 1 << (c % INT_BITS) shifts a signed 1 into the sign bit
   when the remainder is INT_BITS - 1; an unsigned operand would avoid
   that.  */
481 if (! (backslash_escapes
482 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Emit the bytes quote_string still points at (the closing quote).  */
493 for (; *quote_string; quote_string++)
494 STORE (*quote_string);
/* NOTE(review): presumably guards the store of the terminating null so
   that nothing is written past BUFFERSIZE; the guarded statement is not
   visible here.  */
496 if (len < buffersize)
/* Reached from every shell_quoting_style branch above that decides
   quoting is needed: redo the whole conversion in
   shell_always_quoting_style with the same buffer and argument.  */
500 use_shell_always_quoting_style:
501 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
502 shell_always_quoting_style, o);
505 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
506 argument ARG (of size ARGSIZE), using O to control quoting.
507 If O is null, use the default.
508 Terminate the output with a null character, and return the written
509 size of the output, not counting the terminating null.
510 If BUFFERSIZE is too small to store the output string, return the
511 value that would have been returned had BUFFERSIZE been large enough.
512 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
514 quotearg_buffer (char *buffer, size_t buffersize,
515 char const *arg, size_t argsize,
516 struct quoting_options const *o)
518 struct quoting_options const *p = o ? o : &default_quoting_options;
519 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
523 /* Use storage slot N to return a quoted version of the string ARG.
524 OPTIONS specifies the quoting options.
525 The returned value points to static storage that can be
526 reused by the next call to this function with the same value of N.
527 N must be nonnegative. N is deliberately declared with type "int"
528 to allow for future extensions (using negative values). */
/* NOTE(review): this listing is an extract.  The return type, the
   definition of struct slotvec, the guard around the slot-vector growth
   code, the guard around the buffer growth code and the return statement
   are on lines that are not visible here.  */
530 quotearg_n_options (int n, char const *arg,
531 struct quoting_options const *options)
533 /* Preallocate a slot 0 buffer, so that the caller can always quote
534 one small component of a "memory exhausted" message in slot 0. */
535 static char slot0[256];
536 static unsigned int nslots = 1;
/* The slot vector initially consists of the single static entry
   slotvec0, which records the size and address of slot0.  */
542 static struct slotvec slotvec0 = {sizeof slot0, slot0};
543 static struct slotvec *slotvec = &slotvec0;
/* S is the byte size of a vector of N1 entries.  The test rejects a
   non-positive N1 and a product that overflowed size_t (dividing S back
   does not give N1); the action taken is on a line not visible here.  */
548 size_t s = n1 * sizeof (struct slotvec);
549 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
/* While the vector is still the static slotvec0 it cannot be handed to
   xrealloc, so a heap-allocated vector of one entry is obtained first.  */
551 if (slotvec == &slotvec0)
553 slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
/* Grow the vector to S bytes and zero the entries from index NSLOTS up
   to N1, so that new slots start with size 0 and a null buffer.  */
556 slotvec = (struct slotvec *) xrealloc (slotvec, s);
557 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
/* First attempt: quote ARG into the buffer slot N currently owns.
   QSIZE is the length the complete result needs, excluding the null.  */
562 size_t size = slotvec[n].size;
563 char *val = slotvec[n].val;
564 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Make the slot exactly large enough (QSIZE plus the null) and quote
   again.  The static slot0 buffer is never passed to xrealloc: a null
   pointer is passed instead, which requests a fresh allocation.  */
568 slotvec[n].size = size = qsize + 1;
569 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
570 quotearg_buffer (val, size, arg, (size_t) -1, options);
578 quotearg_n (unsigned int n, char const *arg)
580 return quotearg_n_options (n, arg, &default_quoting_options);
/* Return a quoted version of ARG, using storage slot 0 and the default
   quoting options.  */
char *
quotearg (char const *arg)
{
  return quotearg_n (0, arg);
}
590 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
592 struct quoting_options o;
594 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
595 return quotearg_n_options (n, arg, &o);
599 quotearg_style (enum quoting_style s, char const *arg)
601 return quotearg_n_style (0, s, arg);
605 quotearg_char (char const *arg, char ch)
607 struct quoting_options options;
608 options = default_quoting_options;
609 set_char_quoting (&options, ch, 1);
610 return quotearg_n_options (0, arg, &options);
/* Return a quoted version of ARG in storage slot 0, with ':' marked
   for quoting in addition to what the default options quote.  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char (arg, ':');
}