1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
24 #include <sys/types.h>
/* Shorthand for message lookup: _(text) expands to gettext (text).  */
32 # define _(text) gettext (text)
/* Fallback UCHAR_MAX: converting -1 to unsigned char gives the largest
   value that type can hold.  */
45 # define UCHAR_MAX ((unsigned char) -1)
/* ALERT_CHAR is the alert (bell) character, spelled with the backslash-a
   escape when HAVE_C_BACKSLASH_A is set and as octal 7 otherwise.  */
48 #if HAVE_C_BACKSLASH_A
49 # define ALERT_CHAR '\a'
51 # define ALERT_CHAR '\7'
/* Wrapper that drops PS and passes 0 as the state argument.  The
   parenthesized (mbrtowc) is not followed directly by an argument list,
   so it names the underlying function rather than this macro.  */
69 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
/* With this definition the conversion state always counts as initial.  */
70 # define mbsinit(ps) 1
/* Single-byte stand-in: copies the byte at S into *PWC and evaluates to
   1 for a non-null byte and 0 for a null byte.  N and PS are ignored.  */
73 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
74 # define mbsinit(ps) 1
/* Printability of a wide character is decided by ISPRINT on its value
   converted to unsigned char.  */
75 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
/* Last resort when no iswprint is available: treat every wide character
   as printable.  */
82 # if !defined iswprint && !HAVE_ISWPRINT
83 # define iswprint(wc) 1
/* Number of bits in an int, used to index the quote_these_too bit vector.  */
87 #define INT_BITS (sizeof (int) * CHAR_BIT)
89 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
90 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
94 # define ISASCII(c) isascii (c)
96 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* ISASCII is tested first, so isprint is only consulted for values that
   ISASCII accepts.  */
98 #define ISPRINT(c) (ISASCII (c) && isprint (c))
/* Settings that control how an argument is quoted: a base style plus a
   per-character set of extra characters to quote.  */
100 struct quoting_options
102 /* Basic quoting style. */
103 enum quoting_style style;
105 /* Quote the characters indicated by this bit vector even if the
106 quoting style would not normally require them to be quoted. */
/* One bit per possible unsigned char value, INT_BITS bits per array
   element: character C is stored in element C / INT_BITS at bit
   position C % INT_BITS.  */
107 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
110 /* Names of quoting styles. */
/* NOTE(review): entry I of this table is presumably the name of the style
   stored as entry I of quoting_style_vals -- confirm that both tables are
   kept in the same order.  */
111 char const *const quoting_style_args[] =
123 /* Correspondences to quoting style names. */
/* Style values, meant to be indexed in parallel with quoting_style_args.
   Both the array and its elements are const.  */
124 enum quoting_style const quoting_style_vals[] =
126 literal_quoting_style,
128 shell_always_quoting_style,
130 escape_quoting_style,
131 locale_quoting_style,
132 clocale_quoting_style
135 /* The default quoting options. */
/* File-scope object with no initializer, so it starts out zeroed: no
   bits set in quote_these_too, and STYLE equal to whichever enumerator
   of enum quoting_style has the value 0.  The functions in this file
   fall back to this object when they are handed a null options pointer.  */
136 static struct quoting_options default_quoting_options;
138 /* Allocate a new set of quoting options, with contents initially identical
139 to O if O is not null, or to the default if O is null.
140 It is the caller's responsibility to free the result. */
141 struct quoting_options *
142 clone_quoting_options (struct quoting_options *o)
/* The xmalloc result is used without a null check.
   NOTE(review): presumably xmalloc does not return when allocation
   fails -- confirm.  */
144 struct quoting_options *p
145 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
/* Whole-struct assignment copies both the style and the bit vector;
   a null O selects default_quoting_options as the source.  */
146 *p = *(o ? o : &default_quoting_options);
150 /* Get the value of O's quoting style. If O is null, use the default. */
152 get_quoting_style (struct quoting_options *o)
/* Read-only access: a null O is replaced by the address of
   default_quoting_options before the member is fetched.  */
154 return (o ? o : &default_quoting_options)->style;
157 /* In O (or in the default if O is null),
158 set the value of the quoting style to S. */
160 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O this writes to default_quoting_options, so the change
   affects every later use of the defaults.  */
162 (o ? o : &default_quoting_options)->style = s;
165 /* In O (or in the default if O is null),
166 set the value of the quoting options for character C to I.
167 Return the old value. Currently, the only values defined for I are
168 0 (the default) and 1 (which means to quote the character even if
169 it would not otherwise be quoted). */
171 set_char_quoting (struct quoting_options *o, char c, int i)
/* Go through unsigned char so that a negative plain char cannot produce
   a negative array index or shift count.  */
173 unsigned char uc = c;
/* P points at the array element holding the bit for C (in O, or in the
   defaults when O is null); SHIFT is the position of that bit.  */
174 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
175 int shift = uc % INT_BITS;
/* R is the current (old) value of the bit.  */
176 int r = (*p >> shift) & 1;
/* Flip the bit only when the requested value (I & 1) differs from R;
   XOR with 0 leaves the element unchanged.
   NOTE(review): *P is a signed int, so when SHIFT is INT_BITS - 1 this
   left shift moves a 1 into the sign bit, and the right shift above may
   act on a negative value.  An unsigned element type would avoid both.  */
177 *p ^= ((i & 1) ^ r) << shift;
181 /* MSGID approximates a quotation mark. Return its translation if it
182 has one; otherwise, return either it or "\"", depending on S. */
184 gettext_quote (char const *msgid, enum quoting_style s)
/* Look MSGID up in the message catalog through the _() macro.  */
186 char const *translation = _(msgid);
/* Pointer equality, not string comparison: the test is whether the
   lookup handed back MSGID itself, which is treated as having no
   translation.  Only clocale_quoting_style gets special handling then.  */
187 if (translation == msgid && s == clocale_quoting_style)
192 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
193 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
194 non-quoting-style part of O to control quoting.
195 Terminate the output with a null character, and return the written
196 size of the output, not counting the terminating null.
197 If BUFFERSIZE is too small to store the output string, return the
198 value that would have been returned had BUFFERSIZE been large enough.
199 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
201 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
202 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
203 style specified by O, and O may not be null. */
206 quotearg_buffer_restyled (char *buffer, size_t buffersize,
207 char const *arg, size_t argsize,
208 enum quoting_style quoting_style,
209 struct quoting_options const *o)
/* quote_string and quote_string_len describe the closing quote: it is
   appended after the main loop and also matched against ARG inside the
   loop.  backslash_escapes turns on backslash-style escaping.  */
213 char const *quote_string = 0;
214 size_t quote_string_len = 0;
215 int backslash_escapes = 0;
/* NOTE(review): this bounds test presumably belongs to the STORE helper
   used below, so that bytes are only written while LEN is inside BUFFER
   -- confirm against the full macro.  */
220 if (len < buffersize) \
226 switch (quoting_style)
228 case c_quoting_style:
230 backslash_escapes = 1;
232 quote_string_len = 1;
235 case escape_quoting_style:
236 backslash_escapes = 1;
239 case locale_quoting_style:
240 case clocale_quoting_style:
242 /* Get translations for open and closing quotation marks.
244 The message catalog should translate "`" to a left
245 quotation mark suitable for the locale, and similarly for
246 "'". If the catalog has no translation,
247 locale_quoting_style quotes `like this', and
248 clocale_quoting_style quotes "like this".
250 For example, an American English Unicode locale should
251 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
252 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
253 MARK). A British English Unicode locale should instead
254 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
255 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
/* The opening quote is emitted immediately, byte by byte.  The closing
   quote is only remembered in quote_string and quote_string_len for
   use later in the function.  */
257 char const *left = gettext_quote (N_("`"), quoting_style);
258 char const *right = gettext_quote (N_("'"), quoting_style);
259 for (quote_string = left; *quote_string; quote_string++)
260 STORE (*quote_string);
261 backslash_escapes = 1;
262 quote_string = right;
263 quote_string_len = strlen (quote_string);
267 case shell_always_quoting_style:
270 quote_string_len = 1;
/* Walk ARG one byte at a time.  When ARGSIZE is (size_t) -1 the loop
   stops at the terminating null byte instead of at a byte count.  */
277 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* With backslash escaping on, detect an occurrence of the closing quote
   text at position I of ARG.  The length test keeps memcmp from reading
   past ARGSIZE.  */
282 if (backslash_escapes
284 && i + quote_string_len <= argsize
285 && memcmp (arg + i, quote_string, quote_string_len) == 0)
292 switch (quoting_style)
294 case shell_quoting_style:
295 goto use_shell_always_quoting_style;
297 case c_quoting_style:
/* Look one byte ahead for a second question mark; the cases that follow
   inspect the byte after it.  The bound also passes when ARGSIZE is
   (size_t) -1, since that is the largest size_t value.  */
298 if (i + 2 < argsize && arg[i + 1] == '?')
302 case '(': case ')': case '-': case '/':
303 case '<': case '=': case '>':
304 /* Escape the second '?' in what would otherwise be
320 case ALERT_CHAR: esc = 'a'; goto c_escape;
321 case '\b': esc = 'b'; goto c_escape;
322 case '\f': esc = 'f'; goto c_escape;
323 case '\n': esc = 'n'; goto c_and_shell_escape;
324 case '\r': esc = 'r'; goto c_and_shell_escape;
325 case '\t': esc = 't'; goto c_and_shell_escape;
326 case '\v': esc = 'v'; goto c_escape;
327 case '\\': esc = c; goto c_and_shell_escape;
330 if (quoting_style == shell_quoting_style)
331 goto use_shell_always_quoting_style;
333 if (backslash_escapes)
345 case '!': /* special in bash */
346 case '"': case '$': case '&':
347 case '(': case ')': case '*': case ';':
348 case '<': case '>': case '[':
349 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
351 /* A shell special character. In theory, '$' and '`' could
352 be the first bytes of multibyte characters, which means
353 we should check them with mbrtowc, but in practice this
354 doesn't happen so it's not worth worrying about. */
/* Plain shell style gives up on this argument and restarts the whole
   quoting in shell_always style.  */
355 if (quoting_style == shell_quoting_style)
356 goto use_shell_always_quoting_style;
360 switch (quoting_style)
362 case shell_quoting_style:
363 goto use_shell_always_quoting_style;
365 case shell_always_quoting_style:
376 case '%': case '+': case ',': case '-': case '.': case '/':
377 case '0': case '1': case '2': case '3': case '4': case '5':
378 case '6': case '7': case '8': case '9': case ':': case '=':
379 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
380 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
381 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
382 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
383 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
384 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
385 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
386 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
387 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
389 /* These characters don't cause problems, no matter what the
390 quoting style is. They cannot start multibyte sequences. */
394 /* If we have a multibyte sequence, copy it until we reach
395 its end, find an error, or come back to the initial shift
396 state. For C-like styles, if the sequence has
397 unprintable characters, escape the whole sequence, since
398 we can't easily escape single characters within it. */
400 /* Length of multibyte sequence found so far. */
/* Start decoding from a zeroed conversion state.  */
405 memset (&mbstate, 0, sizeof mbstate);
/* mbrtowc needs a real byte limit, so the (size_t) -1 sentinel is
   replaced by the actual string length from here on.  */
407 if (argsize == (size_t) -1)
408 argsize = strlen (arg);
/* Decode the next character, starting M bytes into the sequence and
   limited to the bytes remaining in ARG.  */
413 size_t bytes = mbrtowc (&w, &arg[i + m],
414 argsize - (i + m), &mbstate);
/* mbrtowc returns (size_t) -1 for an invalid byte sequence and
   (size_t) -2 for an incomplete one.  */
417 else if (bytes == (size_t) -1)
422 else if (bytes == (size_t) -2)
/* This scan stops at ARGSIZE or at a null byte, whichever comes first.  */
425 while (i + m < argsize && arg[i + m])
/* Keep decoding until the conversion state is back to initial.  */
436 while (! mbsinit (&mbstate));
440 /* Escape a unibyte character like a multibyte
441 sequence if using backslash escapes, and if the
442 character is not printable. */
443 m = backslash_escapes && ! ISPRINT (c);
449 /* Output a multibyte sequence, or an escaped
450 unprintable unibyte character. */
/* IMAX is the index of the last byte of the sequence that starts at I.  */
451 size_t imax = i + m - 1;
455 if (backslash_escapes && ! printable)
/* Octal digits taken from C >> 6 and from bits 5 to 3 of C.  */
458 STORE ('0' + (c >> 6));
459 STORE ('0' + ((c >> 3) & 7));
/* Test the caller-requested bit for C in the options.
   NOTE(review): 1 is a signed int here, so shifting it by INT_BITS - 1
   moves a 1 into the sign bit; an unsigned constant would avoid that.  */
473 if (! (backslash_escapes
474 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Append whatever quote_string now points at, which is the closing
   quote set up before the loop.  */
485 for (; *quote_string; quote_string++)
486 STORE (*quote_string);
/* NOTE(review): presumably guards the store of the terminating null that
   the header comment promises -- confirm.  */
488 if (len < buffersize)
/* Jump target for plain shell style: discard the work done so far and
   quote the whole argument again in shell_always style.  */
492 use_shell_always_quoting_style:
493 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
494 shell_always_quoting_style, o);
497 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
498 argument ARG (of size ARGSIZE), using O to control quoting.
499 If O is null, use the default.
500 Terminate the output with a null character, and return the written
501 size of the output, not counting the terminating null.
502 If BUFFERSIZE is too small to store the output string, return the
503 value that would have been returned had BUFFERSIZE been large enough.
504 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
506 quotearg_buffer (char *buffer, size_t buffersize,
507 char const *arg, size_t argsize,
508 struct quoting_options const *o)
/* Resolve a null O to the defaults here, because
   quotearg_buffer_restyled is documented as requiring a non-null
   options pointer.  */
510 struct quoting_options const *p = o ? o : &default_quoting_options;
511 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
515 /* Use storage slot N to return a quoted version of the string ARG.
516 OPTIONS specifies the quoting options.
517 The returned value points to static storage that can be
518 reused by the next call to this function with the same value of N.
519 N must be nonnegative. N is deliberately declared with type "int"
520 to allow for future extensions (using negative values). */
522 quotearg_n_options (int n, char const *arg,
523 struct quoting_options const *options)
/* The slot table and its length are static, so they persist from one
   call to the next.  */
525 static unsigned int nslots;
526 static struct slotvec
/* Overflow guard for the new table size in bytes: N1 must be positive,
   and dividing S back by the element size must give N1 again.  */
535 size_t s = n1 * sizeof (struct slotvec);
536 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
538 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero only the slots that were just added, from index NSLOTS up to
   N1 - 1.  */
539 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
/* First pass: quote into the current buffer of slot N.  QSIZE is the
   length the complete result needs, whether or not it fitted.  */
544 size_t size = slotvec[n].size;
545 char *val = slotvec[n].val;
546 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Resize the slot to QSIZE + 1 bytes, the extra byte being for the
   terminating null, and quote again into the resized buffer.  */
550 slotvec[n].size = size = qsize + 1;
551 slotvec[n].val = val = xrealloc (val, size);
552 quotearg_buffer (val, size, arg, (size_t) -1, options);
560 quotearg_n (unsigned int n, char const *arg)
/* Quote ARG into slot N using the default options.  N is unsigned here
   but quotearg_n_options takes an int, so it is converted at the call.  */
562 return quotearg_n_options (n, arg, &default_quoting_options);
566 quotearg (char const *arg)
/* Convenience form of quotearg_n that always uses slot 0.  */
568 return quotearg_n (0, arg);
572 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
/* A local options object is built for this one call.  */
574 struct quoting_options o;
/* Clear every bit of the per-character vector, so no extra characters
   are marked for quoting.  */
576 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
577 return quotearg_n_options (n, arg, &o);
581 quotearg_style (enum quoting_style s, char const *arg)
/* Convenience form of quotearg_n_style that always uses slot 0.  */
583 return quotearg_n_style (0, s, arg);
587 quotearg_char (char const *arg, char ch)
/* Work on a private copy of the defaults, so that marking CH below does
   not change default_quoting_options itself.  */
589 struct quoting_options options;
590 options = default_quoting_options;
/* Mark CH as a character to quote, then quote ARG into slot 0.  */
591 set_char_quoting (&options, ch, 1);
592 return quotearg_n_options (0, arg, &options);
596 quotearg_colon (char const *arg)
/* Same as quotearg_char with the colon as the extra character.  */
598 return quotearg_char (arg, ':');