1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
24 #include <sys/types.h>
/* Shorthand: look up the translation of TEXT through gettext. */
32 # define _(text) gettext (text)
/* Fallback definition: -1 converted to unsigned char is the largest
   value that type can hold. */
45 # define UCHAR_MAX ((unsigned char) -1)
/* ALERT_CHAR is the alert (bell) character. The second definition spells
   it as an octal escape; NOTE(review): presumably it is the #else branch
   for compilers that lack the backslash-a escape -- confirm. */
48 #if HAVE_C_BACKSLASH_A
49 # define ALERT_CHAR '\a'
51 # define ALERT_CHAR '\7'
66 #if HAVE_MBRTOWC && HAVE_WCHAR_H
68 # if !HAVE_MBSTATE_T_OBJECT
/* Call the real mbrtowc (the parenthesized name suppresses this macro)
   and pass a null state pointer in place of PS. */
69 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
/* Single-byte stand-ins: the conversion copies one byte and yields 1 for
   a nonzero byte and 0 for a null byte, the state always counts as
   initial, and printability is decided by ISPRINT on the byte. */
72 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
73 # define mbsinit(ps) 1
74 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
/* With no iswprint available, treat every wide character as printable. */
81 # if !defined iswprint && !HAVE_ISWPRINT
82 # define iswprint(wc) 1
/* Number of bits in an int; used to index the quote_these_too bit vector. */
86 #define INT_BITS (sizeof (int) * CHAR_BIT)
88 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
89 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
93 # define ISASCII(c) isascii (c)
95 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* Nonzero only if C passes ISASCII and isprint also accepts it. */
97 #define ISPRINT(c) (ISASCII (c) && isprint (c))
/* Settings that control how an argument is quoted: a base style plus a
   per-character override bit vector. */
99 struct quoting_options
101 /* Basic quoting style. */
102 enum quoting_style style;
104 /* Quote the characters indicated by this bit vector even if the
105 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value: the flag for character C is bit
   (C % INT_BITS) of element (C / INT_BITS). */
106 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
109 /* Names of quoting styles. */
/* NOTE(review): the initializer is not visible in this excerpt;
   presumably entry I is the name of the style stored in
   quoting_style_vals[I] -- confirm. */
110 char const *const quoting_style_args[] =
122 /* Correspondences to quoting style names. */
/* NOTE(review): presumably kept in the same order as quoting_style_args,
   so that equal indexes pair a name with its enum value -- confirm against
   the complete initializers. */
123 enum quoting_style const quoting_style_vals[] =
125 literal_quoting_style,
127 shell_always_quoting_style,
129 escape_quoting_style,
130 locale_quoting_style,
131 clocale_quoting_style
134 /* The default quoting options. */
/* Used by the functions in this file whenever they are handed a null
   options pointer. It has static storage duration and no initializer, so
   it starts out zero-initialized; set_quoting_style and set_char_quoting
   modify it when called with a null pointer. */
135 static struct quoting_options default_quoting_options;
137 /* Allocate a new set of quoting options, with contents initially identical
138 to O if O is not null, or to the default if O is null.
139 It is the caller's responsibility to free the result. */
140 struct quoting_options *
141 clone_quoting_options (struct quoting_options *o)
/* NOTE(review): the xmalloc result is dereferenced without a check;
   xmalloc is defined outside this excerpt and presumably never returns
   null -- confirm. */
143 struct quoting_options *p
144 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
/* Structure assignment copies the style and the entire quote_these_too
   array in one step. */
145 *p = *(o ? o : &default_quoting_options);
149 /* Get the value of O's quoting style. If O is null, use the default. */
151 get_quoting_style (struct quoting_options *o)
/* Read-only access: a null O selects default_quoting_options. */
153 return (o ? o : &default_quoting_options)->style;
156 /* In O (or in the default if O is null),
157 set the value of the quoting style to S. */
159 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O this overwrites the style in default_quoting_options,
   so it also changes what later calls that pass a null options pointer
   will see. */
161 (o ? o : &default_quoting_options)->style = s;
164 /* In O (or in the default if O is null),
165 set the value of the quoting options for character C to I.
166 Return the old value. Currently, the only values defined for I are
167 0 (the default) and 1 (which means to quote the character even if
168 it would not otherwise be quoted). */
170 set_char_quoting (struct quoting_options *o, char c, int i)
172 unsigned char uc = c;
173 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
174 int shift = uc % INT_BITS;
175 int r = (*p >> shift) & 1;
176 *p ^= ((i & 1) ^ r) << shift;
180 /* MSGID approximates a quotation mark. Return its translation if it
181 has one; otherwise, return either it or "\"", depending on S. */
183 gettext_quote (char const *msgid, enum quoting_style s)
185 char const *translation = _(msgid);
/* Getting back the very same pointer as MSGID is taken to mean that no
   translation was found; only the clocale style then substitutes a
   different fallback. */
186 if (translation == msgid && s == clocale_quoting_style)
191 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
192 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
193 non-quoting-style part of O to control quoting.
194 Terminate the output with a null character, and return the written
195 size of the output, not counting the terminating null.
196 If BUFFERSIZE is too small to store the output string, return the
197 value that would have been returned had BUFFERSIZE been large enough.
198 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
200 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
201 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
202 style specified by O, and O may not be null. */
205 quotearg_buffer_restyled (char *buffer, size_t buffersize,
206 char const *arg, size_t argsize,
207 enum quoting_style quoting_style,
208 struct quoting_options const *o)
/* QUOTE_STRING and QUOTE_STRING_LEN describe the closing quote of the
   selected style; BACKSLASH_ESCAPES is nonzero for the styles that
   emit backslash escapes. */
212 char const *quote_string = 0;
213 size_t quote_string_len = 0;
214 int backslash_escapes = 0;
219 if (len < buffersize) \
225 switch (quoting_style)
227 case c_quoting_style:
229 backslash_escapes = 1;
231 quote_string_len = 1;
234 case escape_quoting_style:
235 backslash_escapes = 1;
/* Both locale styles share this code; they differ only in the fallback
   that gettext_quote chooses when no translation exists. */
238 case locale_quoting_style:
239 case clocale_quoting_style:
241 /* Get translations for open and closing quotation marks.
243 The message catalog should translate "`" to a left
244 quotation mark suitable for the locale, and similarly for
245 "'". If the catalog has no translation,
246 locale_quoting_style quotes `like this', and
247 clocale_quoting_style quotes "like this".
249 For example, an American English Unicode locale should
250 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
251 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
252 MARK). A British English Unicode locale should instead
253 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
254 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
256 char const *left = gettext_quote (N_("`"), quoting_style);
257 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote right away; the closing quote is remembered in
   QUOTE_STRING and appended after the argument has been processed. */
258 for (quote_string = left; *quote_string; quote_string++)
259 STORE (*quote_string);
260 backslash_escapes = 1;
261 quote_string = right;
262 quote_string_len = strlen (quote_string);
266 case shell_always_quoting_style:
269 quote_string_len = 1;
/* Walk ARG byte by byte: stop at the terminating null when ARGSIZE is
   (size_t) -1, and after ARGSIZE bytes otherwise. */
276 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* NOTE(review): while ARGSIZE is still (size_t) -1 the bound test below
   cannot fail, so memcmp may be asked to compare bytes past the
   terminating null of ARG when QUOTE_STRING_LEN exceeds what remains --
   confirm whether lines outside this excerpt guard against that. */
281 if (backslash_escapes
283 && i + quote_string_len <= argsize
284 && memcmp (arg + i, quote_string, quote_string_len) == 0)
291 switch (quoting_style)
293 case shell_quoting_style:
294 goto use_shell_always_quoting_style;
296 case c_quoting_style:
297 if (i + 2 < argsize && arg[i + 1] == '?')
301 case '(': case ')': case '-': case '/':
302 case '<': case '=': case '>':
303 /* Escape the second '?' in what would otherwise be
319 case ALERT_CHAR: esc = 'a'; goto c_escape;
320 case '\b': esc = 'b'; goto c_escape;
321 case '\f': esc = 'f'; goto c_escape;
322 case '\n': esc = 'n'; goto c_and_shell_escape;
323 case '\r': esc = 'r'; goto c_and_shell_escape;
324 case '\t': esc = 't'; goto c_and_shell_escape;
325 case '\v': esc = 'v'; goto c_escape;
326 case '\\': esc = c; goto c_and_shell_escape;
329 if (quoting_style == shell_quoting_style)
330 goto use_shell_always_quoting_style;
332 if (backslash_escapes)
344 case '!': /* special in bash */
345 case '"': case '$': case '&':
346 case '(': case ')': case '*': case ';':
347 case '<': case '>': case '[':
348 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
350 /* A shell special character. In theory, '$' and '`' could
351 be the first bytes of multibyte characters, which means
352 we should check them with mbrtowc, but in practice this
353 doesn't happen so it's not worth worrying about. */
354 if (quoting_style == shell_quoting_style)
355 goto use_shell_always_quoting_style;
359 switch (quoting_style)
361 case shell_quoting_style:
362 goto use_shell_always_quoting_style;
364 case shell_always_quoting_style:
375 case '%': case '+': case ',': case '-': case '.': case '/':
376 case '0': case '1': case '2': case '3': case '4': case '5':
377 case '6': case '7': case '8': case '9': case ':': case '=':
378 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
379 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
380 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
381 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
382 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
383 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
384 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
385 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
386 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
388 /* These characters don't cause problems, no matter what the
389 quoting style is. They cannot start multibyte sequences. */
393 /* If we have a multibyte sequence, copy it until we reach
394 its end, find an error, or come back to the initial shift
395 state. For C-like styles, if the sequence has
396 unprintable characters, escape the whole sequence, since
397 we can't easily escape single characters within it. */
399 /* Length of multibyte sequence found so far. */
/* A zero-filled conversion state object describes the initial state. */
404 memset (&mbstate, 0, sizeof mbstate);
/* The multibyte scan needs a concrete byte count, so the use-strlen
   sentinel is resolved here, once. */
406 if (argsize == (size_t) -1)
407 argsize = strlen (arg);
412 size_t bytes = mbrtowc (&w, &arg[i + m],
413 argsize - (i + m), &mbstate);
/* mbrtowc reports an invalid sequence as (size_t) -1 and an incomplete
   one as (size_t) -2. */
416 else if (bytes == (size_t) -1)
421 else if (bytes == (size_t) -2)
424 while (i + m < argsize && arg[i + m])
435 while (! mbsinit (&mbstate));
439 /* Escape a unibyte character like a multibyte
440 sequence if using backslash escapes, and if the
441 character is not printable. */
442 m = backslash_escapes && ! ISPRINT (c);
448 /* Output a multibyte sequence, or an escaped
449 unprintable unibyte character. */
/* IMAX is the index of the last byte of the M-byte sequence that
   starts at I. */
450 size_t imax = i + m - 1;
454 if (backslash_escapes && ! printable)
/* Octal digits of the byte: the two stores visible here emit bits 7-6
   and then bits 5-3. */
457 STORE ('0' + (c >> 6));
458 STORE ('0' + ((c >> 3) & 7));
/* NOTE(review): 1 << (c % INT_BITS) shifts into the sign bit when the
   remainder is INT_BITS - 1, which is undefined for a signed int --
   consider doing this test in unsigned arithmetic. */
472 if (! (backslash_escapes
473 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Append whatever remains of QUOTE_STRING, i.e. the closing quote. */
484 for (; *quote_string; quote_string++)
485 STORE (*quote_string);
/* NOTE(review): presumably guards the store of the terminating null so
   that it happens only when it fits in BUFFER -- the guarded statement
   is not visible here. */
487 if (len < buffersize)
/* Reached when plain shell style meets input that needs quoting: the
   whole argument is processed again with shell_always_quoting_style. */
491 use_shell_always_quoting_style:
492 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
493 shell_always_quoting_style, o);
496 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
497 argument ARG (of size ARGSIZE), using O to control quoting.
498 If O is null, use the default.
499 Terminate the output with a null character, and return the written
500 size of the output, not counting the terminating null.
501 If BUFFERSIZE is too small to store the output string, return the
502 value that would have been returned had BUFFERSIZE been large enough.
503 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
505 quotearg_buffer (char *buffer, size_t buffersize,
506 char const *arg, size_t argsize,
507 struct quoting_options const *o)
/* P is the effective option set: O itself, or the file-wide defaults
   when O is null. */
509 struct quoting_options const *p = o ? o : &default_quoting_options;
510 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
514 /* Use storage slot N to return a quoted version of the string ARG.
515 OPTIONS specifies the quoting options.
516 The returned value points to static storage that can be
517 reused by the next call to this function with the same value of N.
518 N must be nonnegative. N is deliberately declared with type "int"
519 to allow for future extensions (using negative values). */
521 quotearg_n_options (int n, char const *arg,
522 struct quoting_options const *options)
/* The slot table is static, so it persists across calls; NSLOTS records
   how many entries it currently has. */
524 static unsigned int nslots;
525 static struct slotvec
/* S is the byte size of a table with N1 slots. The test that follows
   rejects a non-positive N1 and a multiplication that wrapped around
   (dividing S back must give N1 again). */
534 size_t s = n1 * sizeof (struct slotvec);
535 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
537 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero-fill only the slots that were just added. */
538 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
543 size_t size = slotvec[n].size;
544 char *val = slotvec[n].val;
/* First attempt: quote into the current buffer of the slot. QSIZE is
   the length of the full result, not counting the terminating null,
   even if it did not fit. */
545 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Resize the buffer to QSIZE + 1 bytes and quote again.
   NOTE(review): the condition guarding these lines is not visible;
   presumably they run only when the first attempt did not fit. */
549 slotvec[n].size = size = qsize + 1;
550 slotvec[n].val = val = xrealloc (val, size);
551 quotearg_buffer (val, size, arg, (size_t) -1, options);
559 quotearg_n (unsigned int n, char const *arg)
/* Quote ARG into slot N using the default quoting options.
   NOTE(review): N is unsigned here while quotearg_n_options takes an
   int, so a value above INT_MAX would be converted in an
   implementation-defined way -- confirm callers never pass one. */
561 return quotearg_n_options (n, arg, &default_quoting_options);
565 quotearg (char const *arg)
/* Convenience form of quotearg_n that always uses slot 0. */
567 return quotearg_n (0, arg);
571 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
/* Quote ARG into slot N with a temporary option set in which no extra
   characters are marked for quoting.
   NOTE(review): the statement that stores S into the temporary options
   is not visible in this excerpt -- confirm it is present. */
573 struct quoting_options o;
575 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
576 return quotearg_n_options (n, arg, &o);
580 quotearg_style (enum quoting_style s, char const *arg)
/* Convenience form of quotearg_n_style that always uses slot 0. */
582 return quotearg_n_style (0, s, arg);
586 quotearg_char (char const *arg, char ch)
/* Quote ARG into slot 0 with the default options, additionally forcing
   CH to be quoted. The flag is set on a local copy, so the defaults
   themselves are left unchanged. */
588 struct quoting_options options;
589 options = default_quoting_options;
590 set_char_quoting (&options, ch, 1);
591 return quotearg_n_options (0, arg, &options);
595 quotearg_colon (char const *arg)
/* Same as quotearg_char with the colon as the character to force. */
597 return quotearg_char (arg, ':');