From 12247f770487cd389e515a51d6bfeb250f26e519 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Sun, 23 Aug 2009 18:24:53 -0400 Subject: [PATCH] quotearg: implement custom_quoting_style * lib/quotearg.c: (struct quoting_options): Add left_quote and right_quote fields. (set_custom_quoting): New public function. (quotearg_buffer_restyled): Add left_quote and right_quote arguments, handle them very much like locale quoting, and update all uses. (quotearg_n_custom): New public function. (quotearg_n_custom_mem): New public function. (quotearg_custom): New public function. (quotearg_custom_mem): New public function. * lib/quotearg.h: Prototype and document new public functions. (enum quoting_style): For escape_quoting_style and clocale_quoting_style, comment that QA_SPLIT_TRIGRAPHS is ignored even though they're otherwise like c_quoting_style. Add custom_quoting_style member and document with comparison to clocale_quoting_style. * tests/test-quotearg.c (custom_quotes): New array. (custom_results): New array. (main): Extend to test custom quoting. --- ChangeLog | 23 ++++++++ lib/quotearg.c | 141 ++++++++++++++++++++++++++++++++++++++------------ lib/quotearg.h | 87 +++++++++++++++++++++++++++++-- tests/test-quotearg.c | 98 +++++++++++++++++++++++++++++++++++ 4 files changed, 314 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index 57f5f32e2..c71b417c8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +2009-08-23 Joel E. Denny + + quotearg: implement custom_quoting_style + * lib/quotearg.c: (struct quoting_options): Add left_quote and + right_quote fields. + (set_custom_quoting): New public function. + (quotearg_buffer_restyled): Add left_quote and right_quote + arguments, handle them very much like locale quoting, and update + all uses. + (quotearg_n_custom): New public function. + (quotearg_n_custom_mem): New public function. + (quotearg_custom): New public function. + (quotearg_custom_mem): New public function. 
+ * lib/quotearg.h: Prototype and document new public functions. + (enum quoting_style): For escape_quoting_style and + clocale_quoting_style, comment that QA_SPLIT_TRIGRAPHS is + ignored even though they're otherwise like c_quoting_style. + Add custom_quoting_style member and document with comparison to + clocale_quoting_style. + * tests/test-quotearg.c (custom_quotes): New array. + (custom_results): New array. + (main): Extend to test custom quoting. + 2009-08-22 Joel E. Denny quotearg: fix right quote escaping when it's in quote_these_too diff --git a/lib/quotearg.c b/lib/quotearg.c index 3f9e628cf..314c627be 100644 --- a/lib/quotearg.c +++ b/lib/quotearg.c @@ -54,6 +54,12 @@ struct quoting_options /* Quote the characters indicated by this bit vector even if the quoting style would not normally require them to be quoted. */ unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; + + /* The left quote for custom_quoting_style. */ + char const *left_quote; + + /* The right quote for custom_quoting_style. */ + char const *right_quote; }; /* Names of quoting styles. */ @@ -146,6 +152,19 @@ set_quoting_flags (struct quoting_options *o, int i) return r; } +void +set_custom_quoting (struct quoting_options *o, + char const *left_quote, char const *right_quote) +{ + if (!o) + o = &default_quoting_options; + o->style = custom_quoting_style; + if (!left_quote || !right_quote) + abort (); + o->left_quote = left_quote; + o->right_quote = right_quote; +} + /* Return quoting options for STYLE, with no extra quoting. 
*/ static struct quoting_options quoting_options_from_style (enum quoting_style style) @@ -185,7 +204,9 @@ static size_t quotearg_buffer_restyled (char *buffer, size_t buffersize, char const *arg, size_t argsize, enum quoting_style quoting_style, int flags, - unsigned int const *quote_these_too) + unsigned int const *quote_these_too, + char const *left_quote, + char const *right_quote) { size_t i; size_t len = 0; @@ -225,34 +246,37 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case locale_quoting_style: case clocale_quoting_style: + case custom_quoting_style: { - /* TRANSLATORS: - Get translations for open and closing quotation marks. - - The message catalog should translate "`" to a left - quotation mark suitable for the locale, and similarly for - "'". If the catalog has no translation, - locale_quoting_style quotes `like this', and - clocale_quoting_style quotes "like this". - - For example, an American English Unicode locale should - translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and - should translate "'" to U+201D (RIGHT DOUBLE QUOTATION - MARK). A British English Unicode locale should instead - translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and - U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. - - If you don't know what to put here, please see - <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> - and use glyphs suitable for your language. */ - - char const *left = gettext_quote (N_("`"), quoting_style); - char const *right = gettext_quote (N_("'"), quoting_style); + if (quoting_style != custom_quoting_style) + { + /* TRANSLATORS: + Get translations for open and closing quotation marks. + + The message catalog should translate "`" to a left + quotation mark suitable for the locale, and similarly for + "'". If the catalog has no translation, + locale_quoting_style quotes `like this', and + clocale_quoting_style quotes "like this". 
+ + For example, an American English Unicode locale should + translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and + should translate "'" to U+201D (RIGHT DOUBLE QUOTATION + MARK). A British English Unicode locale should instead + translate these to U+2018 (LEFT SINGLE QUOTATION MARK) + and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. + + If you don't know what to put here, please see + <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> + and use glyphs suitable for your language. */ + left_quote = gettext_quote (N_("`"), quoting_style); + right_quote = gettext_quote (N_("'"), quoting_style); + } if (!elide_outer_quotes) - for (quote_string = left; *quote_string; quote_string++) + for (quote_string = left_quote; *quote_string; quote_string++) STORE (*quote_string); backslash_escapes = true; - quote_string = right; + quote_string = right_quote; quote_string_len = strlen (quote_string); } break; @@ -301,6 +325,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, if (elide_outer_quotes) goto force_outer_quoting_style; STORE ('\\'); + /* If quote_string were to begin with digits, we'd need to + test for the end of the arg as well. However, it's + hard to imagine any locale that would use digits in + quotes, and set_custom_quoting is documented not to + accept them. */ if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9') { STORE ('0'); @@ -426,7 +455,13 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': /* These characters don't cause problems, no matter what the - quoting style is. They cannot start multibyte sequences. */ + quoting style is. They cannot start multibyte sequences. + A digit or a special letter would cause trouble if it + appeared at the beginning of quote_string because we'd then + escape by prepending a backslash. 
However, it's hard to + imagine any locale that would use digits or letters as + quotes, and set_custom_quoting is documented not to accept + them. */ break; default: @@ -570,7 +605,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, sufficiently quotes the specified characters. */ return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, quoting_style, - flags & ~QA_ELIDE_OUTER_QUOTES, NULL); + flags & ~QA_ELIDE_OUTER_QUOTES, NULL, + left_quote, right_quote); } /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of @@ -590,7 +626,8 @@ quotearg_buffer (char *buffer, size_t buffersize, struct quoting_options const *p = o ? o : &default_quoting_options; int e = errno; size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, - p->style, p->flags, p->quote_these_too); + p->style, p->flags, p->quote_these_too, + p->left_quote, p->right_quote); errno = e; return r; } @@ -618,10 +655,13 @@ quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size, /* Elide embedded null bytes if we can't return a size. */ int flags = p->flags | (size ? 
0 : QA_ELIDE_NULL_BYTES); size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style, - flags, p->quote_these_too) + 1; + flags, p->quote_these_too, + p->left_quote, + p->right_quote) + 1; char *buf = xcharalloc (bufsize); quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, - p->quote_these_too); + p->quote_these_too, + p->left_quote, p->right_quote); errno = e; if (size) *size = bufsize - 1; @@ -710,7 +750,9 @@ quotearg_n_options (int n, char const *arg, size_t argsize, int flags = options->flags | QA_ELIDE_NULL_BYTES; size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize, options->style, flags, - options->quote_these_too); + options->quote_these_too, + options->left_quote, + options->right_quote); if (size <= qsize) { @@ -719,7 +761,9 @@ quotearg_n_options (int n, char const *arg, size_t argsize, free (val); sv[n].val = val = xcharalloc (size); quotearg_buffer_restyled (val, size, arg, argsize, options->style, - flags, options->quote_these_too); + flags, options->quote_these_too, + options->left_quote, + options->right_quote); } errno = e; @@ -804,3 +848,36 @@ quotearg_colon_mem (char const *arg, size_t argsize) { return quotearg_char_mem (arg, argsize, ':'); } + +char * +quotearg_n_custom (int n, char const *left_quote, + char const *right_quote, char const *arg) +{ + return quotearg_n_custom_mem (n, left_quote, right_quote, arg, + SIZE_MAX); +} + +char * +quotearg_n_custom_mem (int n, char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize) +{ + struct quoting_options o = default_quoting_options; + set_custom_quoting (&o, left_quote, right_quote); + return quotearg_n_options (n, arg, argsize, &o); +} + +char * +quotearg_custom (char const *left_quote, char const *right_quote, + char const *arg) +{ + return quotearg_n_custom (0, left_quote, right_quote, arg); +} + +char * +quotearg_custom_mem (char const *left_quote, char const *right_quote, + char const *arg, size_t argsize) +{ + return 
quotearg_n_custom_mem (0, left_quote, right_quote, arg, + argsize); +} diff --git a/lib/quotearg.h b/lib/quotearg.h index 770010700..e9f6f56a3 100644 --- a/lib/quotearg.h +++ b/lib/quotearg.h @@ -100,7 +100,8 @@ enum quoting_style c_maybe_quoting_style, /* Like c_quoting_style except always omit the surrounding - double-quote characters (ls --quoting-style=escape). + double-quote characters and ignore QA_SPLIT_TRIGRAPHS + (ls --quoting-style=escape). quotearg_buffer: "simple", "\\0 \\t\\n'\"\\033??/\\\\", "a:b" @@ -136,7 +137,8 @@ enum quoting_style locale_quoting_style, /* Like c_quoting_style except use quotation marks appropriate for - the locale (ls --quoting-style=clocale). + the locale and ignore QA_SPLIT_TRIGRAPHS + (ls --quoting-style=clocale). LC_MESSAGES=C quotearg_buffer: @@ -157,7 +159,50 @@ enum quoting_style "\302\253simple\302\273", "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a\\:b\302\273" */ - clocale_quoting_style + clocale_quoting_style, + + /* Like clocale_quoting_style except use the custom quotation marks + set by set_custom_quoting. If custom quotation marks are not + set, the behavior is undefined. 
+ + left_quote = right_quote = "'" + quotearg_buffer: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'" + quotearg: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'" + quotearg_colon: + "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a\\:b'" + + left_quote = "(" and right_quote = ")" + quotearg_buffer: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)" + quotearg: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)" + quotearg_colon: + "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a\\:b)" + + left_quote = ":" and right_quote = " " + quotearg_buffer: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b " + quotearg: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b " + quotearg_colon: + ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a\\:b " + + left_quote = "\"'" and right_quote = "'\"" + Notice that this is treated as a single level of quotes or two + levels where the outer quote need not be escaped within the inner + quotes. For two levels where the outer quote must be escaped + within the inner quotes, you must use separate quotearg + invocations. + quotearg_buffer: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\"" + quotearg: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\"" + quotearg_colon: + "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a\\:b'\"" + */ + custom_quoting_style }; /* Flags for use in set_quoting_flags. */ @@ -219,6 +264,19 @@ int set_char_quoting (struct quoting_options *o, char c, int i); behavior. Return the old value. */ int set_quoting_flags (struct quoting_options *o, int i); +/* In O (or in the default if O is null), + set the value of the quoting style to custom_quoting_style, + set the left quote to LEFT_QUOTE, and set the right quote to + RIGHT_QUOTE. Each of LEFT_QUOTE and RIGHT_QUOTE must be + null-terminated and can be the empty string. Because backslashes are + used for escaping, it does not make sense for RIGHT_QUOTE to contain + a backslash. 
RIGHT_QUOTE must not begin with a digit or a letter + that has special meaning after a backslash (for example, "\t" for + tab). */ +void set_custom_quoting (struct quoting_options *o, + char const *left_quote, + char const *right_quote); + /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of argument ARG (of size ARGSIZE), using O to control quoting. If O is null, use the default. @@ -299,6 +357,29 @@ char *quotearg_colon (char const *arg); /* Like quotearg_colon (ARG), except it can quote null bytes. */ char *quotearg_colon_mem (char const *arg, size_t argsize); +/* Like quotearg_n_style (N, S, ARG) but with S as custom_quoting_style + with left quote as LEFT_QUOTE and right quote as RIGHT_QUOTE. See + set_custom_quoting for a description of acceptable LEFT_QUOTE and + RIGHT_QUOTE values. */ +char *quotearg_n_custom (int n, char const *left_quote, + char const *right_quote, char const *arg); + +/* Like quotearg_n_custom (N, LEFT_QUOTE, RIGHT_QUOTE, ARG) except it + can quote null bytes. */ +char *quotearg_n_custom_mem (int n, char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize); + +/* Equivalent to quotearg_n_custom (0, LEFT_QUOTE, RIGHT_QUOTE, ARG). */ +char *quotearg_custom (char const *left_quote, char const *right_quote, + char const *arg); + +/* Equivalent to quotearg_n_custom_mem (0, LEFT_QUOTE, RIGHT_QUOTE, ARG, + ARGSIZE). */ +char *quotearg_custom_mem (char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize); + /* Free any dynamically allocated memory. 
*/ void quotearg_free (void); diff --git a/tests/test-quotearg.c b/tests/test-quotearg.c index 2e2c56b0d..65779ade5 100644 --- a/tests/test-quotearg.c +++ b/tests/test-quotearg.c @@ -199,6 +199,95 @@ static struct result_groups locale_results[] = { #endif /* ENABLE_NLS */ +static char const *custom_quotes[][2] = { + { "", "" }, + { "'", "'" }, + { "(", ")" }, + { ":", " " }, + { " ", ":" }, + { "# ", "\n" }, + { "\"'", "'\"" } +}; + +static struct result_groups custom_results[] = { + /* left_quote = right_quote = "" */ + { { "", "\\0001\\0", 7, "simple", + " \\t\\n'\"\\033?""?/\\\\", "a:b", "a\\\\b", + LQ_ENC RQ_ENC }, + { "", "\\0001\\0", 7, "simple", + " \\t\\n'\"\\033?""?/\\\\", "a:b", "a\\\\b", + LQ_ENC RQ_ENC }, + { "", "\\0001\\0", 7, "simple", + " \\t\\n'\"\\033?""?/\\\\", "a\\:b", "a\\\\b", + LQ_ENC RQ_ENC } }, + + /* left_quote = right_quote = "'" */ + { { "''", "'\\0001\\0'", 9, "'simple'", + "' \\t\\n\\'\"\\033?""?/\\\\'", "'a:b'", "'a\\\\b'", + "'" LQ_ENC RQ_ENC "'" }, + { "''", "'\\0001\\0'", 9, "'simple'", + "' \\t\\n\\'\"\\033?""?/\\\\'", "'a:b'", "'a\\\\b'", + "'" LQ_ENC RQ_ENC "'" }, + { "''", "'\\0001\\0'", 9, "'simple'", + "' \\t\\n\\'\"\\033?""?/\\\\'", "'a\\:b'", "'a\\\\b'", + "'" LQ_ENC RQ_ENC "'" } }, + + /* left_quote = "(" and right_quote = ")" */ + { { "()", "(\\0001\\0)", 9, "(simple)", + "( \\t\\n'\"\\033?""?/\\\\)", "(a:b)", "(a\\\\b)", + "(" LQ_ENC RQ_ENC ")" }, + { "()", "(\\0001\\0)", 9, "(simple)", + "( \\t\\n'\"\\033?""?/\\\\)", "(a:b)", "(a\\\\b)", + "(" LQ_ENC RQ_ENC ")" }, + { "()", "(\\0001\\0)", 9, "(simple)", + "( \\t\\n'\"\\033?""?/\\\\)", "(a\\:b)", "(a\\\\b)", + "(" LQ_ENC RQ_ENC ")" } }, + + /* left_quote = ":" and right_quote = " " */ + { { ": ", ":\\0001\\0 ", 9, ":simple ", + ":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a:b ", ":a\\\\b ", + ":" LQ_ENC RQ_ENC " " }, + { ": ", ":\\0001\\0 ", 9, ":simple ", + ":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a:b ", ":a\\\\b ", + ":" LQ_ENC RQ_ENC " " }, + { ": ", ":\\0001\\0 ", 9, ":simple ", + 
":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a\\:b ", ":a\\\\b ", + ":" LQ_ENC RQ_ENC " " } }, + + /* left_quote = " " and right_quote = ":" */ + { { " :", " \\0001\\0:", 9, " simple:", + " \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:", + " " LQ_ENC RQ_ENC ":" }, + { " :", " \\0001\\0:", 9, " simple:", + " \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:", + " " LQ_ENC RQ_ENC ":" }, + { " :", " \\0001\\0:", 9, " simple:", + " \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:", + " " LQ_ENC RQ_ENC ":" } }, + + /* left_quote = "# " and right_quote = "\n" */ + { { "# \n", "# \\0001\\0\n", 10, "# simple\n", + "# \\t\\n'\"\\033?""?/\\\\\n", "# a:b\n", "# a\\\\b\n", + "# " LQ_ENC RQ_ENC "\n" }, + { "# \n", "# \\0001\\0\n", 10, "# simple\n", + "# \\t\\n'\"\\033?""?/\\\\\n", "# a:b\n", "# a\\\\b\n", + "# " LQ_ENC RQ_ENC "\n" }, + { "# \n", "# \\0001\\0\n", 10, "# simple\n", + "# \\t\\n'\"\\033?""?/\\\\\n", "# a\\:b\n", "# a\\\\b\n", + "# " LQ_ENC RQ_ENC "\n" } }, + + /* left_quote = "\"'" and right_quote = "'\"" */ + { { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"", + "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a:b'\"", "\"'a\\\\b'\"", + "\"'" LQ_ENC RQ_ENC "'\"" }, + { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"", + "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a:b'\"", "\"'a\\\\b'\"", + "\"'" LQ_ENC RQ_ENC "'\"" }, + { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"", + "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a\\:b'\"", "\"'a\\\\b'\"", + "\"'" LQ_ENC RQ_ENC "'\"" } } +}; + static void compare (char const *a, size_t la, char const *b, size_t lb) { @@ -324,6 +413,15 @@ main (int argc, char *argv[]) ASSERT (set_quoting_flags (NULL, 0) == QA_SPLIT_TRIGRAPHS); + for (i = 0; i < sizeof custom_quotes / sizeof *custom_quotes; ++i) + { + set_custom_quoting (NULL, + custom_quotes[i][0], custom_quotes[i][1]); + compare_strings (use_quotearg_buffer, &custom_results[i].group1); + compare_strings (use_quotearg, &custom_results[i].group2); + compare_strings (use_quotearg_colon, 
&custom_results[i].group3); + } + #if ENABLE_NLS /* Clean up environment. */ unsetenv ("LANGUAGE"); -- 2.11.0