quotearg: implement custom_quoting_style
author Joel E. Denny <jdenny@clemson.edu>
Sun, 23 Aug 2009 22:24:53 +0000 (18:24 -0400)
committer Joel E. Denny <jdenny@clemson.edu>
Sun, 23 Aug 2009 22:30:34 +0000 (18:30 -0400)
* lib/quotearg.c: (struct quoting_options): Add left_quote and
right_quote fields.
(set_custom_quoting): New public function.
(quotearg_buffer_restyled): Add left_quote and right_quote
arguments, handle them very much like locale quoting, and update
all uses.
(quotearg_n_custom): New public function.
(quotearg_n_custom_mem): New public function.
(quotearg_custom): New public function.
(quotearg_custom_mem): New public function.
* lib/quotearg.h: Prototype and document new public functions.
(enum quoting_style): For escape_quoting_style and
clocale_quoting_style, comment that QA_SPLIT_TRIGRAPHS is
ignored even though they're otherwise like c_quoting_style.
Add custom_quoting_style member and document with comparison to
clocale_quoting_style.
* tests/test-quotearg.c (custom_quotes): New array.
(custom_results): New array.
(main): Extend to test custom quoting.

ChangeLog
lib/quotearg.c
lib/quotearg.h
tests/test-quotearg.c

index 57f5f32..c71b417 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2009-08-23  Joel E. Denny  <jdenny@clemson.edu>
+
+       quotearg: implement custom_quoting_style
+       * lib/quotearg.c: (struct quoting_options): Add left_quote and
+       right_quote fields.
+       (set_custom_quoting): New public function.
+       (quotearg_buffer_restyled): Add left_quote and right_quote
+       arguments, handle them very much like locale quoting, and update
+       all uses.
+       (quotearg_n_custom): New public function.
+       (quotearg_n_custom_mem): New public function.
+       (quotearg_custom): New public function.
+       (quotearg_custom_mem): New public function.
+       * lib/quotearg.h: Prototype and document new public functions.
+       (enum quoting_style): For escape_quoting_style and
+       clocale_quoting_style, comment that QA_SPLIT_TRIGRAPHS is
+       ignored even though they're otherwise like c_quoting_style.
+       Add custom_quoting_style member and document with comparison to
+       clocale_quoting_style.
+       * tests/test-quotearg.c (custom_quotes): New array.
+       (custom_results): New array.
+       (main): Extend to test custom quoting.
+
 2009-08-22  Joel E. Denny  <jdenny@clemson.edu>
 
        quotearg: fix right quote escaping when it's in quote_these_too
index 3f9e628..314c627 100644 (file)
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -54,6 +54,12 @@ struct quoting_options
   /* Quote the characters indicated by this bit vector even if the
      quoting style would not normally require them to be quoted.  */
   unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
+
+  /* The left quote for custom_quoting_style.  */
+  char const *left_quote;
+
+  /* The right quote for custom_quoting_style.  */
+  char const *right_quote;
 };
 
 /* Names of quoting styles.  */
@@ -146,6 +152,19 @@ set_quoting_flags (struct quoting_options *o, int i)
   return r;
 }
 
+void
+set_custom_quoting (struct quoting_options *o,
+                    char const *left_quote, char const *right_quote)
+{
+  if (!o)
+    o = &default_quoting_options;
+  o->style = custom_quoting_style;
+  if (!left_quote || !right_quote)
+    abort ();
+  o->left_quote = left_quote;
+  o->right_quote = right_quote;
+}
+
 /* Return quoting options for STYLE, with no extra quoting.  */
 static struct quoting_options
 quoting_options_from_style (enum quoting_style style)
@@ -185,7 +204,9 @@ static size_t
 quotearg_buffer_restyled (char *buffer, size_t buffersize,
                          char const *arg, size_t argsize,
                          enum quoting_style quoting_style, int flags,
-                         unsigned int const *quote_these_too)
+                         unsigned int const *quote_these_too,
+                         char const *left_quote,
+                         char const *right_quote)
 {
   size_t i;
   size_t len = 0;
@@ -225,34 +246,37 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
 
     case locale_quoting_style:
     case clocale_quoting_style:
+    case custom_quoting_style:
       {
-       /* TRANSLATORS:
-          Get translations for open and closing quotation marks.
-
-          The message catalog should translate "`" to a left
-          quotation mark suitable for the locale, and similarly for
-          "'".  If the catalog has no translation,
-          locale_quoting_style quotes `like this', and
-          clocale_quoting_style quotes "like this".
-
-          For example, an American English Unicode locale should
-          translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
-          should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
-          MARK).  A British English Unicode locale should instead
-          translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
-          U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
-
-          If you don't know what to put here, please see
-          <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
-          and use glyphs suitable for your language.  */
-
-       char const *left = gettext_quote (N_("`"), quoting_style);
-       char const *right = gettext_quote (N_("'"), quoting_style);
+       if (quoting_style != custom_quoting_style)
+         {
+           /* TRANSLATORS:
+              Get translations for open and closing quotation marks.
+
+              The message catalog should translate "`" to a left
+              quotation mark suitable for the locale, and similarly for
+              "'".  If the catalog has no translation,
+              locale_quoting_style quotes `like this', and
+              clocale_quoting_style quotes "like this".
+
+              For example, an American English Unicode locale should
+              translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
+              should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
+              MARK).  A British English Unicode locale should instead
+              translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
+              and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
+
+              If you don't know what to put here, please see
+              <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
+              and use glyphs suitable for your language.  */
+           left_quote = gettext_quote (N_("`"), quoting_style);
+           right_quote = gettext_quote (N_("'"), quoting_style);
+         }
        if (!elide_outer_quotes)
-         for (quote_string = left; *quote_string; quote_string++)
+         for (quote_string = left_quote; *quote_string; quote_string++)
            STORE (*quote_string);
        backslash_escapes = true;
-       quote_string = right;
+       quote_string = right_quote;
        quote_string_len = strlen (quote_string);
       }
       break;
@@ -301,6 +325,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
              if (elide_outer_quotes)
                goto force_outer_quoting_style;
              STORE ('\\');
+             /* If quote_string were to begin with digits, we'd need to
+                test for the end of the arg as well.  However, it's
+                hard to imagine any locale that would use digits in
+                quotes, and set_custom_quoting is documented not to
+                accept them.  */
              if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
                {
                  STORE ('0');
@@ -426,7 +455,13 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
        case 'o': case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
          /* These characters don't cause problems, no matter what the
-            quoting style is.  They cannot start multibyte sequences.  */
+            quoting style is.  They cannot start multibyte sequences.
+            A digit or a special letter would cause trouble if it
+            appeared at the beginning of quote_string because we'd then
+            escape by prepending a backslash.  However, it's hard to
+            imagine any locale that would use digits or letters as
+            quotes, and set_custom_quoting is documented not to accept
+            them.  */
          break;
 
        default:
@@ -570,7 +605,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
      sufficiently quotes the specified characters.  */
   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
                                   quoting_style,
-                                  flags & ~QA_ELIDE_OUTER_QUOTES, NULL);
+                                  flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
+                                  left_quote, right_quote);
 }
 
 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
@@ -590,7 +626,8 @@ quotearg_buffer (char *buffer, size_t buffersize,
   struct quoting_options const *p = o ? o : &default_quoting_options;
   int e = errno;
   size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
-                                      p->style, p->flags, p->quote_these_too);
+                                      p->style, p->flags, p->quote_these_too,
+                                      p->left_quote, p->right_quote);
   errno = e;
   return r;
 }
@@ -618,10 +655,13 @@ quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
   /* Elide embedded null bytes if we can't return a size.  */
   int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
   size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
-                                            flags, p->quote_these_too) + 1;
+                                            flags, p->quote_these_too,
+                                            p->left_quote,
+                                            p->right_quote) + 1;
   char *buf = xcharalloc (bufsize);
   quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
-                           p->quote_these_too);
+                           p->quote_these_too,
+                           p->left_quote, p->right_quote);
   errno = e;
   if (size)
     *size = bufsize - 1;
@@ -710,7 +750,9 @@ quotearg_n_options (int n, char const *arg, size_t argsize,
     int flags = options->flags | QA_ELIDE_NULL_BYTES;
     size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
                                             options->style, flags,
-                                            options->quote_these_too);
+                                            options->quote_these_too,
+                                            options->left_quote,
+                                            options->right_quote);
 
     if (size <= qsize)
       {
@@ -719,7 +761,9 @@ quotearg_n_options (int n, char const *arg, size_t argsize,
          free (val);
        sv[n].val = val = xcharalloc (size);
        quotearg_buffer_restyled (val, size, arg, argsize, options->style,
-                                 flags, options->quote_these_too);
+                                 flags, options->quote_these_too,
+                                 options->left_quote,
+                                 options->right_quote);
       }
 
     errno = e;
@@ -804,3 +848,36 @@ quotearg_colon_mem (char const *arg, size_t argsize)
 {
   return quotearg_char_mem (arg, argsize, ':');
 }
+
+char *
+quotearg_n_custom (int n, char const *left_quote,
+                  char const *right_quote, char const *arg)
+{
+  return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
+                               SIZE_MAX);
+}
+
+char *
+quotearg_n_custom_mem (int n, char const *left_quote,
+                      char const *right_quote,
+                      char const *arg, size_t argsize)
+{
+  struct quoting_options o = default_quoting_options;
+  set_custom_quoting (&o, left_quote, right_quote);
+  return quotearg_n_options (n, arg, argsize, &o);
+}
+
+char *
+quotearg_custom (char const *left_quote, char const *right_quote,
+                char const *arg)
+{
+  return quotearg_n_custom (0, left_quote, right_quote, arg);
+}
+
+char *
+quotearg_custom_mem (char const *left_quote, char const *right_quote,
+                    char const *arg, size_t argsize)
+{
+  return quotearg_n_custom_mem (0, left_quote, right_quote, arg,
+                               argsize);
+}
index 7700107..e9f6f56 100644 (file)
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -100,7 +100,8 @@ enum quoting_style
     c_maybe_quoting_style,
 
     /* Like c_quoting_style except always omit the surrounding
-       double-quote characters (ls --quoting-style=escape).
+       double-quote characters and ignore QA_SPLIT_TRIGRAPHS
+       (ls --quoting-style=escape).
 
        quotearg_buffer:
        "simple", "\\0 \\t\\n'\"\\033??/\\\\", "a:b"
@@ -136,7 +137,8 @@ enum quoting_style
     locale_quoting_style,
 
     /* Like c_quoting_style except use quotation marks appropriate for
-       the locale (ls --quoting-style=clocale).
+       the locale and ignore QA_SPLIT_TRIGRAPHS
+       (ls --quoting-style=clocale).
 
        LC_MESSAGES=C
        quotearg_buffer:
@@ -157,7 +159,50 @@ enum quoting_style
        "\302\253simple\302\273",
        "\302\253\\0 \\t\\n'\"\\033??/\\\\\302\253", "\302\253a\\:b\302\273"
     */
-    clocale_quoting_style
+    clocale_quoting_style,
+
+    /* Like clocale_quoting_style except use the custom quotation marks
+       set by set_custom_quoting.  If custom quotation marks are not
+       set, the behavior is undefined.
+
+       left_quote = right_quote = "'"
+       quotearg_buffer:
+       "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'"
+       quotearg:
+       "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a:b'"
+       quotearg_colon:
+       "'simple'", "'\\0 \\t\\n\\'\"\\033??/\\\\'", "'a\\:b'"
+
+       left_quote = "(" and right_quote = ")"
+       quotearg_buffer:
+       "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)"
+       quotearg:
+       "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a:b)"
+       quotearg_colon:
+       "(simple)", "(\\0 \\t\\n'\"\\033??/\\\\)", "(a\\:b)"
+
+       left_quote = ":" and right_quote = " "
+       quotearg_buffer:
+       ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b "
+       quotearg:
+       ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a:b "
+       quotearg_colon:
+       ":simple ", ":\\0\\ \\t\\n'\"\\033??/\\\\ ", ":a\\:b "
+
+       left_quote = "\"'" and right_quote = "'\""
+       Notice that this is treated as a single level of quotes or two
+       levels where the outer quote need not be escaped within the inner
+       quotes.  For two levels where the outer quote must be escaped
+       within the inner quotes, you must use separate quotearg
+       invocations.
+       quotearg_buffer:
+       "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\""
+       quotearg:
+       "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a:b'\""
+       quotearg_colon:
+       "\"'simple'\"", "\"'\\0 \\t\\n\\'\"\\033??/\\\\'\"", "\"'a\\:b'\""
+    */
+    custom_quoting_style
   };
 
 /* Flags for use in set_quoting_flags.  */
@@ -219,6 +264,19 @@ int set_char_quoting (struct quoting_options *o, char c, int i);
    behavior.  Return the old value.  */
 int set_quoting_flags (struct quoting_options *o, int i);
 
+/* In O (or in the default if O is null),
+   set the value of the quoting style to custom_quoting_style,
+   set the left quote to LEFT_QUOTE, and set the right quote to
+   RIGHT_QUOTE.  Each of LEFT_QUOTE and RIGHT_QUOTE must be
+   null-terminated and can be the empty string.  Because backslashes are
+   used for escaping, it does not make sense for RIGHT_QUOTE to contain
+   a backslash.  RIGHT_QUOTE must not begin with a digit or a letter
+   that has special meaning after a backslash (for example, "\t" for
+   tab).  */
+void set_custom_quoting (struct quoting_options *o,
+                        char const *left_quote,
+                        char const *right_quote);
+
 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
    argument ARG (of size ARGSIZE), using O to control quoting.
    If O is null, use the default.
@@ -299,6 +357,29 @@ char *quotearg_colon (char const *arg);
 /* Like quotearg_colon (ARG), except it can quote null bytes.  */
 char *quotearg_colon_mem (char const *arg, size_t argsize);
 
+/* Like quotearg_n_style (N, S, ARG) but with S as custom_quoting_style
+   with left quote as LEFT_QUOTE and right quote as RIGHT_QUOTE.  See
+   set_custom_quoting for a description of acceptable LEFT_QUOTE and
+   RIGHT_QUOTE values.  */
+char *quotearg_n_custom (int n, char const *left_quote,
+                        char const *right_quote, char const *arg);
+
+/* Like quotearg_n_custom (N, LEFT_QUOTE, RIGHT_QUOTE, ARG) except it
+   can quote null bytes.  */
+char *quotearg_n_custom_mem (int n, char const *left_quote,
+                            char const *right_quote,
+                            char const *arg, size_t argsize);
+
+/* Equivalent to quotearg_n_custom (0, LEFT_QUOTE, RIGHT_QUOTE, ARG).  */
+char *quotearg_custom (char const *left_quote, char const *right_quote,
+                      char const *arg);
+
+/* Equivalent to quotearg_n_custom_mem (0, LEFT_QUOTE, RIGHT_QUOTE, ARG,
+                                       ARGSIZE).  */
+char *quotearg_custom_mem (char const *left_quote,
+                          char const *right_quote,
+                          char const *arg, size_t argsize);
+
 /* Free any dynamically allocated memory.  */
 void quotearg_free (void);
 
index 2e2c56b..65779ad 100644 (file)
--- a/tests/test-quotearg.c
+++ b/tests/test-quotearg.c
@@ -199,6 +199,95 @@ static struct result_groups locale_results[] = {
 
 #endif /* ENABLE_NLS */
 
+static char const *custom_quotes[][2] = {
+  { "", ""  },
+  { "'", "'"  },
+  { "(", ")"  },
+  { ":", " "  },
+  { " ", ":"  },
+  { "# ", "\n" },
+  { "\"'", "'\"" }
+};
+
+static struct result_groups custom_results[] = {
+  /* left_quote = right_quote = "" */
+  { { "", "\\0001\\0", 7, "simple",
+      " \\t\\n'\"\\033?""?/\\\\", "a:b", "a\\\\b",
+      LQ_ENC RQ_ENC },
+    { "", "\\0001\\0", 7, "simple",
+      " \\t\\n'\"\\033?""?/\\\\", "a:b", "a\\\\b",
+      LQ_ENC RQ_ENC },
+    { "", "\\0001\\0", 7, "simple",
+      " \\t\\n'\"\\033?""?/\\\\", "a\\:b", "a\\\\b",
+      LQ_ENC RQ_ENC } },
+
+  /* left_quote = right_quote = "'" */
+  { { "''", "'\\0001\\0'", 9, "'simple'",
+      "' \\t\\n\\'\"\\033?""?/\\\\'", "'a:b'", "'a\\\\b'",
+      "'" LQ_ENC RQ_ENC "'" },
+    { "''", "'\\0001\\0'", 9, "'simple'",
+      "' \\t\\n\\'\"\\033?""?/\\\\'", "'a:b'", "'a\\\\b'",
+      "'" LQ_ENC RQ_ENC "'" },
+    { "''", "'\\0001\\0'", 9, "'simple'",
+      "' \\t\\n\\'\"\\033?""?/\\\\'", "'a\\:b'", "'a\\\\b'",
+      "'" LQ_ENC RQ_ENC "'" } },
+
+  /* left_quote = "(" and right_quote = ")" */
+  { { "()", "(\\0001\\0)", 9, "(simple)",
+      "( \\t\\n'\"\\033?""?/\\\\)", "(a:b)", "(a\\\\b)",
+      "(" LQ_ENC RQ_ENC ")" },
+    { "()", "(\\0001\\0)", 9, "(simple)",
+      "( \\t\\n'\"\\033?""?/\\\\)", "(a:b)", "(a\\\\b)",
+      "(" LQ_ENC RQ_ENC ")" },
+    { "()", "(\\0001\\0)", 9, "(simple)",
+      "( \\t\\n'\"\\033?""?/\\\\)", "(a\\:b)", "(a\\\\b)",
+      "(" LQ_ENC RQ_ENC ")" } },
+
+  /* left_quote = ":" and right_quote = " " */
+  { { ": ", ":\\0001\\0 ", 9, ":simple ",
+      ":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a:b ", ":a\\\\b ",
+      ":" LQ_ENC RQ_ENC " " },
+    { ": ", ":\\0001\\0 ", 9, ":simple ",
+      ":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a:b ", ":a\\\\b ",
+      ":" LQ_ENC RQ_ENC " " },
+    { ": ", ":\\0001\\0 ", 9, ":simple ",
+      ":\\ \\t\\n'\"\\033?""?/\\\\ ", ":a\\:b ", ":a\\\\b ",
+      ":" LQ_ENC RQ_ENC " " } },
+
+  /* left_quote = " " and right_quote = ":" */
+  { { " :", " \\0001\\0:", 9, " simple:",
+      "  \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:",
+      " " LQ_ENC RQ_ENC ":" },
+    { " :", " \\0001\\0:", 9, " simple:",
+      "  \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:",
+      " " LQ_ENC RQ_ENC ":" },
+    { " :", " \\0001\\0:", 9, " simple:",
+      "  \\t\\n'\"\\033?""?/\\\\:", " a\\:b:", " a\\\\b:",
+      " " LQ_ENC RQ_ENC ":" } },
+
+  /* left_quote = "# " and right_quote = "\n" */
+  { { "# \n", "# \\0001\\0\n", 10, "# simple\n",
+      "#  \\t\\n'\"\\033?""?/\\\\\n", "# a:b\n", "# a\\\\b\n",
+      "# " LQ_ENC RQ_ENC "\n" },
+    { "# \n", "# \\0001\\0\n", 10, "# simple\n",
+      "#  \\t\\n'\"\\033?""?/\\\\\n", "# a:b\n", "# a\\\\b\n",
+      "# " LQ_ENC RQ_ENC "\n" },
+    { "# \n", "# \\0001\\0\n", 10, "# simple\n",
+      "#  \\t\\n'\"\\033?""?/\\\\\n", "# a\\:b\n", "# a\\\\b\n",
+      "# " LQ_ENC RQ_ENC "\n" } },
+
+  /* left_quote = "\"'" and right_quote = "'\"" */
+  { { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"",
+      "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a:b'\"", "\"'a\\\\b'\"",
+      "\"'" LQ_ENC RQ_ENC "'\"" },
+    { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"",
+      "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a:b'\"", "\"'a\\\\b'\"",
+      "\"'" LQ_ENC RQ_ENC "'\"" },
+    { "\"''\"", "\"'\\0001\\0'\"", 11, "\"'simple'\"",
+      "\"' \\t\\n\\'\"\\033?""?/\\\\'\"", "\"'a\\:b'\"", "\"'a\\\\b'\"",
+      "\"'" LQ_ENC RQ_ENC "'\"" } }
+};
+
 static void
 compare (char const *a, size_t la, char const *b, size_t lb)
 {
@@ -324,6 +413,15 @@ main (int argc, char *argv[])
 
   ASSERT (set_quoting_flags (NULL, 0) == QA_SPLIT_TRIGRAPHS);
 
+  for (i = 0; i < sizeof custom_quotes / sizeof *custom_quotes; ++i)
+    {
+      set_custom_quoting (NULL,
+                          custom_quotes[i][0], custom_quotes[i][1]);
+      compare_strings (use_quotearg_buffer, &custom_results[i].group1);
+      compare_strings (use_quotearg, &custom_results[i].group2);
+      compare_strings (use_quotearg_colon, &custom_results[i].group3);
+    }
+
 #if ENABLE_NLS
   /* Clean up environment.  */
   unsetenv ("LANGUAGE");