+/* Java white space, per The Java Language Specification, 3rd edition, §3.6.
+   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#95710
+   Return true iff CH is a space, horizontal tab, form feed, line feed or
+   carriage return.  */
+static bool
+is_java_whitespace (unsigned int ch)
+{
+  switch (ch)
+    {
+    case ' ':
+    case '\t':
+    case '\f':
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Classify CH for use in Java identifiers.
+   See The Java Language Specification, 3rd edition, §3.8,
+   http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#40625
+   and Character.isJavaIdentifierStart and Character.isJavaIdentifierPart.
+   Return UC_IDENTIFIER_START, UC_IDENTIFIER_VALID, UC_IDENTIFIER_IGNORABLE
+   or UC_IDENTIFIER_INVALID; the classes are tested in that order and the
+   first match wins.  */
+static int
+java_ident_category (unsigned int ch)
+{
+  /* FIXME: Check this against Sun's JDK implementation. */
+
+  /* Characters that may start an identifier.  */
+  if (is_category_L (ch))       /* = Character.isLetter(ch) */
+    return UC_IDENTIFIER_START;
+  if (is_category_Nl (ch))      /* = Character.getType(ch)==LETTER_NUMBER */
+    return UC_IDENTIFIER_START;
+  if (is_category_Sc (ch))      /* currency symbol */
+    return UC_IDENTIFIER_START;
+  if (is_category_Pc (ch))      /* connector punctuation */
+    return UC_IDENTIFIER_START;
+
+  /* Characters that may only continue an identifier.  */
+  if (is_category_Nd (ch))      /* digit */
+    return UC_IDENTIFIER_VALID;
+  if (is_category_Mc (ch))      /* combining mark */
+    return UC_IDENTIFIER_VALID;
+  if (is_category_Mn (ch))      /* non-spacing mark */
+    return UC_IDENTIFIER_VALID;
+
+  /* Ignorable characters: three control ranges plus the format characters.
+     CH is unsigned, so the first range needs no lower bound.  */
+  if (ch <= 0x0008)
+    return UC_IDENTIFIER_IGNORABLE;
+  if (ch >= 0x000E && ch <= 0x001B)
+    return UC_IDENTIFIER_IGNORABLE;
+  if (ch >= 0x007F && ch <= 0x009F)
+    return UC_IDENTIFIER_IGNORABLE;
+  if (is_category_Cf (ch))      /* = Character.getType(ch)==FORMAT */
+    return UC_IDENTIFIER_IGNORABLE;
+
+  return UC_IDENTIFIER_INVALID;
+}
+
+/* Construction of sparse 3-level tables.
+   The macros below are the parameters handed to "3level.h".  The code that
+   follows uses 'struct identsyntax_table' together with
+   identsyntax_table_init, identsyntax_table_add and
+   identsyntax_table_finalize.
+   NOTE(review): xmalloc and xrealloc are mapped directly onto malloc and
+   realloc, so presumably allocation failures go unchecked -- confirm
+   against 3level.h. */
+#define TABLE identsyntax_table
+#define ELEMENT uint8_t
+#define DEFAULT UC_IDENTIFIER_INVALID
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output an identifier syntax categorization in a three-level bitmap.
+   FILENAME is the header file to create.
+   PREDICATE maps a code point to one of the UC_IDENTIFIER_* codes; it is
+   called once for every code point from 0 to 0x10FFFF, and only results
+   other than UC_IDENTIFIER_INVALID are entered into the table.
+   NAME is the identifier of the generated table object.
+   VERSION is the Unicode version string quoted in the file banner.
+   Exits with status 1 if FILENAME cannot be opened or written.  */
+static void
+output_ident_category (const char *filename, int (*predicate) (unsigned int), const char *name, const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct identsyntax_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Language syntax properties of Unicode characters. */\n");
+  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s. */\n",
+           version);
+
+  t.p = 7; /* or 8 */
+  t.q = 5; /* or 4 */
+  identsyntax_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      int syntaxcode = predicate (ch);
+      if (syntaxcode != UC_IDENTIFIER_INVALID)
+        identsyntax_table_add (&t, ch, syntaxcode);
+    }
+
+  identsyntax_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.
+     t.result starts with 5 uint32_t header words, followed by the level1
+     array, the level2 array and finally the level3 bytes.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  /* I is unsigned int and the header words are uint32_t, so both are
+     printed with %u.  The emitted digits are the same as with %d for every
+     value below 2^31.
+     NOTE(review): t.q is printed with %d further down; confirm its declared
+     type in 3level.h.  */
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define identsyntax_header_%u %u\n", i,
+             (unsigned int) ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, " {\n");
+  fprintf (stream, " int level1[%zu];\n", t.level1_size);
+  fprintf (stream, " short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, " unsigned short level3[%zu * %d];\n", t.level3_size,
+           (1 << t.p) * 2 / 16);
+  fprintf (stream, " }\n");
+  fprintf (stream, "%s =\n", name);
+  fprintf (stream, "{\n");
+
+  /* Level 1: a stored offset of 0 is written as -1; any other offset is
+     rebased to an index into the level2 array.  Eight entries per line.  */
+  fprintf (stream, " {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+        fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+
+  /* Level 2: same scheme, rebased to an index into the level3 bytes.  */
+  fprintf (stream, " {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level3_offset) / sizeof (uint8_t));
+      if (i+1 < t.level2_size << t.q)
+        fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+
+  /* Pack the level3 array. Each entry needs 2 bits only.
+     Eight consecutive entries are combined into one 16-bit word, the first
+     entry in the lowest two bits.  */
+  fprintf (stream, " {");
+  if ((t.level3_size << t.p) * 2 / 16 > 8)
+    fprintf (stream, "\n ");
+  for (i = 0; i < (t.level3_size << t.p) * 2 / 16; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n ");
+      fprintf (stream, " 0x%04x",
+               (((uint8_t *) (t.result + level3_offset))[8 * i] << 0)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 1] << 2)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 2] << 4)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 3] << 6)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 4] << 8)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 5] << 10)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 6] << 12)
+               | (((uint8_t *) (t.result + level3_offset))[8 * i + 7] << 14));
+      if (i+1 < (t.level3_size << t.p) * 2 / 16)
+        fprintf (stream, ",");
+    }
+  if ((t.level3_size << t.p) * 2 / 16 > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Generate the language syntax property files.
+   For each of c_whitespace and java_whitespace this writes the debug
+   listing, the unit test and the header; afterwards it writes the
+   identifier category tables for C and Java.  VERSION is handed on to the
+   header generators.  */
+static void
+output_ident_properties (const char *version)
+{
+#define SYNTAX_PROPERTY(P) \
+  do \
+    { \
+      debug_output_predicate ("unictype/sy_" #P ".txt", is_ ## P); \
+      output_predicate_test ("../tests/unictype/test-sy_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+      output_predicate ("unictype/sy_" #P ".h", is_ ## P, "u_" #P, "Language syntax properties", version); \
+    } \
+  while (0)
+
+  SYNTAX_PROPERTY (c_whitespace);
+  SYNTAX_PROPERTY (java_whitespace);
+
+#undef SYNTAX_PROPERTY
+
+  output_ident_category ("unictype/sy_c_ident.h", c_ident_category, "u_c_ident", version);
+  output_ident_category ("unictype/sy_java_ident.h", java_ident_category, "u_java_ident", version);
+}
+
+/* ========================================================================= */
+
+/* Like ISO C <ctype.h> and <wctype.h>. Compatible with glibc's
+ glibc/localedata/locales/i18n file, generated by
+ glibc/localedata/gen-unicode-ctype.c. */
+
+/* Character mappings. */
+
+/* Return the uppercase mapping recorded for CH in unicode_attributes, or CH
+   itself when CH has no name or its mapping is NONE.  */
+static unsigned int
+to_upper (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return ch;
+  if (unicode_attributes[ch].upper == NONE)
+    return ch;
+  return unicode_attributes[ch].upper;
+}
+
+/* Return the lowercase mapping recorded for CH in unicode_attributes, or CH
+   itself when CH has no name or its mapping is NONE.  */
+static unsigned int
+to_lower (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return ch;
+  if (unicode_attributes[ch].lower == NONE)
+    return ch;
+  return unicode_attributes[ch].lower;
+}
+
+/* Return the titlecase mapping recorded for CH in unicode_attributes, or CH
+   itself when CH has no name or its mapping is NONE.  */
+static unsigned int
+to_title (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return ch;
+  if (unicode_attributes[ch].title == NONE)
+    return ch;
+  return unicode_attributes[ch].title;
+}
+
+/* Character class properties. */
+
+/* CH counts as uppercase when lowercasing it yields a different
+   character.  */
+static bool
+is_upper (unsigned int ch)
+{
+  unsigned int lowered = to_lower (ch);
+
+  return lowered != ch;
+}
+
+/* CH counts as lowercase when uppercasing it yields a different character,
+   with one hard-coded addition.  */
+static bool
+is_lower (unsigned int ch)
+{
+  if (to_upper (ch) != ch)
+    return true;
+  /* <U00DF> is lowercase, but without simple to_upper mapping. */
+  return ch == 0x00DF;
+}
+
+/* Return true if CH belongs to the "alpha" class.  Unassigned characters
+   (no name) never do; otherwise the rules below are tried in order and the
+   first match decides.  */
+static bool
+is_alpha (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+
+  /* Letters, except that Theppitak Karoonboonyanan
+     <thep@links.nectec.or.th> says <U0E2F>, <U0E46> should belong to
+     is_punct. */
+  if (unicode_attributes[ch].category[0] == 'L'
+      && ch != 0x0E2F
+      && ch != 0x0E46)
+    return true;
+
+  /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+     <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha. */
+  if (ch == 0x0E31)
+    return true;
+  if (ch >= 0x0E34 && ch <= 0x0E3A)
+    return true;
+  if (ch >= 0x0E47 && ch <= 0x0E4E)
+    return true;
+
+  /* Avoid warning for <U0345>. */
+  if (ch == 0x0345)
+    return true;
+
+  /* Avoid warnings for <U2160>..<U217F>. */
+  if (unicode_attributes[ch].category[0] == 'N'
+      && unicode_attributes[ch].category[1] == 'l')
+    return true;
+
+  /* Avoid warnings for <U24B6>..<U24E9>. */
+  if (unicode_attributes[ch].category[0] == 'S'
+      && unicode_attributes[ch].category[1] == 'o'
+      && strstr (unicode_attributes[ch].name, " LETTER ") != NULL)
+    return true;
+
+  /* Consider all the non-ASCII digits as alphabetic.
+     ISO C 99 forbids us to have them in category "digit",
+     but we want iswalnum to return true on them. */
+  if (unicode_attributes[ch].category[0] == 'N'
+      && unicode_attributes[ch].category[1] == 'd'
+      && (ch < 0x0030 || ch > 0x0039))
+    return true;
+
+  return false;
+}
+
+/* Return true iff CH is one of the ten ASCII decimal digits.  The disabled
+   branch shows the wider definition based on general category Nd.  */
+static bool
+is_digit (unsigned int ch)
+{
+#if 0
+  return (unicode_attributes[ch].name != NULL
+          && unicode_attributes[ch].category[0] == 'N'
+          && unicode_attributes[ch].category[1] == 'd');
+  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
+     a zero. Must add <0> in front of them by hand. */
+#else
+  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
+     takes it away:
+     7.25.2.1.5:
+        The iswdigit function tests for any wide character that corresponds
+        to a decimal-digit character (as defined in 5.2.1).
+     5.2.1:
+        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
+   */
+  if (ch < 0x0030)
+    return false;
+  return ch <= 0x0039;
+#endif
+}
+
+/* Return true iff CH lies in <U0030>..<U0039>, the ASCII digits.  */
+static bool
+is_outdigit (unsigned int ch)
+{
+  const unsigned int first_digit = 0x0030;
+  const unsigned int last_digit = 0x0039;
+
+  return first_digit <= ch && ch <= last_digit;
+}
+
+/* Return true if CH is in the "alpha" class or the "digit" class.  */
+static bool
+is_alnum (unsigned int ch)
+{
+  if (is_alpha (ch))
+    return true;
+  return is_digit (ch);
+}
+
+/* Return true if CH is a horizontal tab, or an assigned character of
+   category Zs whose decomposition does not mention "<noBreak>".  */
+static bool
+is_blank (unsigned int ch)
+{
+  if (ch == 0x0009) /* '\t' */
+    return true;
+
+  /* Category Zs without mention of "<noBreak>" */
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (unicode_attributes[ch].category[0] != 'Z')
+    return false;
+  if (unicode_attributes[ch].category[1] != 's')
+    return false;
+  return strstr (unicode_attributes[ch].decomposition, "<noBreak>") == NULL;
+}
+
+/* Return true if CH belongs to the "space" class: the six ASCII white space
+   characters, plus assigned characters of category Zl, Zp, or Zs without a
+   "<noBreak>" decomposition.  */
+static bool
+is_space (unsigned int ch)
+{
+  /* Don't make U+00A0 a space. Non-breaking space means that all programs
+     should treat it like a punctuation character, not like a space. */
+  switch (ch)
+    {
+    case 0x0020: /* ' ' */
+    case 0x000C: /* '\f' */
+    case 0x000A: /* '\n' */
+    case 0x000D: /* '\r' */
+    case 0x0009: /* '\t' */
+    case 0x000B: /* '\v' */
+      return true;
+    default:
+      break;
+    }
+
+  /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (unicode_attributes[ch].category[0] != 'Z')
+    return false;
+  switch (unicode_attributes[ch].category[1])
+    {
+    case 'l':
+    case 'p':
+      return true;
+    case 's':
+      return strstr (unicode_attributes[ch].decomposition,
+                     "<noBreak>") == NULL;
+    default:
+      return false;
+    }
+}
+
+/* Return true if CH is an assigned character that is either named
+   "<control>" or of category Zl or Zp.  */
+static bool
+is_cntrl (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (strcmp (unicode_attributes[ch].name, "<control>") == 0)
+    return true;
+
+  /* Categories Zl and Zp */
+  if (unicode_attributes[ch].category[0] != 'Z')
+    return false;
+  return (unicode_attributes[ch].category[1] == 'l'
+          || unicode_attributes[ch].category[1] == 'p');
+}
+
+/* Return true iff CH is an ASCII hexadecimal digit: 0-9, A-F or a-f.  */
+static bool
+is_xdigit (unsigned int ch)
+{
+#if 0
+  return is_digit (ch)
+         || (ch >= 0x0041 && ch <= 0x0046)
+         || (ch >= 0x0061 && ch <= 0x0066);
+#else
+  /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
+     takes it away:
+     7.25.2.1.12:
+        The iswxdigit function tests for any wide character that corresponds
+        to a hexadecimal-digit character (as defined in 6.4.4.1).
+     6.4.4.1:
+        hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
+   */
+  if (ch >= 0x0030 && ch <= 0x0039)     /* 0..9 */
+    return true;
+  if (ch >= 0x0041 && ch <= 0x0046)     /* A..F */
+    return true;
+  return ch >= 0x0061 && ch <= 0x0066;  /* a..f */
+#endif
+}
+
+/* Return true if CH is an assigned character that is neither named
+   "<control>" nor a member of the "space" class.  */
+static bool
+is_graph (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (strcmp (unicode_attributes[ch].name, "<control>") == 0)
+    return false;
+  return !is_space (ch);
+}
+
+/* Return true if CH is an assigned character that is not named "<control>"
+   and is not of category Zl or Zp.  */
+static bool
+is_print (unsigned int ch)
+{
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (strcmp (unicode_attributes[ch].name, "<control>") == 0)
+    return false;
+
+  /* Categories Zl and Zp */
+  if (unicode_attributes[ch].category[0] == 'Z'
+      && (unicode_attributes[ch].category[1] == 'l'
+          || unicode_attributes[ch].category[1] == 'p'))
+    return false;
+
+  return true;
+}
+
+/* Return true if CH is graphic but neither alphabetic nor a digit.  The
+   disabled branch shows the alternative based on general category P.  */
+static bool
+is_punct (unsigned int ch)
+{
+#if 0
+  return (unicode_attributes[ch].name != NULL
+          && unicode_attributes[ch].category[0] == 'P');
+#else
+  /* The traditional POSIX definition of punctuation is every graphic,
+     non-alphanumeric character. */
+  if (!is_graph (ch))
+    return false;
+  return !(is_alpha (ch) || is_digit (ch));
+#endif
+}
+
+/* Output all properties.
+   For each <ctype.h>-like class this writes the debug listing, the unit
+   test and the header.  VERSION is handed on to the header generator.  */
+static void
+output_old_ctype (const char *version)
+{
+#define CTYPE_PROPERTY(P) \
+  do \
+    { \
+      debug_output_predicate ("unictype/ctype_" #P ".txt", is_ ## P); \
+      output_predicate_test ("../tests/unictype/test-ctype_" #P ".c", is_ ## P, "uc_is_" #P " (c)"); \
+      output_predicate ("unictype/ctype_" #P ".h", is_ ## P, "u_is_" #P, "ISO C <ctype.h> like properties", version); \
+    } \
+  while (0)
+
+  CTYPE_PROPERTY (alnum);
+  CTYPE_PROPERTY (alpha);
+  CTYPE_PROPERTY (cntrl);
+  CTYPE_PROPERTY (digit);
+  CTYPE_PROPERTY (graph);
+  CTYPE_PROPERTY (lower);
+  CTYPE_PROPERTY (print);
+  CTYPE_PROPERTY (punct);
+  CTYPE_PROPERTY (space);
+  CTYPE_PROPERTY (upper);
+  CTYPE_PROPERTY (xdigit);
+  CTYPE_PROPERTY (blank);
+
+#undef CTYPE_PROPERTY
+}
+
+#if 0
+
+/* Return true if CH is an assigned character of general category Mn, Mc
+   or Me.  */
+static bool
+is_combining (unsigned int ch)
+{
+  /* Up to Unicode 3.0.1 we took the Combining property from the PropList.txt
+     file. In 3.0.1 it was identical to the union of the general categories
+     "Mn", "Mc", "Me". In Unicode 3.1 this property has been dropped from the
+     PropList.txt file, so we take the latter definition. */
+  if (unicode_attributes[ch].name == NULL)
+    return false;
+  if (unicode_attributes[ch].category[0] != 'M')
+    return false;
+
+  switch (unicode_attributes[ch].category[1])
+    {
+    case 'n':
+    case 'c':
+    case 'e':
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Return true if CH is a combining character whose combining class field is
+   empty, starts with '0', or has a decimal value below 200.  */
+static bool
+is_combining_level3 (unsigned int ch)
+{
+  if (!is_combining (ch))
+    return false;
+
+  /* An empty field or one starting with '0' is accepted without
+     conversion.  */
+  if (unicode_attributes[ch].combining[0] == '\0'
+      || unicode_attributes[ch].combining[0] == '0')
+    return true;
+
+  return strtoul (unicode_attributes[ch].combining, NULL, 10) < 200;
+}
+
+/* Return the UCS symbol string for a Unicode character: "<Uxxxx>" with four
+   hex digits below 0x10000, "<Uxxxxxxxx>" with eight otherwise.
+   The result lives in a static buffer that the next call overwrites.  */
+static const char *
+ucs_symbol (unsigned int i)
+{
+  static char buf[11+1];
+
+  if (i < 0x10000)
+    sprintf (buf, "<U%04X>", i);
+  else
+    sprintf (buf, "<U%08X>", i);
+  return buf;
+}
+
+/* Return the UCS symbol range string for a Unicode characters interval, in
+   the form "<low>..<high>".  The result lives in a static buffer that the
+   next call overwrites.  */
+static const char *
+ucs_symbol_range (unsigned int low, unsigned int high)
+{
+  static char buf[24+1];
+  size_t used;
+
+  /* Each ucs_symbol result is copied out before ucs_symbol is called
+     again, so the two symbols are never read from the same storage.  */
+  strcpy (buf, ucs_symbol (low));
+  used = strlen (buf);
+  strcpy (buf + used, "..");
+  used += 2;
+  strcpy (buf + used, ucs_symbol (high));
+  return buf;
+}
+
+/* Output a character class (= property) table.
+   Writes CLASSNAME followed by every code point for which FUNC returns
+   true.  Runs of consecutive members are written as "<low>..<high>",
+   isolated members as a single symbol; entries are separated by ';' and a
+   line is continued with "/" before it would exceed 75 columns.  */
+
+static void
+output_charclass (FILE *stream, const char *classname,
+                  bool (*func) (unsigned int))
+{
+  /* One flag per code point.  Kept in static storage because 0x110000
+     bytes is too much to place safely on the stack.  Every element is
+     rewritten on each call, so no state carries over between calls.  */
+  static char table[0x110000];
+  unsigned int i;
+  bool need_semicolon;
+  const int max_column = 75;
+  int column;
+
+  for (i = 0; i < 0x110000; i++)
+    table[i] = (int) func (i);
+
+  fprintf (stream, "%s ", classname);
+  need_semicolon = false;
+  /* Start beyond max_column so that the first entry opens a new line.  */
+  column = 1000;
+  for (i = 0; i < 0x110000; )
+    {
+      if (!table[i])
+        i++;
+      else
+        {
+          unsigned int low, high;
+          char buf[25];
+
+          /* Extend the run as far as consecutive members reach.  */
+          low = i;
+          do
+            i++;
+          while (i < 0x110000 && table[i]);
+          high = i - 1;
+
+          if (low == high)
+            strcpy (buf, ucs_symbol (low));
+          else
+            strcpy (buf, ucs_symbol_range (low, high));
+
+          if (need_semicolon)
+            {
+              fprintf (stream, ";");
+              column++;
+            }
+
+          if (column + strlen (buf) > max_column)
+            {
+              fprintf (stream, "/\n ");
+              column = 3;
+            }
+
+          fprintf (stream, "%s", buf);
+          column += strlen (buf);
+          need_semicolon = true;
+        }
+    }
+  fprintf (stream, "\n");
+}
+
+/* Output a character mapping table.
+   Writes MAPNAME followed by one "(<from>,<to>)" pair for every code point
+   that FUNC maps to a different code point.  Pairs are separated by ';'
+   and a line is continued with "/" before it would exceed 75 columns.  */
+
+static void
+output_charmap (FILE *stream, const char *mapname,
+                unsigned int (*func) (unsigned int))
+{
+  /* One flag per code point.  Kept in static storage because 0x110000
+     bytes is too much to place safely on the stack.  Every element is
+     rewritten on each call, so no state carries over between calls.  */
+  static char table[0x110000];
+  unsigned int i;
+  bool need_semicolon;
+  const int max_column = 75;
+  int column;
+
+  for (i = 0; i < 0x110000; i++)
+    table[i] = (func (i) != i);
+
+  fprintf (stream, "%s ", mapname);
+  need_semicolon = false;
+  /* Start beyond max_column so that the first entry opens a new line.  */
+  column = 1000;
+  for (i = 0; i < 0x110000; i++)
+    if (table[i])
+      {
+        char buf[25+1];
+
+        /* Each ucs_symbol result is appended before the next call.  */
+        strcpy (buf, "(");
+        strcat (buf, ucs_symbol (i));
+        strcat (buf, ",");
+        strcat (buf, ucs_symbol (func (i)));
+        strcat (buf, ")");
+
+        if (need_semicolon)
+          {
+            fprintf (stream, ";");
+            column++;
+          }
+
+        if (column + strlen (buf) > max_column)
+          {
+            fprintf (stream, "/\n ");
+            column = 3;
+          }
+
+        fprintf (stream, "%s", buf);
+        column += strlen (buf);
+        need_semicolon = true;
+      }
+  fprintf (stream, "\n");
+}
+
+/* Output the width table.
+   Currently an empty stub: nothing is written to STREAM. */
+
+static void
+output_widthmap (FILE *stream)
+{
+}
+
+/* Output the tables to the given file.
+   Writes an LC_IDENTIFICATION section followed by an LC_CTYPE section to
+   FILENAME; VERSION is quoted in the banner, title and revision lines.
+   Before LC_CTYPE is written, every code point is checked against the
+   class restrictions quoted below, and each violation is reported on
+   stderr; generation continues regardless.
+   Exits with status 1 if FILENAME cannot be opened or written. */
+
+static void
+output_tables (const char *filename, const char *version)
+{
+ FILE *stream;
+ unsigned int ch;
+
+ stream = fopen (filename, "w");
+ if (stream == NULL)
+ {
+ fprintf (stderr, "cannot open '%s' for writing\n", filename);
+ exit (1);
+ }
+
+ fprintf (stream, "escape_char /\n");
+ fprintf (stream, "comment_char %%\n");
+ fprintf (stream, "\n");
+ fprintf (stream, "%% Generated automatically by gen-uni-tables.c for Unicode %s.\n",
+ version);
+ fprintf (stream, "\n");
+
+ fprintf (stream, "LC_IDENTIFICATION\n");
+ fprintf (stream, "title \"Unicode %s FDCC-set\"\n", version);
+ fprintf (stream, "source \"UnicodeData.txt, PropList.txt\"\n");
+ fprintf (stream, "address \"\"\n");
+ fprintf (stream, "contact \"\"\n");
+ fprintf (stream, "email \"bug-glibc@gnu.org\"\n");
+ fprintf (stream, "tel \"\"\n");
+ fprintf (stream, "fax \"\"\n");
+ fprintf (stream, "language \"\"\n");
+ fprintf (stream, "territory \"Earth\"\n");
+ fprintf (stream, "revision \"%s\"\n", version);
+ {
+ time_t now;
+ char date[11];
+ now = time (NULL);
+ strftime (date, sizeof (date), "%Y-%m-%d", gmtime (&now));
+ fprintf (stream, "date \"%s\"\n", date);
+ }
+ fprintf (stream, "category \"unicode:2001\";LC_CTYPE\n");
+ fprintf (stream, "END LC_IDENTIFICATION\n");
+ fprintf (stream, "\n");
+
+ /* Verification. */
+ for (ch = 0; ch < 0x110000; ch++)
+ {
+ /* toupper restriction: "Only characters specified for the keywords
+ lower and upper shall be specified." */
+ if (to_upper (ch) != ch && !(is_lower (ch) || is_upper (ch)))
+ fprintf (stderr,
+ "%s is not upper|lower but toupper(0x%04X) = 0x%04X\n",
+ ucs_symbol (ch), ch, to_upper (ch));
+
+ /* tolower restriction: "Only characters specified for the keywords
+ lower and upper shall be specified." */
+ if (to_lower (ch) != ch && !(is_lower (ch) || is_upper (ch)))
+ fprintf (stderr,
+ "%s is not upper|lower but tolower(0x%04X) = 0x%04X\n",
+ ucs_symbol (ch), ch, to_lower (ch));
+
+ /* alpha restriction: "Characters classified as either upper or lower
+ shall automatically belong to this class." */
+ if ((is_lower (ch) || is_upper (ch)) && !is_alpha (ch))
+ fprintf (stderr, "%s is upper|lower but not alpha\n", ucs_symbol (ch));
+
+ /* alpha restriction: "No character specified for the keywords cntrl,
+ digit, punct or space shall be specified." */
+ if (is_alpha (ch) && is_cntrl (ch))
+ fprintf (stderr, "%s is alpha and cntrl\n", ucs_symbol (ch));
+ if (is_alpha (ch) && is_digit (ch))
+ fprintf (stderr, "%s is alpha and digit\n", ucs_symbol (ch));
+ if (is_alpha (ch) && is_punct (ch))
+ fprintf (stderr, "%s is alpha and punct\n", ucs_symbol (ch));
+ if (is_alpha (ch) && is_space (ch))
+ fprintf (stderr, "%s is alpha and space\n", ucs_symbol (ch));
+
+ /* space restriction: "No character specified for the keywords upper,
+ lower, alpha, digit, graph or xdigit shall be specified."
+ upper, lower, alpha already checked above. */
+ if (is_space (ch) && is_digit (ch))
+ fprintf (stderr, "%s is space and digit\n", ucs_symbol (ch));
+ if (is_space (ch) && is_graph (ch))
+ fprintf (stderr, "%s is space and graph\n", ucs_symbol (ch));
+ if (is_space (ch) && is_xdigit (ch))
+ fprintf (stderr, "%s is space and xdigit\n", ucs_symbol (ch));
+
+ /* cntrl restriction: "No character specified for the keywords upper,
+ lower, alpha, digit, punct, graph, print or xdigit shall be
+ specified." upper, lower, alpha already checked above. */
+ if (is_cntrl (ch) && is_digit (ch))
+ fprintf (stderr, "%s is cntrl and digit\n", ucs_symbol (ch));
+ if (is_cntrl (ch) && is_punct (ch))
+ fprintf (stderr, "%s is cntrl and punct\n", ucs_symbol (ch));
+ if (is_cntrl (ch) && is_graph (ch))
+ fprintf (stderr, "%s is cntrl and graph\n", ucs_symbol (ch));
+ if (is_cntrl (ch) && is_print (ch))
+ fprintf (stderr, "%s is cntrl and print\n", ucs_symbol (ch));
+ if (is_cntrl (ch) && is_xdigit (ch))
+ fprintf (stderr, "%s is cntrl and xdigit\n", ucs_symbol (ch));
+
+ /* punct restriction: "No character specified for the keywords upper,
+ lower, alpha, digit, cntrl, xdigit or as the <space> character shall
+ be specified." upper, lower, alpha, cntrl already checked above. */
+ if (is_punct (ch) && is_digit (ch))
+ fprintf (stderr, "%s is punct and digit\n", ucs_symbol (ch));
+ if (is_punct (ch) && is_xdigit (ch))
+ fprintf (stderr, "%s is punct and xdigit\n", ucs_symbol (ch));
+ if (is_punct (ch) && (ch == 0x0020))
+ fprintf (stderr, "%s is punct\n", ucs_symbol (ch));
+
+ /* graph restriction: "No character specified for the keyword cntrl
+ shall be specified." Already checked above. */
+
+ /* print restriction: "No character specified for the keyword cntrl
+ shall be specified." Already checked above. */
+
+ /* graph - print relation: differ only in the <space> character.
+ How is this possible if there are more than one space character?!
+ I think susv2/xbd/locale.html should speak of "space characters",
+ not "space character". */
+ if (is_print (ch) && !(is_graph (ch) || /* ch == 0x0020 */ is_space (ch)))
+ fprintf (stderr,
+ "%s is print but not graph|<space>\n", ucs_symbol (ch));
+ if (!is_print (ch) && (is_graph (ch) || ch == 0x0020))
+ fprintf (stderr,
+ "%s is graph|<space> but not print\n", ucs_symbol (ch));
+ }
+
+ fprintf (stream, "LC_CTYPE\n");
+ output_charclass (stream, "upper", is_upper);
+ output_charclass (stream, "lower", is_lower);
+ output_charclass (stream, "alpha", is_alpha);
+ output_charclass (stream, "digit", is_digit);
+ output_charclass (stream, "outdigit", is_outdigit);
+ output_charclass (stream, "blank", is_blank);
+ output_charclass (stream, "space", is_space);
+ output_charclass (stream, "cntrl", is_cntrl);
+ output_charclass (stream, "punct", is_punct);
+ output_charclass (stream, "xdigit", is_xdigit);
+ output_charclass (stream, "graph", is_graph);
+ output_charclass (stream, "print", is_print);
+ output_charclass (stream, "class \"combining\";", is_combining);
+ output_charclass (stream, "class \"combining_level3\";", is_combining_level3);
+ output_charmap (stream, "toupper", to_upper);
+ output_charmap (stream, "tolower", to_lower);
+ output_charmap (stream, "map \"totitle\";", to_title);
+ output_widthmap (stream);
+ fprintf (stream, "END LC_CTYPE\n");
+
+ if (ferror (stream) || fclose (stream))
+ {
+ fprintf (stderr, "error writing to '%s'\n", filename);
+ exit (1);
+ }
+}
+
+#endif
+
+/* ========================================================================= */
+
+/* The width property from the EastAsianWidth.txt file.
+ Each is NULL (unassigned) or "N", "A", "H", "W", "F", "Na".
+ The array has one entry for each of the 0x110000 code points and is
+ indexed by code point. */
+const char * unicode_width[0x110000];
+
+/* Stores in unicode_width[] the width property from the EastAsianWidth.txt
+ file. */
+static void
+fill_width (const char *width_filename)
+{
+ unsigned int i, j;
+ FILE *stream;
+ char field0[FIELDLEN];
+ char field1[FIELDLEN];
+ char field2[FIELDLEN];
+ int lineno = 0;
+
+ for (i = 0; i < 0x110000; i++)
+ unicode_width[i] = (unicode_attributes[i].name != NULL ? "N" : NULL);
+
+ stream = fopen (width_filename, "r");
+ if (stream == NULL)
+ {
+ fprintf (stderr, "error during fopen of '%s'\n", width_filename);
+ exit (1);
+ }
+
+ for (;;)
+ {
+ int n;
+ int c;
+
+ lineno++;
+ c = getc (stream);
+ if (c == EOF)
+ break;
+ if (c == '#')
+ {
+ do c = getc (stream); while (c != EOF && c != '\n');
+ continue;
+ }
+ ungetc (c, stream);
+ n = getfield (stream, field0, ';');
+ n += getfield (stream, field1, ' ');
+ n += getfield (stream, field2, '\n');
+ if (n == 0)
+ break;
+ if (n != 3)
+ {
+ fprintf (stderr, "short line in '%s':%d\n", width_filename, lineno);
+ exit (1);
+ }
+ i = strtoul (field0, NULL, 16);
+ if (strstr (field0, "..") != NULL)
+ {
+ /* Deal with a range. */
+ j = strtoul (strstr (field0, "..") + 2, NULL, 16);
+ for (; i <= j; i++)
+ unicode_width[i] = strdup (field1);
+ }
+ else
+ {
+ /* Single character line. */
+ unicode_width[i] = strdup (field1);
+ }
+ }
+
+ if (ferror (stream) || fclose (stream))
+ {
+ fprintf (stderr, "error reading from '%s'\n", width_filename);
+ exit (1);
+ }