New module 'unicase/u32-is-casefolded'.

[gnulib.git] / lib / gen-uni-tables.c
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c

index b1149fd..a507517 100644 (file)
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -1,5 +1,6 @@
  /* Generate Unicode conforming character classification tables and
-   Line Break Properties tables from a UnicodeData file.
+   line break properties tables and word break property tables and
+   decomposition/composition and case mapping tables from a UnicodeData file.
     Copyright (C) 2000-2002, 2004, 2007-2009 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2000-2002.
  
@@ -25,7 +26,11 @@
                        /usr/local/share/Unidata/PropList-3.0.1.txt \
                        /usr/local/share/Unidata/EastAsianWidth.txt \
                        /usr/local/share/Unidata/LineBreak.txt \
-                      5.0.0
+                      /usr/local/share/Unidata/WordBreakProperty.txt \
+                      /usr/local/share/Unidata/CompositionExclusions.txt \
+                      /usr/local/share/Unidata/SpecialCasing.txt \
+                      /usr/local/share/Unidata/CaseFolding.txt \
+                      5.1.0
   */
  
  #include <stdbool.h>
@@ -736,7 +741,7 @@ output_predicate (const char *filename, bool (*predicate) (unsigned int), const
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd * sizeof (int) / sizeof (short) + %5zd",
+       fprintf (stream, " %5zu * sizeof (int) / sizeof (short) + %5zu",
                  1 + t.level1_size, (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
          fprintf (stream, ",");
@@ -756,7 +761,7 @@ output_predicate (const char *filename, bool (*predicate) (unsigned int), const
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd + %5zd * sizeof (short) / sizeof (int) + %5zd",
+       fprintf (stream, " %5zu + %5zu * sizeof (short) / sizeof (int) + %5zu",
                  1 + t.level1_size, t.level2_size << t.q, (offset - level3_offset) / sizeof (uint32_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -1054,7 +1059,7 @@ output_category (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -1074,7 +1079,7 @@ output_category (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -1205,7 +1210,7 @@ output_combclass (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -1225,7 +1230,7 @@ output_combclass (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -1544,7 +1549,7 @@ output_bidi_category (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -1564,7 +1569,7 @@ output_bidi_category (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -1751,7 +1756,7 @@ output_decimal_digit (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -1771,7 +1776,7 @@ output_decimal_digit (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -1938,7 +1943,7 @@ output_digit (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -1958,7 +1963,7 @@ output_digit (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -2194,7 +2199,7 @@ output_numeric (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -2214,7 +2219,7 @@ output_numeric (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -2432,7 +2437,7 @@ output_mirror (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -2452,7 +2457,7 @@ output_mirror (const char *filename, const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (int32_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -2766,6 +2771,7 @@ is_property_alphabetic (unsigned int ch)
         Alphabetic but not as having property Other_Alphabetic.  */
      || (ch >= 0x16EE && ch <= 0x16F0) /* RUNIC SYMBOLS */
      || (ch >= 0x2160 && ch <= 0x2182) /* ROMAN NUMERALS */
+    || (ch >= 0x2185 && ch <= 0x2188) /* ROMAN NUMERALS */
      || (ch >= 0x24D0 && ch <= 0x24E9) /* CIRCLED LATIN SMALL LETTER */
      || (ch == 0x3007) /* IDEOGRAPHIC NUMBER ZERO */
      || (ch >= 0x3021 && ch <= 0x3029) /* HANGZHOU NUMERAL */
@@ -2804,12 +2810,10 @@ is_property_default_ignorable_code_point (unsigned int ch)
  {
    bool result1 =
      (is_category_Cf (ch)
-     && !(ch >= 0xFFF9 && ch <= 0xFFFB)) /* Annotations */
-    || ((is_category_Cc (ch) || is_category_Cs (ch))
-       && !is_property_white_space (ch))
+     && !(ch >= 0xFFF9 && ch <= 0xFFFB) /* Annotations */
+     && !((ch >= 0x0600 && ch <= 0x0603) || ch == 0x06DD || ch == 0x070F))
      || ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0)
-    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0)
-    || is_property_not_a_character (ch);
+    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0);
    bool result2 =
      ((unicode_properties[ch] & (1ULL << PROP_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
  
@@ -3753,7 +3757,7 @@ output_scripts (const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -3773,7 +3777,7 @@ output_scripts (const char *version)
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -4429,7 +4433,7 @@ output_ident_category (const char *filename, int (*predicate) (unsigned int), co
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level2_offset) / sizeof (uint32_t));
        if (i+1 < t.level1_size)
         fprintf (stream, ",");
@@ -4449,7 +4453,7 @@ output_ident_category (const char *filename, int (*predicate) (unsigned int), co
        if (offset == 0)
         fprintf (stream, " %5d", -1);
        else
-       fprintf (stream, " %5zd",
+       fprintf (stream, " %5zu",
                  (offset - level3_offset) / sizeof (uint8_t));
        if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
@@ -5299,16 +5303,8 @@ get_lbp (unsigned int ch)
           || ch == 0x0FBE /* TIBETAN KU RU KHA */
           || ch == 0x0FBF /* TIBETAN KU RU KHA BZHI MIG CAN */
           || ch == 0x0FD2 /* TIBETAN MARK NYIS TSHEG */
-#if !REVISION_22
-         || ch == 0x1802 /* MONGOLIAN COMMA */
-         || ch == 0x1803 /* MONGOLIAN FULL STOP */
-#endif
           || ch == 0x1804 /* MONGOLIAN COLON */
           || ch == 0x1805 /* MONGOLIAN FOUR DOTS */
-#if !REVISION_22
-         || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
-         || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
           || ch == 0x1B5A /* BALINESE PANTI */
           || ch == 0x1B5B /* BALINESE PAMADA */
           || ch == 0x1B5C /* BALINESE WINDU */
@@ -5321,15 +5317,9 @@ get_lbp (unsigned int ch)
           || ch == 0x1C3F /* LEPCHA PUNCTUATION TSHOOK */
           || ch == 0x1C7E /* OL CHIKI PUNCTUATION MUCAAD */
           || ch == 0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD */
-#if !REVISION_22
-         || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
-#endif
           || ch == 0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK */
           || ch == 0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK */
           || ch == 0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER */
-#if !REVISION_22
-         || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
           || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
           || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
           || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
@@ -5344,9 +5334,6 @@ get_lbp (unsigned int ch)
           || ch == 0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM */
           || ch == 0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS */
           /* Extra characters for compatibility with Unicode LineBreak.txt.  */
-#if !REVISION_22
-         || ch == 0x1A1E /* BUGINESE PALLAWA */
-#endif
           || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
           || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
           || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */)
@@ -5354,10 +5341,8 @@ get_lbp (unsigned int ch)
  
        /* break opportunity before */
        if (ch == 0x00B4 /* ACUTE ACCENT */
-#if REVISION_22
           || ch == 0x1FFD /* GREEK OXIA */
           || ch == 0x02DF /* MODIFIER LETTER CROSS ACCENT */
-#endif
           || ch == 0x02C8 /* MODIFIER LETTER VERTICAL LINE */
           || ch == 0x02CC /* MODIFIER LETTER LOW VERTICAL LINE */
           || ch == 0x0F01 /* TIBETAN MARK GTER YIG MGO TRUNCATED A */
@@ -5403,15 +5388,9 @@ get_lbp (unsigned int ch)
        if (ch == 0x0021 /* EXCLAMATION MARK */
           || ch == 0x003F /* QUESTION MARK */
           || ch == 0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA */
-#if !REVISION_22
-         || ch == 0x060C /* ARABIC COMMA */
-#endif
           || ch == 0x061B /* ARABIC SEMICOLON */
           || ch == 0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK */
           || ch == 0x061F /* ARABIC QUESTION MARK */
-#if !REVISION_22
-         || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
           || ch == 0x06D4 /* ARABIC FULL STOP */
           || ch == 0x07F9 /* NKO EXCLAMATION MARK */
           || ch == 0x0F0D /* TIBETAN MARK SHAD */
@@ -5420,22 +5399,20 @@ get_lbp (unsigned int ch)
           || ch == 0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD */
           || ch == 0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD */
           || ch == 0x0F14 /* TIBETAN MARK GTER TSHEG */
-#if REVISION_22
           || ch == 0x1802 /* MONGOLIAN COMMA */
           || ch == 0x1803 /* MONGOLIAN FULL STOP */
           || ch == 0x1808 /* MONGOLIAN MANCHU COMMA */
           || ch == 0x1809 /* MONGOLIAN MANCHU FULL STOP */
-#endif
           || ch == 0x1944 /* LIMBU EXCLAMATION MARK */
           || ch == 0x1945 /* LIMBU QUESTION MARK */
           || ch == 0x2762 /* HEAVY EXCLAMATION MARK ORNAMENT */
           || ch == 0x2763 /* HEAVY HEART EXCLAMATION MARK ORNAMENT */
-#if REVISION_22
           || ch == 0x2CF9 /* COPTIC OLD NUBIAN FULL STOP */
           || ch == 0x2CFE /* COPTIC FULL STOP */
-#endif
           || ch == 0x2E2E /* REVERSED QUESTION MARK */
+#if REVISION_22
           || ch == 0xA60C /* VAI SYLLABLE LENGTHENER */
+#endif
           || ch == 0xA60E /* VAI FULL STOP */
           || ch == 0xA876 /* PHAGS-PA MARK SHAD */
           || ch == 0xA877 /* PHAGS-PA MARK DOUBLE SHAD */
@@ -5490,10 +5467,8 @@ get_lbp (unsigned int ch)
        /* opening punctuation */
        if ((unicode_attributes[ch].category[0] == 'P'
            && unicode_attributes[ch].category[1] == 's')
-#if REVISION_22
           || ch == 0x00A1 /* INVERTED EXCLAMATION MARK */
           || ch == 0x00BF /* INVERTED QUESTION MARK */
-#endif
           || ch == 0x2E18 /* INVERTED INTERROBANG */)
         attr |= 1 << LBP_OP;
  
@@ -5522,9 +5497,7 @@ get_lbp (unsigned int ch)
           || ch == 0x003B /* SEMICOLON */
           || ch == 0x037E /* GREEK QUESTION MARK */
           || ch == 0x0589 /* ARMENIAN FULL STOP */
-#if REVISION_22
           || ch == 0x060C /* ARABIC COMMA */
-#endif
           || ch == 0x060D /* ARABIC DATE SEPARATOR */
           || ch == 0x07F8 /* NKO COMMA */
           || ch == 0x2044 /* FRACTION SLASH */
@@ -5546,9 +5519,7 @@ get_lbp (unsigned int ch)
           || ch == 0x00A2 /* CENT SIGN */
           || ch == 0x00B0 /* DEGREE SIGN */
           || ch == 0x060B /* AFGHANI SIGN */
-#if REVISION_22
           || ch == 0x066A /* ARABIC PERCENT SIGN */
-#endif
           || ch == 0x2030 /* PER MILLE SIGN */
           || ch == 0x2031 /* PER TEN THOUSAND SIGN */
           || ch == 0x2032 /* PRIME */
@@ -5563,7 +5534,11 @@ get_lbp (unsigned int ch)
           || ch == 0xFDFC /* RIAL SIGN */
           || ch == 0xFE6A /* SMALL PERCENT SIGN */
           || ch == 0xFF05 /* FULLWIDTH PERCENT SIGN */
-         || ch == 0xFFE0 /* FULLWIDTH DIGIT ZERO */)
+         || ch == 0xFFE0 /* FULLWIDTH CENT SIGN */
+         /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+         || ch == 0x0609 /* ARABIC-INDIC PER MILLE SIGN */
+         || ch == 0x060A /* ARABIC-INDIC PER TEN THOUSAND SIGN */
+         || ch == 0x0D79 /* MALAYALAM DATE MARK */)
         attr |= 1 << LBP_PO;
  
        /* prefix (numeric) */
@@ -5607,6 +5582,8 @@ get_lbp (unsigned int ch)
                && (unicode_attributes[ch].category[1] == 'c'
                    || unicode_attributes[ch].category[1] == 'n'))
            /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+          || ch == 0x109E /* MYANMAR SYMBOL SHAN ONE */
+          || ch == 0x109F /* MYANMAR SYMBOL SHAN EXCLAMATION */
            || ch == 0x19DE /* NEW TAI LUE SIGN LAE */
            || ch == 0x19DF /* NEW TAI LUE SIGN LAEV */)
           && ((ch >= 0x0E00 && ch <= 0x0EFF)
@@ -5632,7 +5609,7 @@ get_lbp (unsigned int ch)
           || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
           || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
           || (ch >= 0x3400 && ch <= 0x4DB5) /* CJK Ideograph Extension A */
-         || (ch >= 0x4E00 && ch <= 0x9FBB) /* CJK Ideograph */
+         || (ch >= 0x4E00 && ch <= 0x9FC3) /* CJK Ideograph */
           || (ch >= 0xF900 && ch <= 0xFAD9) /* CJK COMPATIBILITY IDEOGRAPH */
           || (ch >= 0xA000 && ch <= 0xA48F) /* YI SYLLABLE */
           || (ch >= 0xA490 && ch <= 0xA4CF) /* YI RADICAL */
@@ -6136,9 +6113,13 @@ output_lbp (FILE *stream1, FILE *stream2)
        if (i > 0 && (i % 8) == 0)
         fprintf (stream2, "\n   ");
        offset = ((uint32_t *) (t.result + level1_offset))[i];
-      fprintf (stream2, " %5zd%s",
-              offset == 0 ? -1 : (offset - level2_offset) / sizeof (uint32_t),
-              (i+1 < t.level1_size ? "," : ""));
+      if (offset == 0)
+       fprintf (stream2, " %5d", -1);
+      else
+       fprintf (stream2, " %5zu",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream2, ",");
      }
    if (t.level1_size > 8)
      fprintf (stream2, "\n ");
@@ -6152,9 +6133,13 @@ output_lbp (FILE *stream1, FILE *stream2)
        if (i > 0 && (i % 8) == 0)
         fprintf (stream2, "\n   ");
        offset = ((uint32_t *) (t.result + level2_offset))[i];
-      fprintf (stream2, " %5zd%s",
-              offset == 0 ? -1 : (offset - level3_offset) / sizeof (uint8_t),
-              (i+1 < t.level2_size << t.q ? "," : ""));
+      if (offset == 0)
+       fprintf (stream2, " %5d", -1);
+      else
+       fprintf (stream2, " %5zu",
+                (offset - level3_offset) / sizeof (unsigned char));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream2, ",");
      }
    if (t.level2_size << t.q > 8)
      fprintf (stream2, "\n ");
@@ -6280,67 +6265,2043 @@ output_lbrk_tables (const char *filename1, const char *filename2, const char *ve
  
  /* ========================================================================= */
  
-int
-main (int argc, char * argv[])
+/* Word break property.  */
+
+/* Possible values of the Word_Break property.  */
+enum
  {
-  const char *unicodedata_filename;
-  const char *proplist_filename;
-  const char *derivedproplist_filename;
-  const char *scripts_filename;
-  const char *blocks_filename;
-  const char *proplist30_filename;
-  const char *eastasianwidth_filename;
-  const char *linebreak_filename;
-  const char *version;
+  WBP_OTHER        = 0,
+  WBP_CR           = 11,
+  WBP_LF           = 12,
+  WBP_NEWLINE      = 10,
+  WBP_EXTEND       = 8,
+  WBP_FORMAT       = 9,
+  WBP_KATAKANA     = 1,
+  WBP_ALETTER      = 2,
+  WBP_MIDNUMLET    = 3,
+  WBP_MIDLETTER    = 4,
+  WBP_MIDNUM       = 5,
+  WBP_NUMERIC      = 6,
+  WBP_EXTENDNUMLET = 7
+};
+
+/* Returns the word breaking property for ch, as a bit mask.  */
+static int
+get_wbp (unsigned int ch)
+{
+  int attr = 0;
  
-  if (argc != 10)
+  if (unicode_attributes[ch].name != NULL)
      {
-      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt LineBreak.txt version\n",
-              argv[0]);
+      if (ch == 0x000D)
+       attr |= 1 << WBP_CR;
+
+      if (ch == 0x000A)
+       attr |= 1 << WBP_LF;
+
+      if (ch == 0x000B || ch == 0x000C
+         || ch == 0x0085
+         || ch == 0x2028 || ch == 0x2029)
+       attr |= 1 << WBP_NEWLINE;
+
+      if (((unicode_properties[ch] >> PROP_GRAPHEME_EXTEND) & 1) != 0
+         || (unicode_attributes[ch].category != NULL
+             && strcmp (unicode_attributes[ch].category, "Mc") == 0))
+       attr |= 1 << WBP_EXTEND;
+
+      if (unicode_attributes[ch].category != NULL
+         && strcmp (unicode_attributes[ch].category, "Cf") == 0
+         && ch != 0x200C && ch != 0x200D)
+       attr |= 1 << WBP_FORMAT;
+
+      if ((unicode_scripts[ch] < numscripts
+          && strcmp (scripts[unicode_scripts[ch]], "Katakana") == 0)
+         || (ch >= 0x3031 && ch <= 0x3035)
+         || ch == 0x309B || ch == 0x309C || ch == 0x30A0 || ch == 0x30FC
+         || ch == 0xFF70)
+       attr |= 1 << WBP_KATAKANA;
+
+      if ((((unicode_properties[ch] >> PROP_ALPHABETIC) & 1) != 0
+          || ch == 0x05F3)
+         && ((unicode_properties[ch] >> PROP_IDEOGRAPHIC) & 1) == 0
+         && (attr & (1 << WBP_KATAKANA)) == 0
+         && ((get_lbp (ch) >> LBP_SA) & 1) == 0
+         && !(unicode_scripts[ch] < numscripts
+              && strcmp (scripts[unicode_scripts[ch]], "Hiragana") == 0)
+         && (attr & (1 << WBP_EXTEND)) == 0)
+       attr |= 1 << WBP_ALETTER;
+
+      if (ch == 0x0027 || ch == 0x002E || ch == 0x2018 || ch == 0x2019
+         || ch == 0x2024 || ch == 0xFE52 || ch == 0xFF07 || ch == 0xFF0E)
+       attr |= 1 << WBP_MIDNUMLET;
+
+      if (ch == 0x00B7 || ch == 0x05F4 || ch == 0x2027 || ch == 0x003A
+         || ch == 0x0387 || ch == 0xFE13 || ch == 0xFE55 || ch == 0xFF1A)
+       attr |= 1 << WBP_MIDLETTER;
+
+      if ((((get_lbp (ch) >> LBP_IS) & 1) != 0
+          || ch == 0x066C || ch == 0xFE50 || ch == 0xFE54 || ch == 0xFF0C
+          || ch == 0xFF1B)
+         && ch != 0x003A && ch != 0xFE13 && ch != 0x002E)
+       attr |= 1 << WBP_MIDNUM;
+
+      if (((get_lbp (ch) >> LBP_NU) & 1) != 0
+         && ch != 0x066C)
+       attr |= 1 << WBP_NUMERIC;
+
+      if (unicode_attributes[ch].category != NULL
+         && strcmp (unicode_attributes[ch].category, "Pc") == 0)
+       attr |= 1 << WBP_EXTENDNUMLET;
+    }
+
+  if (attr == 0)
+    /* other */
+    attr |= 1 << WBP_OTHER;
+
+  return attr;
+}
+
+/* Output the word break property computed by get_wbp, in readable form.  */
+static void
+debug_output_wbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_wbp (i);
+      if (attr != 1 << WBP_OTHER)
+       {
+         fprintf (stream, "0x%04X", i);
+         if (attr & (1 << WBP_CR))
+           fprintf (stream, " CR");
+         if (attr & (1 << WBP_LF))
+           fprintf (stream, " LF");
+         if (attr & (1 << WBP_NEWLINE))
+           fprintf (stream, " Newline");
+         if (attr & (1 << WBP_EXTEND))
+           fprintf (stream, " Extend");
+         if (attr & (1 << WBP_FORMAT))
+           fprintf (stream, " Format");
+         if (attr & (1 << WBP_KATAKANA))
+           fprintf (stream, " Katakana");
+         if (attr & (1 << WBP_ALETTER))
+           fprintf (stream, " ALetter");
+         if (attr & (1 << WBP_MIDNUMLET))
+           fprintf (stream, " MidNumLet");
+         if (attr & (1 << WBP_MIDLETTER))
+           fprintf (stream, " MidLetter");
+         if (attr & (1 << WBP_MIDNUM))
+           fprintf (stream, " MidNum");
+         if (attr & (1 << WBP_NUMERIC))
+           fprintf (stream, " Numeric");
+         if (attr & (1 << WBP_EXTENDNUMLET))
+           fprintf (stream, " ExtendNumLet");
+         fprintf (stream, "\n");
+       }
+    }
+}
+
+static void
+debug_output_wbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
        exit (1);
      }
  
-  unicodedata_filename = argv[1];
-  proplist_filename = argv[2];
-  derivedproplist_filename = argv[3];
-  scripts_filename = argv[4];
-  blocks_filename = argv[5];
-  proplist30_filename = argv[6];
-  eastasianwidth_filename = argv[7];
-  linebreak_filename = argv[8];
-  version = argv[9];
+  debug_output_wbp (stream);
  
-  fill_attributes (unicodedata_filename);
-  clear_properties ();
-  fill_properties (proplist_filename);
-  fill_properties (derivedproplist_filename);
-  fill_properties30 (proplist30_filename);
-  fill_scripts (scripts_filename);
-  fill_blocks (blocks_filename);
-  fill_width (eastasianwidth_filename);
-  fill_org_lbp (linebreak_filename);
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
  
-  output_categories (version);
-  output_category ("unictype/categ_of.h", version);
-  output_combclass ("unictype/combining.h", version);
-  output_bidi_category ("unictype/bidi_of.h", version);
-  output_decimal_digit_test ("../tests/unictype/test-decdigit.h", version);
-  output_decimal_digit ("unictype/decdigit.h", version);
-  output_digit_test ("../tests/unictype/test-digit.h", version);
-  output_digit ("unictype/digit.h", version);
-  output_numeric_test ("../tests/unictype/test-numeric.h", version);
-  output_numeric ("unictype/numeric.h", version);
-  output_mirror ("unictype/mirror.h", version);
-  output_properties (version);
-  output_scripts (version);
-  output_scripts_byname (version);
-  output_blocks (version);
-  output_ident_properties (version);
-  output_old_ctype (version);
+/* The word break property from the WordBreakProperty.txt file.  */
+int unicode_org_wbp[0x110000];
  
-  debug_output_lbrk_tables ("unilbrk/lbrkprop.txt");
-  debug_output_org_lbrk_tables ("unilbrk/lbrkprop_org.txt");
-  output_lbrk_tables ("unilbrk/lbrkprop1.h", "unilbrk/lbrkprop2.h", version);
+/* Stores in unicode_org_wbp[] the word break property from the
+   WordBreakProperty.txt file.  */
+static void
+fill_org_wbp (const char *wordbreakproperty_filename)
+{
+  unsigned int i;
+  FILE *stream;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_org_wbp[i] = WBP_OTHER;
+
+  stream = fopen (wordbreakproperty_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", wordbreakproperty_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      char buf[200+1];
+      unsigned int i1, i2;
+      char padding[200+1];
+      char propname[200+1];
+      int propvalue;
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;
+
+      if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, propname) != 4)
+       {
+         if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, propname) != 3)
+           {
+             fprintf (stderr, "parse error in '%s'\n",
+                      wordbreakproperty_filename);
+             exit (1);
+           }
+         i2 = i1;
+       }
+#define PROP(name,value) \
+      if (strcmp (propname, name) == 0) propvalue = value; else
+      PROP ("CR", WBP_CR)
+      PROP ("LF", WBP_LF)
+      PROP ("Newline", WBP_NEWLINE)
+      PROP ("Extend", WBP_EXTEND)
+      PROP ("Format", WBP_FORMAT)
+      PROP ("Katakana", WBP_KATAKANA)
+      PROP ("ALetter", WBP_ALETTER)
+      PROP ("MidNumLet", WBP_MIDNUMLET)
+      PROP ("MidLetter", WBP_MIDLETTER)
+      PROP ("MidNum", WBP_MIDNUM)
+      PROP ("Numeric", WBP_NUMERIC)
+      PROP ("ExtendNumLet", WBP_EXTENDNUMLET)
+#undef PROP
+       {
+         fprintf (stderr, "unknown property value '%s' in '%s'\n", propname,
+                  wordbreakproperty_filename);
+         exit (1);
+       }
+      if (!(i1 <= i2 && i2 < 0x110000))
+       abort ();
+
+      for (i = i1; i <= i2; i++)
+       unicode_org_wbp[i] = propvalue;
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", wordbreakproperty_filename);
+      exit (1);
+    }
+}
+
+/* Output the word break property from unicode_org_wbp[], in readable form.  */
+static void
+debug_output_org_wbp (FILE *stream)
+{
+  unsigned int i;
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int propvalue = unicode_org_wbp[i];
+      if (propvalue != WBP_OTHER)
+       {
+         fprintf (stream, "0x%04X", i);
+#define PROP(name,value) \
+         if (propvalue == value) fprintf (stream, " " name); else
+         PROP ("CR", WBP_CR)
+         PROP ("LF", WBP_LF)
+         PROP ("Newline", WBP_NEWLINE)
+         PROP ("Extend", WBP_EXTEND)
+         PROP ("Format", WBP_FORMAT)
+         PROP ("Katakana", WBP_KATAKANA)
+         PROP ("ALetter", WBP_ALETTER)
+         PROP ("MidNumLet", WBP_MIDNUMLET)
+         PROP ("MidLetter", WBP_MIDLETTER)
+         PROP ("MidNum", WBP_MIDNUM)
+         PROP ("Numeric", WBP_NUMERIC)
+         PROP ("ExtendNumLet", WBP_EXTENDNUMLET)
+#undef PROP
+         fprintf (stream, " ??");
+         fprintf (stream, "\n");
+       }
+    }
+}
+
+static void
+debug_output_org_wbrk_tables (const char *filename)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  debug_output_org_wbp (stream);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.  */
+#define TABLE wbp_table
+#define ELEMENT unsigned char
+#define DEFAULT WBP_OTHER
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+static void
+output_wbp (FILE *stream)
+{
+  unsigned int i;
+  struct wbp_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  t.p = 7;
+  t.q = 9;
+  wbp_table_init (&t);
+
+  for (i = 0; i < 0x110000; i++)
+    {
+      int attr = get_wbp (i);
+
+      /* Now attr should contain exactly one bit.  */
+      if (attr == 0 || ((attr & (attr - 1)) != 0))
+       abort ();
+
+      if (attr != 1 << WBP_OTHER)
+       {
+         unsigned int log2_attr;
+         for (log2_attr = 0; attr > 1; attr >>= 1, log2_attr++);
+
+         wbp_table_add (&t, i, log2_attr);
+       }
+    }
+
+  wbp_table_finalize (&t);
+
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define wbrkprop_header_%d %d\n", i,
+            ((uint32_t *) t.result)[i]);
+  fprintf (stream, "\n");
+  fprintf (stream, "typedef struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    int level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "wbrkprop_t;\n");
+  fprintf (stream, "static const wbrkprop_t uniwbrkprop =\n");
+  fprintf (stream, "{\n");
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zu",
+                (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+       fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+       fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+       fprintf (stream, " %5d", -1);
+      else
+       fprintf (stream, " %5zu",
+                (offset - level3_offset) / sizeof (unsigned char));
+      if (i+1 < t.level2_size << t.q)
+       fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 4)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
+      const char *value_string;
+      switch (value)
+       {
+#define CASE(x) case x: value_string = #x; break;
+         CASE(WBP_OTHER);
+         CASE(WBP_CR);
+         CASE(WBP_LF);
+         CASE(WBP_NEWLINE);
+         CASE(WBP_EXTEND);
+         CASE(WBP_FORMAT);
+         CASE(WBP_KATAKANA);
+         CASE(WBP_ALETTER);
+         CASE(WBP_MIDNUMLET);
+         CASE(WBP_MIDLETTER);
+         CASE(WBP_MIDNUM);
+         CASE(WBP_NUMERIC);
+         CASE(WBP_EXTENDNUMLET);
+#undef CASE
+         default:
+           abort ();
+       }
+      if (i > 0 && (i % 4) == 0)
+       fprintf (stream, "\n   ");
+      fprintf (stream, " %s%s", value_string,
+              (i+1 < t.level3_size << t.p ? "," : ""));
+    }
+  if (t.level3_size << t.p > 4)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+}
+
+/* Write the word break property table for Unicode VERSION to FILENAME.
+   The file starts with an identifying banner and a GPL notice, followed by
+   the table emitted by output_wbp.
+   Exits with status 1 if the file cannot be opened or written.  */
+static void
+output_wbrk_tables (const char *filename, const char *version)
+{
+  FILE *stream;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  /* This file holds the word break table, not the line break table.  */
+  fprintf (stream, "/* Word breaking properties of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-uni-tables for Unicode %s.  */\n",
+           version);
+  fprintf (stream, "\n");
+
+  /* Put a GPL header on it.  The gnulib module is under LGPL (although it
+     still carries the GPL header), and it's gnulib-tool which replaces the
+     GPL header with an LGPL header.  */
+  fprintf (stream, "/* Copyright (C) 2000-2002, 2004, 2007-2009 Free Software Foundation, Inc.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+  fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+  fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+  fprintf (stream, "   (at your option) any later version.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+  fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  fprintf (stream, "   GNU General Public License for more details.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+  fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+  fprintf (stream, "\n");
+
+  output_wbp (stream);
+
+  /* fclose flushes, so a late write error is caught here as well.  */
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Maximum number of characters into which a single Unicode character can be
+   decomposed.  This is the size of the result array passed to
+   get_decomposition, which aborts if a decomposition field has more
+   elements than this.  */
+#define MAX_DECOMP_LENGTH 18
+
+/* Decomposition types, as returned by get_decomposition.
+   UC_DECOMP_CANONICAL is used when the decomposition field carries no
+   <tag>; every other value corresponds to the <tag> shown in its comment.  */
+enum
+{
+  UC_DECOMP_CANONICAL,/*            Canonical decomposition.                  */
+  UC_DECOMP_FONT,    /*   <font>    A font variant (e.g. a blackletter form). */
+  UC_DECOMP_NOBREAK, /* <noBreak>   A no-break version of a space or hyphen.  */
+  UC_DECOMP_INITIAL, /* <initial>   An initial presentation form (Arabic).    */
+  UC_DECOMP_MEDIAL,  /*  <medial>   A medial presentation form (Arabic).      */
+  UC_DECOMP_FINAL,   /*  <final>    A final presentation form (Arabic).       */
+  UC_DECOMP_ISOLATED,/* <isolated>  An isolated presentation form (Arabic).   */
+  UC_DECOMP_CIRCLE,  /*  <circle>   An encircled form.                        */
+  UC_DECOMP_SUPER,   /*  <super>    A superscript form.                       */
+  UC_DECOMP_SUB,     /*   <sub>     A subscript form.                         */
+  UC_DECOMP_VERTICAL,/* <vertical>  A vertical layout presentation form.      */
+  UC_DECOMP_WIDE,    /*   <wide>    A wide (or zenkaku) compatibility character. */
+  UC_DECOMP_NARROW,  /*  <narrow>   A narrow (or hankaku) compatibility character. */
+  UC_DECOMP_SMALL,   /*  <small>    A small variant form (CNS compatibility). */
+  UC_DECOMP_SQUARE,  /*  <square>   A CJK squared font variant.               */
+  UC_DECOMP_FRACTION,/* <fraction>  A vulgar fraction form.                   */
+  UC_DECOMP_COMPAT   /*  <compat>   Otherwise unspecified compatibility character. */
+};
+
+/* Return the decomposition for a Unicode character (ignoring Hangul Jamo
+   decompositions).  Return the type, or -1 for none.  */
+static int
+get_decomposition (unsigned int ch,
+                  unsigned int *lengthp, unsigned int decomposed[MAX_DECOMP_LENGTH])
+{
+  const char *decomposition = unicode_attributes[ch].decomposition;
+
+  if (decomposition != NULL && decomposition[0] != '\0')
+    {
+      int type = UC_DECOMP_CANONICAL;
+      unsigned int length;
+      char *endptr;
+
+      if (decomposition[0] == '<')
+       {
+         const char *rangle;
+         size_t typelen;
+
+         rangle = strchr (decomposition + 1, '>');
+         if (rangle == NULL)
+           abort ();
+         typelen = rangle + 1 - decomposition;
+#define TYPE(t1,t2) \
+         if (typelen == (sizeof (t1) - 1) && memcmp (decomposition, t1, typelen) == 0) \
+           type = t2; \
+         else
+         TYPE ("<font>", UC_DECOMP_FONT)
+         TYPE ("<noBreak>", UC_DECOMP_NOBREAK)
+         TYPE ("<initial>", UC_DECOMP_INITIAL)
+         TYPE ("<medial>", UC_DECOMP_MEDIAL)
+         TYPE ("<final>", UC_DECOMP_FINAL)
+         TYPE ("<isolated>", UC_DECOMP_ISOLATED)
+         TYPE ("<circle>", UC_DECOMP_CIRCLE)
+         TYPE ("<super>", UC_DECOMP_SUPER)
+         TYPE ("<sub>", UC_DECOMP_SUB)
+         TYPE ("<vertical>", UC_DECOMP_VERTICAL)
+         TYPE ("<wide>", UC_DECOMP_WIDE)
+         TYPE ("<narrow>", UC_DECOMP_NARROW)
+         TYPE ("<small>", UC_DECOMP_SMALL)
+         TYPE ("<square>", UC_DECOMP_SQUARE)
+         TYPE ("<fraction>", UC_DECOMP_FRACTION)
+         TYPE ("<compat>", UC_DECOMP_COMPAT)
+           {
+             fprintf (stderr, "unknown decomposition type %*s\n", (int)typelen, decomposition);
+             exit (1);
+           }
+#undef TYPE
+         decomposition = rangle + 1;
+         if (decomposition[0] == ' ')
+           decomposition++;
+       }
+      for (length = 0; length < MAX_DECOMP_LENGTH; length++)
+       {
+         decomposed[length] = strtoul (decomposition, &endptr, 16);
+         if (endptr == decomposition)
+           break;
+         decomposition = endptr;
+         if (decomposition[0] == ' ')
+           decomposition++;
+       }
+      if (*decomposition != '\0')
+       /* MAX_DECOMP_LENGTH is too small.  */
+       abort ();
+
+      *lengthp = length;
+      return type;
+    }
+  else
+    return -1;
+}
+
+/* Construction of sparse 3-level tables.
+   This instantiates the "3level.h" template as 'struct decomp_table' with
+   uint16_t elements; the code in this file uses the resulting
+   decomp_table_init, decomp_table_add and decomp_table_finalize functions.
+   DEFAULT, (uint16_t)(-1), is presumably the value of slots that were never
+   added (verify against 3level.h).
+   xmalloc and xrealloc are mapped to plain malloc and realloc.  */
+#define TABLE decomp_table
+#define ELEMENT uint16_t
+#define DEFAULT (uint16_t)(-1)
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Emit the decomposition tables.
+   STREAM1 receives the declarations: the extern declaration of
+   gl_uninorm_decomp_chars_table, the decomp_header_N macros, the
+   decomp_index_table_t type and the extern declaration of
+   gl_uninorm_decomp_index_table.
+   STREAM2 receives the definitions: the packed decomposition data, 3 bytes
+   per decomposition element, and the 3-level index table that maps a
+   character to the index of its first element in that data.
+   Aborts if a value does not fit into the packed representation.  */
+static void
+output_decomposition (FILE *stream1, FILE *stream2)
+{
+  struct decomp_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+  unsigned int entry_index; /* Number of 3-byte entries emitted so far.  */
+  unsigned int ch;
+  unsigned int i;
+
+  t.p = 5;
+  t.q = 5;
+  decomp_table_init (&t);
+
+  fprintf (stream1, "extern const unsigned char gl_uninorm_decomp_chars_table[];\n");
+  fprintf (stream1, "\n");
+  fprintf (stream2, "const unsigned char gl_uninorm_decomp_chars_table[] =\n{");
+  entry_index = 0;
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      unsigned int length;
+      unsigned int decomposed[MAX_DECOMP_LENGTH];
+      int type = get_decomposition (ch, &length, decomposed);
+
+      if (type >= 0)
+        {
+          uint16_t index_value;
+
+          /* The index must fit in 15 bits; bit 15 tells whether the
+             decomposition is non-canonical.  */
+          if (!(entry_index < (1 << 15)))
+            abort ();
+          index_value =
+            ((type == UC_DECOMP_CANONICAL ? 0 : 1) << 15) | entry_index;
+          /* (uint16_t)(-1) is the table's DEFAULT value and is printed as -1
+             below, so a real entry must never be encoded as that value.  */
+          if (index_value == (uint16_t)(-1))
+            abort ();
+          decomp_table_add (&t, ch, index_value);
+
+          /* Produce 'length' entries of 3 bytes each.  */
+          if (length == 0)
+            /* We would need a special representation of zero-length entries.  */
+            abort ();
+          for (i = 0; i < length; i++)
+            {
+              if (entry_index > 0)
+                fprintf (stream2, ",");
+              if ((entry_index % 4) == 0)
+                fprintf (stream2, "\n ");
+              /* Each element must fit in the low 18 bits of its entry.  */
+              if (!(decomposed[i] < (1 << 18)))
+                abort ();
+              /* Bit 23: more elements follow.  Bits 22..18: the type, in
+                 the first element only.  Bits 17..0: the code point.  */
+              fprintf (stream2, " 0x%02X, 0x%02X, 0x%02X",
+                       (((i+1 < length ? (1 << 23) : 0)
+                         | (i == 0 ? (type << 18) : 0)
+                         | decomposed[i]) >> 16) & 0xff,
+                       (decomposed[i] >> 8) & 0xff,
+                       decomposed[i] & 0xff);
+              entry_index++;
+            }
+        }
+    }
+
+  fprintf (stream2, "\n};\n");
+  fprintf (stream2, "\n");
+
+  decomp_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process: 5 header words, then
+     level1, then level2, then level3.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream1, "#define decomp_header_%d %d\n", i,
+             ((uint32_t *) t.result)[i]);
+  fprintf (stream1, "\n");
+  fprintf (stream1, "typedef struct\n");
+  fprintf (stream1, "  {\n");
+  fprintf (stream1, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream1, "    int level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream1, "    unsigned short level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream1, "  }\n");
+  fprintf (stream1, "decomp_index_table_t;\n");
+  fprintf (stream1, "extern const decomp_index_table_t gl_uninorm_decomp_index_table;\n");
+  fprintf (stream2, "const decomp_index_table_t gl_uninorm_decomp_index_table =\n");
+  fprintf (stream2, "{\n");
+
+  /* Level 1: byte offsets into t.result become level2 element indices;
+     an offset of 0 (no level2 block) becomes -1.  */
+  fprintf (stream2, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+        fprintf (stream2, " %5d", -1);
+      else
+        fprintf (stream2, " %5zu",
+                 (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+        fprintf (stream2, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+
+  /* Level 2: byte offsets become level3 element indices, 0 becomes -1.  */
+  fprintf (stream2, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream2, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+        fprintf (stream2, " %5d", -1);
+      else
+        fprintf (stream2, " %5zu",
+                 (offset - level3_offset) / sizeof (uint16_t));
+      if (i+1 < t.level2_size << t.q)
+        fprintf (stream2, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " },\n");
+
+  /* Level 3: the stored values themselves; (uint16_t)(-1) is printed
+     as -1.  */
+  fprintf (stream2, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      uint16_t value = ((uint16_t *) (t.result + level3_offset))[i];
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream2, "\n   ");
+      fprintf (stream2, " %5d", value == (uint16_t)(-1) ? -1 : value);
+      if (i+1 < t.level3_size << t.p)
+        fprintf (stream2, ",");
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream2, "\n ");
+  fprintf (stream2, " }\n");
+  fprintf (stream2, "};\n");
+}
+
+/* Create the two decomposition table files for Unicode VERSION.
+   FILENAME1 receives what output_decomposition writes to its first stream,
+   FILENAME2 what it writes to its second stream.  Both files start with the
+   same banner.  Exits with status 1 if a file cannot be opened or
+   written.  */
+static void
+output_decomposition_tables (const char *filename1, const char *filename2, const char *version)
+{
+  enum { N_FILES = 2 };
+  const char *names[N_FILES];
+  FILE *files[N_FILES];
+  size_t k;
+
+  names[0] = filename1;
+  names[1] = filename2;
+
+  /* Open both files before anything is written.  */
+  for (k = 0; k < N_FILES; k++)
+    {
+      files[k] = fopen (names[k], "w");
+      if (files[k] == NULL)
+        {
+          fprintf (stderr, "cannot open '%s' for writing\n", names[k]);
+          exit (1);
+        }
+    }
+
+  /* Identical banner at the top of each file.  */
+  for (k = 0; k < N_FILES; k++)
+    fprintf (files[k],
+             "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n"
+             "/* Decomposition of Unicode characters.  */\n"
+             "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n"
+             "\n",
+             version);
+
+  output_decomposition (files[0], files[1]);
+
+  /* Check for write errors and close, in the order the files were opened.  */
+  for (k = 0; k < N_FILES; k++)
+    if (ferror (files[k]) || fclose (files[k]))
+      {
+        fprintf (stderr, "error writing to '%s'\n", names[k]);
+        exit (1);
+      }
+}
+
+/* The "excluded from composition" property from the CompositionExclusions.txt file.
+   Indexed by code point.  fill_composition_exclusions sets an element to 1
+   if the code point is listed in that file, and to 0 otherwise.  */
+char unicode_composition_exclusions[0x110000];
+
+/* Fill unicode_composition_exclusions from the file named
+   COMPOSITIONEXCLUSIONS_FILENAME.
+   Every element is first reset to 0.  Each line that does not start with
+   '#' must begin with a hexadecimal code point, whose element is set to 1.
+   Exits with status 1 on an open, read or parse error; aborts if a code
+   point is not below 0x110000.  */
+static void
+fill_composition_exclusions (const char *compositionexclusions_filename)
+{
+  FILE *stream = fopen (compositionexclusions_filename, "r");
+  char line[200+1];
+  unsigned int ch;
+
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", compositionexclusions_filename);
+      exit (1);
+    }
+
+  for (ch = 0; ch < 0x110000; ch++)
+    unicode_composition_exclusions[ch] = 0;
+
+  /* Read up to 200 non-newline characters at a time; the trailing "\n" in
+     the format skips the whitespace that follows.  Stop as soon as nothing
+     could be read.  */
+  while (fscanf (stream, "%200[^\n]\n", line) >= 1)
+    {
+      unsigned int code;
+
+      /* Skip empty and comment lines.  */
+      if (line[0] == '\0' || line[0] == '#')
+        continue;
+
+      if (sscanf (line, "%X", &code) != 1)
+        {
+          fprintf (stderr, "parse error in '%s'\n", compositionexclusions_filename);
+          exit (1);
+        }
+      if (code >= 0x110000)
+        abort ();
+
+      unicode_composition_exclusions[code] = 1;
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", compositionexclusions_filename);
+      exit (1);
+    }
+}
+
+/* Write a plain-text listing of the canonical composition pairs to FILENAME,
+   one line per pair: code1, code2 and the combined character in hexadecimal,
+   then the combining class string of code2, all separated by tabs.
+   Exits with status 1 if the file cannot be opened or written.  */
+static void
+debug_output_composition_tables (const char *filename)
+{
+  FILE *stream = fopen (filename, "w");
+  unsigned int combined;
+
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  for (combined = 0; combined < 0x110000; combined++)
+    {
+      unsigned int length;
+      unsigned int decomposed[MAX_DECOMP_LENGTH];
+      unsigned int code1, code2;
+      int type = get_decomposition (combined, &length, decomposed);
+
+      /* Consider only canonical, binary decompositions.  This excludes
+         singleton decompositions.  */
+      if (type != UC_DECOMP_CANONICAL || length != 2)
+        continue;
+
+      code1 = decomposed[0];
+      code2 = decomposed[1];
+
+      /* Exclude decompositions where the first part is not a starter,
+         i.e. is not of canonical combining class 0.  */
+      if (strcmp (unicode_attributes[code1].combining, "0") != 0)
+        continue;
+      /* Exclude characters listed in CompositionExclusions.txt.  */
+      if (unicode_composition_exclusions[combined])
+        continue;
+
+      /* The combined character must now also be a starter.  Verify this.  */
+      if (strcmp (unicode_attributes[combined].combining, "0") != 0)
+        abort ();
+
+      fprintf (stream, "0x%04X\t0x%04X\t0x%04X\t%s\n",
+               code1,
+               code2,
+               combined,
+               unicode_attributes[code2].combining);
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Write the canonical composition table for Unicode VERSION to FILENAME,
+   in the form of a gperf input file: a banner, a GPL notice, the gperf
+   declarations, and one keyword line per composition pair.
+   Exits with status 1 if the file cannot be opened or written; aborts if a
+   code point of a pair does not fit in 16 bits.  */
+static void
+output_composition_tables (const char *filename, const char *version)
+{
+  FILE *stream = fopen (filename, "w");
+  unsigned int combined;
+
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Canonical composition of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-uni-tables for Unicode %s.  */\n",
+           version);
+  fprintf (stream, "\n");
+
+  /* Put a GPL header on it.  The gnulib module is under LGPL (although it
+     still carries the GPL header), and it's gnulib-tool which replaces the
+     GPL header with an LGPL header.  */
+  fprintf (stream, "/* Copyright (C) 2009 Free Software Foundation, Inc.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+  fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+  fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+  fprintf (stream, "   (at your option) any later version.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+  fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  fprintf (stream, "   GNU General Public License for more details.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+  fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+  fprintf (stream, "\n");
+
+  /* The composition table is a set of mappings (code1, code2) -> combined,
+     with 928 entries,
+     367 values for code1 (from 0x003C to 0x30FD),
+      54 values for code2 (from 0x0300 to 0x309A).
+     For a fixed code1, there are from 1 to 19 possible values for code2.
+     For a fixed code2, there are from 1 to 117 possible values for code1.
+     This is a very sparse matrix.
+
+     We want an O(1) hash lookup.
+
+     We could implement the hash lookup by mapping (code1, code2) to a linear
+     combination  mul1*code1 + mul2*code2, which is then used as an index into
+     a 3-level table.  But this leads to a table of size 37 KB.
+
+     We use gperf to implement the hash lookup, giving it the 928 sets of
+     4 bytes (code1, code2) as input.  gperf generates a hash table of size
+     1527, which is quite good (60% filled).  It requires an auxiliary table
+     lookup in a table of size 0.5 KB.  The total tables size is 11 KB.  */
+
+  /* The gperf declarations section.  */
+  fprintf (stream, "struct composition_rule { char codes[4]; };\n");
+  fprintf (stream, "%%struct-type\n");
+  fprintf (stream, "%%language=ANSI-C\n");
+  fprintf (stream, "%%define slot-name codes\n");
+  fprintf (stream, "%%define hash-function-name gl_uninorm_compose_hash\n");
+  fprintf (stream, "%%define lookup-function-name gl_uninorm_compose_lookup\n");
+  fprintf (stream, "%%compare-lengths\n");
+  fprintf (stream, "%%compare-strncmp\n");
+  fprintf (stream, "%%readonly-tables\n");
+  fprintf (stream, "%%omit-struct-type\n");
+  fprintf (stream, "%%%%\n");
+
+  /* The gperf keywords section: one line per composition pair.  */
+  for (combined = 0; combined < 0x110000; combined++)
+    {
+      unsigned int length;
+      unsigned int decomposed[MAX_DECOMP_LENGTH];
+      unsigned int code1, code2;
+      int type = get_decomposition (combined, &length, decomposed);
+
+      /* Consider only canonical, binary decompositions.  This excludes
+         singleton decompositions.  */
+      if (type != UC_DECOMP_CANONICAL || length != 2)
+        continue;
+
+      code1 = decomposed[0];
+      code2 = decomposed[1];
+
+      /* Exclude decompositions where the first part is not a starter,
+         i.e. is not of canonical combining class 0.  */
+      if (strcmp (unicode_attributes[code1].combining, "0") != 0)
+        continue;
+      /* Exclude characters listed in CompositionExclusions.txt.  */
+      if (unicode_composition_exclusions[combined])
+        continue;
+
+      /* The combined character must now also be a starter.  Verify this.  */
+      if (strcmp (unicode_attributes[combined].combining, "0") != 0)
+        abort ();
+
+      /* Each code point is written with 16 bits only.  */
+      if (code1 >= 0x10000)
+        abort ();
+      if (code2 >= 0x10000)
+        abort ();
+      if (combined >= 0x10000)
+        abort ();
+
+      /* The key is code1 and code2, each as two bytes, high byte first.  */
+      fprintf (stream, "\"\\x%02x\\x%02x\\x%02x\\x%02x\", 0x%04x\n",
+               (code1 >> 8) & 0xff, code1 & 0xff,
+               (code2 >> 8) & 0xff, code2 & 0xff,
+               combined);
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* Output the test for a simple character mapping table to the given file.
+   FILENAME is the file to create.
+   FUNCTION_NAME is the name of the function under test; it is emitted as
+   the expansion of the MAP macro.
+   FUNC computes the mapping; every character that FUNC does not map to
+   itself is listed as a { character, value } pair.
+   VERSION is the Unicode version named in the banner.
+   Exits with status 1 if the file cannot be opened or written.  */
+
+static void
+output_simple_mapping_test (const char *filename,
+                            const char *function_name,
+                            unsigned int (*func) (unsigned int),
+                            const char *version)
+{
+  FILE *stream;
+  bool need_comma;
+  unsigned int ch;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Test the Unicode character mapping functions.\n");
+  fprintf (stream, "   Copyright (C) 2009 Free Software Foundation, Inc.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is free software: you can redistribute it and/or modify\n");
+  fprintf (stream, "   it under the terms of the GNU General Public License as published by\n");
+  fprintf (stream, "   the Free Software Foundation; either version 3 of the License, or\n");
+  fprintf (stream, "   (at your option) any later version.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   This program is distributed in the hope that it will be useful,\n");
+  fprintf (stream, "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  fprintf (stream, "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  fprintf (stream, "   GNU General Public License for more details.\n");
+  fprintf (stream, "\n");
+  fprintf (stream, "   You should have received a copy of the GNU General Public License\n");
+  fprintf (stream, "   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */\n");
+  fprintf (stream, "\n");
+  /* Name this generator, as the other banners in this file do.  */
+  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
+           version);
+  fprintf (stream, "\n");
+  fprintf (stream, "#include \"test-mapping-part1.h\"\n");
+  fprintf (stream, "\n");
+
+  /* List the non-identity mappings, separated by ",\n" and without a
+     trailing comma.  */
+  need_comma = false;
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      unsigned int value = func (ch);
+
+      if (value != ch)
+        {
+          if (need_comma)
+            fprintf (stream, ",\n");
+          fprintf (stream, "    { 0x%04X, 0x%04X }", ch, value);
+          need_comma = true;
+        }
+    }
+  if (need_comma)
+    fprintf (stream, "\n");
+
+  fprintf (stream, "\n");
+  fprintf (stream, "#define MAP(c) %s (c)\n", function_name);
+  fprintf (stream, "#include \"test-mapping-part2.h\"\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Construction of sparse 3-level tables.
+   This instantiates the "3level.h" template as 'struct mapping_table' with
+   int32_t elements; output_simple_mapping uses the resulting
+   mapping_table_init, mapping_table_add and mapping_table_finalize
+   functions.  DEFAULT, 0, is presumably the value of slots that were never
+   added (verify against 3level.h).
+   xmalloc and xrealloc are mapped to plain malloc and realloc.  */
+#define TABLE mapping_table
+#define ELEMENT int32_t
+#define DEFAULT 0
+#define xmalloc malloc
+#define xrealloc realloc
+#include "3level.h"
+
+/* Output a simple character mapping table to the given file.
+   FILENAME is the file to create.
+   FUNC computes the mapping; what gets stored for each character is the
+   difference FUNC (ch) - ch.
+   VERSION is the Unicode version named in the banner.
+   The output consists of the mapping_header_N macros and the 3-level table
+   'u_mapping'.
+   Exits with status 1 if the file cannot be opened or written.  */
+
+static void
+output_simple_mapping (const char *filename,
+                       unsigned int (*func) (unsigned int),
+                       const char *version)
+{
+  FILE *stream;
+  unsigned int ch, i;
+  struct mapping_table t;
+  unsigned int level1_offset, level2_offset, level3_offset;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Simple character mapping of Unicode characters.  */\n");
+  /* Name this generator, as the other banners in this file do.  */
+  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
+           version);
+
+  t.p = 7;
+  t.q = 9;
+  mapping_table_init (&t);
+
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      /* Store the difference to the character itself.  */
+      int value = (int) func (ch) - (int) ch;
+
+      mapping_table_add (&t, ch, value);
+    }
+
+  mapping_table_finalize (&t);
+
+  /* Offsets in t.result, in memory of this process.  */
+  level1_offset =
+    5 * sizeof (uint32_t);
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t.level1_size * sizeof (uint32_t)
+    + (t.level2_size << t.q) * sizeof (uint32_t);
+
+  for (i = 0; i < 5; i++)
+    fprintf (stream, "#define mapping_header_%d %d\n", i,
+             ((uint32_t *) t.result)[i]);
+  fprintf (stream, "static const\n");
+  fprintf (stream, "struct\n");
+  fprintf (stream, "  {\n");
+  fprintf (stream, "    int level1[%zu];\n", t.level1_size);
+  fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
+  fprintf (stream, "    int level3[%zu << %d];\n", t.level3_size, t.p);
+  fprintf (stream, "  }\n");
+  fprintf (stream, "u_mapping =\n");
+  fprintf (stream, "{\n");
+
+  /* Level 1: byte offsets into t.result become level2 element indices;
+     an offset of 0 (no level2 block) becomes -1.  */
+  fprintf (stream, "  {");
+  if (t.level1_size > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level1_size; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level1_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level2_offset) / sizeof (uint32_t));
+      if (i+1 < t.level1_size)
+        fprintf (stream, ",");
+    }
+  if (t.level1_size > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+
+  /* Level 2: byte offsets become level3 element indices, 0 becomes -1.  */
+  fprintf (stream, "  {");
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level2_size << t.q; i++)
+    {
+      uint32_t offset;
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      offset = ((uint32_t *) (t.result + level2_offset))[i];
+      if (offset == 0)
+        fprintf (stream, " %5d", -1);
+      else
+        fprintf (stream, " %5zu",
+                 (offset - level3_offset) / sizeof (int32_t));
+      if (i+1 < t.level2_size << t.q)
+        fprintf (stream, ",");
+    }
+  if (t.level2_size << t.q > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " },\n");
+
+  /* Level 3: the stored differences themselves.  */
+  fprintf (stream, "  {");
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n   ");
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      if (i > 0 && (i % 8) == 0)
+        fprintf (stream, "\n   ");
+      fprintf (stream, " %5d", ((int32_t *) (t.result + level3_offset))[i]);
+      if (i+1 < t.level3_size << t.p)
+        fprintf (stream, ",");
+    }
+  if (t.level3_size << t.p > 8)
+    fprintf (stream, "\n ");
+  fprintf (stream, " }\n");
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+/* A special casing context.
+   A context is negated through x -> -x.
+   SCC_ALWAYS is the value fill_casing_rules starts from before it looks at
+   the condition field of a rule.  The other names presumably mirror the
+   context conditions of SpecialCasing.txt (Final_Sigma, After_Soft_Dotted,
+   More_Above, Before_Dot, After_I) -- TODO confirm against the parser.  */
+enum
+{
+  SCC_ALWAYS             = 0,
+  SCC_FINAL_SIGMA,
+  SCC_AFTER_SOFT_DOTTED,
+  SCC_MORE_ABOVE,
+  SCC_BEFORE_DOT,
+  SCC_AFTER_I
+};
+
+/* A special casing rule.
+   Each mapping holds up to 3 code points; unused trailing elements are
+   presumably 0, since fill_casing_rules zero-fills its scratch arrays
+   before parsing (verify where the rule is stored).  */
+struct special_casing_rule
+{
+  /* The code point the rule applies to.  */
+  unsigned int code;
+  unsigned int lower_mapping[3];
+  unsigned int title_mapping[3];
+  unsigned int upper_mapping[3];
+  unsigned int casefold_mapping[3];
+  /* Presumably a two-letter language code, or NULL if the rule is not
+     language specific -- TODO confirm against fill_casing_rules.  */
+  const char *language;
+  /* One of the SCC_* values, or its negation.  */
+  int context;
+};
+
+/* The special casing rules.
+   casing_rules is a growable array of pointers to rules, of which the first
+   num_casing_rules elements are in use and allocated_casing_rules elements
+   are allocated.  add_casing_rule appends to it; fill_casing_rules resets
+   all three variables before parsing.  */
+struct special_casing_rule **casing_rules;
+unsigned int num_casing_rules;
+unsigned int allocated_casing_rules;
+
+/* Append NEW_RULE to casing_rules, growing the array when it is full.
+   The capacity doubles on each growth, starting at 16.  Only the pointer is
+   stored; the rule itself is not copied.
+   Exits with status 1 if memory is exhausted.  */
+static void
+add_casing_rule (struct special_casing_rule *new_rule)
+{
+  if (num_casing_rules == allocated_casing_rules)
+    {
+      unsigned int new_allocated = 2 * allocated_casing_rules;
+      struct special_casing_rule **new_rules;
+
+      if (new_allocated < 16)
+        new_allocated = 16;
+      /* Keep the old pointer and capacity until the reallocation is known
+         to have succeeded.  */
+      new_rules = realloc (casing_rules, new_allocated * sizeof *new_rules);
+      if (new_rules == NULL)
+        {
+          fprintf (stderr, "memory exhausted\n");
+          exit (1);
+        }
+      casing_rules = new_rules;
+      allocated_casing_rules = new_allocated;
+    }
+  casing_rules[num_casing_rules++] = new_rule;
+}
+
+/* Stores in casing_rules the special casing rules found in
+   specialcasing_filename.  */
+static void
+fill_casing_rules (const char *specialcasing_filename)
+{
+  FILE *stream;
+
+  stream = fopen (specialcasing_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", specialcasing_filename);
+      exit (1);
+    }
+
+  casing_rules = NULL;
+  num_casing_rules = 0;
+  allocated_casing_rules = 0;
+
+  for (;;)
+    {
+      char buf[200+1];
+      char *scanptr;
+      char *endptr;
+      int i;
+
+      unsigned int code;
+      unsigned int lower_mapping[3];
+      unsigned int title_mapping[3];
+      unsigned int upper_mapping[3];
+      char *language;
+      int context;
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;
+
+      /* Scan code.  */
+      scanptr = buf;
+      code = strtoul (scanptr, &endptr, 16);
+      if (endptr == scanptr)
+       {
+         fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+         exit (1);
+       }
+      scanptr = endptr;
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan lower mapping.  */
+      for (i = 0; i < 3; i++)
+       lower_mapping[i] = 0;
+      for (i = 0; i < 3; i++)
+       {
+         while (*scanptr == ' ')
+           scanptr++;
+         if (*scanptr == ';')
+           break;
+         lower_mapping[i] = strtoul (scanptr, &endptr, 16);
+         if (endptr == scanptr)
+           {
+             fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+             exit (1);
+           }
+         scanptr = endptr;
+       }
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan title mapping.  */
+      for (i = 0; i < 3; i++)
+       title_mapping[i] = 0;
+      for (i = 0; i < 3; i++)
+       {
+         while (*scanptr == ' ')
+           scanptr++;
+         if (*scanptr == ';')
+           break;
+         title_mapping[i] = strtoul (scanptr, &endptr, 16);
+         if (endptr == scanptr)
+           {
+             fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+             exit (1);
+           }
+         scanptr = endptr;
+       }
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan upper mapping.  */
+      for (i = 0; i < 3; i++)
+       upper_mapping[i] = 0;
+      for (i = 0; i < 3; i++)
+       {
+         while (*scanptr == ' ')
+           scanptr++;
+         if (*scanptr == ';')
+           break;
+         upper_mapping[i] = strtoul (scanptr, &endptr, 16);
+         if (endptr == scanptr)
+           {
+             fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+             exit (1);
+           }
+         scanptr = endptr;
+       }
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan language and context.  */
+      language = NULL;
+      context = SCC_ALWAYS;
+      while (*scanptr == ' ')
+       scanptr++;
+      if (*scanptr != '\0' && *scanptr != '#')
+       {
+         const char *word_begin = scanptr;
+         const char *word_end;
+
+         while (*scanptr != '\0' && *scanptr != '#' && *scanptr != ';' && *scanptr != ' ')
+           scanptr++;
+         word_end = scanptr;
+
+         while (*scanptr == ' ')
+           scanptr++;
+
+         if (word_end - word_begin == 2)
+           {
+             language = (char *) malloc ((word_end - word_begin) + 1);
+             memcpy (language, word_begin, 2);
+             language[word_end - word_begin] = '\0';
+             word_begin = word_end = NULL;
+
+             if (*scanptr != '\0' && *scanptr != '#' &&  *scanptr != ';')
+               {
+                 word_begin = scanptr;
+                 while (*scanptr != '\0' && *scanptr != '#' && *scanptr != ';' && *scanptr != ' ')
+                   scanptr++;
+                 word_end = scanptr;
+               }
+           }
+
+         if (word_end > word_begin)
+           {
+             bool negate = false;
+
+             if (word_end - word_begin >= 4 && memcmp (word_begin, "Not_", 4) == 0)
+               {
+                 word_begin += 4;
+                 negate = true;
+               }
+             if (word_end - word_begin == 11 && memcmp (word_begin, "Final_Sigma", 11) == 0)
+               context = SCC_FINAL_SIGMA;
+             else if (word_end - word_begin == 17 && memcmp (word_begin, "After_Soft_Dotted", 17) == 0)
+               context = SCC_AFTER_SOFT_DOTTED;
+             else if (word_end - word_begin == 10 && memcmp (word_begin, "More_Above", 10) == 0)
+               context = SCC_MORE_ABOVE;
+             else if (word_end - word_begin == 10 && memcmp (word_begin, "Before_Dot", 10) == 0)
+               context = SCC_BEFORE_DOT;
+             else if (word_end - word_begin == 7 && memcmp (word_begin, "After_I", 7) == 0)
+               context = SCC_AFTER_I;
+             else
+               {
+                 fprintf (stderr, "unknown context type in '%s'\n", specialcasing_filename);
+                 exit (1);
+               }
+             if (negate)
+               context = - context;
+           }
+
+         if (*scanptr != '\0' && *scanptr != '#' &&  *scanptr != ';')
+           {
+             fprintf (stderr, "parse error in '%s'\n", specialcasing_filename);
+             exit (1);
+           }
+       }
+
+      /* Store the rule.  */
+      {
+       struct special_casing_rule *new_rule =
+         (struct special_casing_rule *) malloc (sizeof (struct special_casing_rule));
+       new_rule->code = code;
+       new_rule->language = language;
+       new_rule->context = context;
+       memcpy (new_rule->lower_mapping, lower_mapping, sizeof (new_rule->lower_mapping));
+       memcpy (new_rule->title_mapping, title_mapping, sizeof (new_rule->title_mapping));
+       memcpy (new_rule->upper_mapping, upper_mapping, sizeof (new_rule->upper_mapping));
+
+       add_casing_rule (new_rule);
+      }
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", specialcasing_filename);
+      exit (1);
+    }
+}
+
+/* A casefolding rule: the case-folded form of one code point, optionally
+   restricted to one language.  */
+struct casefold_rule
+{
+  unsigned int code;        /* Code point the rule applies to.  */
+  unsigned int mapping[3];  /* Folded sequence; unused trailing slots are 0.  */
+  const char *language;     /* Language the rule is restricted to, or NULL if
+                               it applies to all languages.  */
+};
+
+/* The casefolding rules: a growable array of pointers to heap-allocated
+   rules.  It is reset and filled by fill_casefolding_rules.  */
+struct casefold_rule **casefolding_rules;
+unsigned int num_casefolding_rules;       /* Number of entries in use.  */
+unsigned int allocated_casefolding_rules; /* Number of entries allocated.  */
+
+/* Stores in casefolding_rules the case folding rules found in
+   casefolding_filename.
+   Each line that does not start with '#' is expected to have the form
+     <code>; <type>; <mapping>; ...
+   where <code> is a hexadecimal code point, <type> is one of the letters
+   C, F, S, T, and <mapping> consists of up to three hexadecimal code points
+   separated by spaces.  Whatever follows the third ';' is ignored.
+   Rules of type S are dropped.  A rule of type T is stored twice, once for
+   each of the languages "tr" and "az".  All other rules are stored once,
+   with language NULL.
+   Any fopen, parse or read error terminates the program with exit status 1.
+   NOTE(review): the results of malloc and realloc are used without a NULL
+   check, and the signed loop index i is compared against the unsigned
+   languages_count.  */
+static void
+fill_casefolding_rules (const char *casefolding_filename)
+{
+  FILE *stream;
+
+  stream = fopen (casefolding_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", casefolding_filename);
+      exit (1);
+    }
+
+  casefolding_rules = NULL;          /* Start with an empty rule list.  */
+  num_casefolding_rules = 0;
+  allocated_casefolding_rules = 0;
+
+  for (;;)
+    {
+      char buf[200+1];  /* Receives at most 200 characters per fscanf call.  */
+      char *scanptr;    /* Current parse position inside buf.  */
+      char *endptr;     /* End of the number last parsed by strtoul.  */
+      int i;
+
+      unsigned int code;
+      char type;
+      unsigned int mapping[3];
+
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)  /* Nothing more could be
+                                                        read: stop.  */
+       break;
+
+      if (buf[0] == '\0' || buf[0] == '#')
+       continue;  /* Skip comment lines.  */
+
+      /* Scan code: a hexadecimal number immediately followed by ';'.  */
+      scanptr = buf;
+      code = strtoul (scanptr, &endptr, 16);
+      if (endptr == scanptr)
+       {
+         fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+         exit (1);
+       }
+      scanptr = endptr;
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan type: optional spaces, then a single letter C, F, S or T,
+         immediately followed by ';'.  */
+      while (*scanptr == ' ')
+       scanptr++;
+
+      switch (*scanptr)
+       {
+       case 'C': case 'F': case 'S': case 'T':
+         type = *scanptr;
+         break;
+       default:
+         fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+         exit (1);
+       }
+      scanptr++;
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Scan casefold mapping: up to 3 hexadecimal numbers, each optionally
+         preceded by spaces, terminated by ';'.  Elements that are not
+         present in the input remain 0.  */
+      for (i = 0; i < 3; i++)
+       mapping[i] = 0;
+      for (i = 0; i < 3; i++)
+       {
+         while (*scanptr == ' ')
+           scanptr++;
+         if (*scanptr == ';')
+           break;
+         mapping[i] = strtoul (scanptr, &endptr, 16);
+         if (endptr == scanptr)
+           {
+             fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+             exit (1);
+           }
+         scanptr = endptr;
+       }
+      if (*scanptr != ';')
+       {
+         fprintf (stderr, "parse error in '%s'\n", casefolding_filename);
+         exit (1);
+       }
+      scanptr++;
+
+      /* Ignore rules of type 'S'; we use the rules of type 'F' instead.  */
+      if (type != 'S')
+       {
+         const char * const *languages;  /* Languages to store the rule for.  */
+         unsigned int languages_count;   /* Number of elements of languages.  */
+
+         /* Type 'T' indicates that the rule is applicable to Turkish
+            languages only.  Such a rule is stored once per language listed
+            in turkish_languages.  All other rules are stored once, with a
+            NULL language.  */
+         if (type == 'T')
+           {
+             static const char * const turkish_languages[] = { "tr", "az" };
+             languages = turkish_languages;
+             languages_count = 2;
+           }
+         else
+           {
+             static const char * const all_languages[] = { NULL };
+             languages = all_languages;
+             languages_count = 1;
+           }
+
+         for (i = 0; i < languages_count; i++)
+           {
+             /* Store a new rule.  When the array is full, it is first grown
+                to twice its size, with a minimum of 16 entries.  */
+             struct casefold_rule *new_rule =
+               (struct casefold_rule *) malloc (sizeof (struct casefold_rule));
+             new_rule->code = code;
+             memcpy (new_rule->mapping, mapping, sizeof (new_rule->mapping));
+             new_rule->language = languages[i];
+
+             if (num_casefolding_rules == allocated_casefolding_rules)
+               {
+                 allocated_casefolding_rules = 2 * allocated_casefolding_rules;
+                 if (allocated_casefolding_rules < 16)
+                   allocated_casefolding_rules = 16;
+                 casefolding_rules =
+                   (struct casefold_rule **)
+                   realloc (casefolding_rules,
+                            allocated_casefolding_rules * sizeof (struct casefold_rule *));
+               }
+             casefolding_rules[num_casefolding_rules++] = new_rule;
+           }
+       }
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", casefolding_filename);
+      exit (1);
+    }
+}
+
+/* Casefold mapping, when it maps to a single character.
+   Indexed by code point.  redistribute_casefolding_rules initializes it to
+   the identity mapping and then enters the language independent rules whose
+   mapping consists of a single character.  */
+unsigned int unicode_casefold[0x110000];
+
+static unsigned int
+to_casefold (unsigned int ch)
+{
+  return unicode_casefold[ch];  /* Single-character casefold mapping of CH.
+                                   CH is not range-checked here; it must be
+                                   < 0x110000.  */
+}
+
+/* Redistribute the casefolding_rules:
+   - Rules that map to a single character, language independently, are stored
+     in unicode_casefold.
+   - Other rules are merged into casing_rules.
+   As part of this, the casefold_mapping[] field of every rule in
+   casing_rules gets filled in.
+   Aborts if a language independent single-character rule has a code
+   >= 0x110000.
+   NOTE(review): the result of malloc is used without a NULL check.  */
+static void
+redistribute_casefolding_rules (void)
+{
+  unsigned int ch, i, j;
+
+  /* Fill unicode_casefold[]: start with the identity mapping, then enter
+     the language independent rules whose mapping has a single element.  */
+  for (ch = 0; ch < 0x110000; ch++)
+    unicode_casefold[ch] = ch;
+  for (i = 0; i < num_casefolding_rules; i++)
+    {
+      struct casefold_rule *cfrule = casefolding_rules[i];
+
+      if (cfrule->language == NULL && cfrule->mapping[1] == 0)
+       {
+         ch = cfrule->code;
+         if (!(ch < 0x110000))
+           abort ();
+         unicode_casefold[ch] = cfrule->mapping[0];
+       }
+    }
+
+  /* Extend the special casing rules by filling in their casefold_mapping[]
+     field.  The initial value is the single-character casefold mapping of
+     the rule's code; the merge loop below may overwrite it.  */
+  for (j = 0; j < num_casing_rules; j++)
+    {
+      struct special_casing_rule *rule = casing_rules[j];
+      unsigned int k;
+
+      rule->casefold_mapping[0] = to_casefold (rule->code);
+      for (k = 1; k < 3; k++)
+       rule->casefold_mapping[k] = 0;
+    }
+
+  /* Now merge the other casefolding rules into casing_rules.  These are the
+     rules that are language dependent or whose mapping has more than one
+     element.  */
+  for (i = 0; i < num_casefolding_rules; i++)
+    {
+      struct casefold_rule *cfrule = casefolding_rules[i];
+
+      if (!(cfrule->language == NULL && cfrule->mapping[1] == 0))
+       {
+         /* Find a rule that applies to the same code, same language, and it
+            has context SCC_ALWAYS.  At the same time, update all rules that
+            have the same code and same or more specific language.  */
+         struct special_casing_rule *found_rule = NULL;
+
+         for (j = 0; j < num_casing_rules; j++)
+           {
+             struct special_casing_rule *rule = casing_rules[j];
+
+             if (rule->code == cfrule->code
+                 && (cfrule->language == NULL
+                     || (rule->language != NULL
+                         && strcmp (rule->language, cfrule->language) == 0)))
+               {
+                 memcpy (rule->casefold_mapping, cfrule->mapping,
+                         sizeof (rule->casefold_mapping));
+
+                 if ((cfrule->language == NULL
+                      ? rule->language == NULL
+                      : rule->language != NULL
+                        && strcmp (rule->language, cfrule->language) == 0)
+                     && rule->context == SCC_ALWAYS)
+                   {
+                     /* Found it.  */
+                     found_rule = rule;
+                   }
+               }
+           }
+
+         if (found_rule == NULL)
+           {
+             /* Create a new rule.  It gets the code and language of cfrule
+                and the context SCC_ALWAYS.  */
+             struct special_casing_rule *new_rule =
+               (struct special_casing_rule *) malloc (sizeof (struct special_casing_rule));
+
+             /* Try to find a rule that applies to the same code, no language
+                restriction, and with context SCC_ALWAYS.  If there is one,
+                the new rule inherits its lower, title and upper mappings;
+                otherwise these are taken from to_lower, to_title and
+                to_upper.  */
+             for (j = 0; j < num_casing_rules; j++)
+               {
+                 struct special_casing_rule *rule = casing_rules[j];
+
+                 if (rule->code == cfrule->code
+                     && rule->context == SCC_ALWAYS
+                     && rule->language == NULL)
+                   {
+                     /* Found it.  */
+                     found_rule = rule;
+                     break;
+                   }
+               }
+
+             new_rule->code = cfrule->code;
+             new_rule->language = cfrule->language;
+             new_rule->context = SCC_ALWAYS;
+             if (found_rule != NULL)
+               {
+                 memcpy (new_rule->lower_mapping, found_rule->lower_mapping,
+                         sizeof (new_rule->lower_mapping));
+                 memcpy (new_rule->title_mapping, found_rule->title_mapping,
+                         sizeof (new_rule->title_mapping));
+                 memcpy (new_rule->upper_mapping, found_rule->upper_mapping,
+                         sizeof (new_rule->upper_mapping));
+               }
+             else
+               {
+                 unsigned int k;
+
+                 new_rule->lower_mapping[0] = to_lower (cfrule->code);
+                 for (k = 1; k < 3; k++)
+                   new_rule->lower_mapping[k] = 0;
+                 new_rule->title_mapping[0] = to_title (cfrule->code);
+                 for (k = 1; k < 3; k++)
+                   new_rule->title_mapping[k] = 0;
+                 new_rule->upper_mapping[0] = to_upper (cfrule->code);
+                 for (k = 1; k < 3; k++)
+                   new_rule->upper_mapping[k] = 0;
+               }
+             memcpy (new_rule->casefold_mapping, cfrule->mapping,
+                     sizeof (new_rule->casefold_mapping));
+
+             add_casing_rule (new_rule);
+           }
+       }
+    }
+}
+
+static int
+compare_casing_rules (const void *a, const void *b)
+{
+  struct special_casing_rule *a_rule = *(struct special_casing_rule **) a;
+  struct special_casing_rule *b_rule = *(struct special_casing_rule **) b;
+  unsigned int a_code = a_rule->code;  /* Primary sort key: the code.  */
+  unsigned int b_code = b_rule->code;
+
+  if (a_code < b_code)
+    return -1;
+  if (a_code > b_code)
+    return 1;
+
+  /* Sort the more specific rules before the more general ones.
+     The specificity of a rule is the number of its restrictions: 1 for a
+     language plus 1 for a context other than SCC_ALWAYS.  The value returned
+     is the specificity of B minus the specificity of A, so it is negative
+     when A is the more specific rule.  This function is passed to qsort by
+     sort_casing_rules; A and B point to elements of casing_rules.  */
+  return (- ((a_rule->language != NULL ? 1 : 0) + (a_rule->context != SCC_ALWAYS ? 1 : 0))
+         + ((b_rule->language != NULL ? 1 : 0) + (b_rule->context != SCC_ALWAYS ? 1 : 0)));
+}
+
+static void
+sort_casing_rules (void)
+{
+  /* Sort the rules 1. by code, 2. by specificity.
+     This sorts the casing_rules array in place, using compare_casing_rules.
+     qsort is only called when there are at least two rules.  The relative
+     order of rules with equal code and equal specificity is whatever qsort
+     produces.  */
+  if (num_casing_rules > 1)
+    qsort (casing_rules, num_casing_rules, sizeof (struct special_casing_rule *),
+          compare_casing_rules);
+}
+
+/* Output the special casing rules.
+   Writes to FILENAME a header of '%' declarations (the file name passed by
+   main ends in ".gperf"), followed by one line for each rule of
+   casing_rules, in the array's current order.  VERSION is mentioned in the
+   generated header comment.
+   Each rule line consists of
+     - a 3-byte key: high byte of the code, low byte of the code, and the
+       index of the rule among the adjacent rules with the same code,
+     - 1 if the next rule has the same code, 0 otherwise,
+     - the context, preceded by '-' if it is negative,
+     - the language as two characters, or two NUL characters if none,
+     - the upper, lower, title and casefold mappings, in this order.
+   Exits with status 1 if the file cannot be opened or written, or if a code
+   or a mapping element is >= 0x10000.  Aborts on an unknown context or on a
+   language that is not exactly 2 characters long.  */
+static void
+output_casing_rules (const char *filename, const char *version)
+{
+  FILE *stream;
+  unsigned int i, j;
+  unsigned int minor;  /* Index of the current rule among the adjacent rules
+                          that have the same code.  */
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Special casing rules of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s.  */\n",
+          version);
+  fprintf (stream, "struct special_casing_rule { char code[3]; };\n");
+  fprintf (stream, "%%struct-type\n");
+  fprintf (stream, "%%language=ANSI-C\n");
+  fprintf (stream, "%%define slot-name code\n");
+  fprintf (stream, "%%define hash-function-name gl_unicase_special_hash\n");
+  fprintf (stream, "%%define lookup-function-name gl_unicase_special_lookup\n");
+  fprintf (stream, "%%compare-lengths\n");
+  fprintf (stream, "%%compare-strncmp\n");
+  fprintf (stream, "%%readonly-tables\n");
+  fprintf (stream, "%%omit-struct-type\n");
+  fprintf (stream, "%%%%\n");
+
+  minor = 0;
+  for (i = 0; i < num_casing_rules; i++)
+    {
+      struct special_casing_rule *rule = casing_rules[i];
+      int context;
+
+      if (i > 0 && rule->code == casing_rules[i - 1]->code)
+       minor += 1;
+      else
+       minor = 0;
+
+      if (!(rule->code < 0x10000))  /* The key has only 2 bytes for the code.  */
+       {
+         fprintf (stderr, "special rule #%u: code %u out of range\n", i, rule->code);
+         exit (1);
+       }
+
+      fprintf (stream, "\"\\x%02x\\x%02x\\x%02x\", ",
+              (rule->code >> 8) & 0xff, rule->code & 0xff, minor);
+
+      fprintf (stream, "%d, ",  /* 1 if the next rule has the same code.  */
+              i + 1 < num_casing_rules && casing_rules[i + 1]->code == rule->code ? 1 : 0);
+
+      context = rule->context;  /* A negative value is printed as '-' followed
+                                   by the name of its absolute value.  */
+      if (context < 0)
+       {
+         fprintf (stream, "-");
+         context = - context;
+       }
+      else
+       fprintf (stream, " ");
+      switch (context)
+       {
+       case SCC_ALWAYS:
+         fprintf (stream, "SCC_ALWAYS           ");
+         break;
+       case SCC_FINAL_SIGMA:
+         fprintf (stream, "SCC_FINAL_SIGMA      ");
+         break;
+       case SCC_AFTER_SOFT_DOTTED:
+         fprintf (stream, "SCC_AFTER_SOFT_DOTTED");
+         break;
+       case SCC_MORE_ABOVE:
+         fprintf (stream, "SCC_MORE_ABOVE       ");
+         break;
+       case SCC_BEFORE_DOT:
+         fprintf (stream, "SCC_BEFORE_DOT       ");
+         break;
+       case SCC_AFTER_I:
+         fprintf (stream, "SCC_AFTER_I          ");
+         break;
+       default:
+         abort ();
+       }
+      fprintf (stream, ", ");
+
+      if (rule->language != NULL)
+       {
+         if (strlen (rule->language) != 2)
+           abort ();
+         fprintf (stream, "{  '%c',  '%c' }, ", rule->language[0], rule->language[1]);
+       }
+      else
+       fprintf (stream, "{ '\\0', '\\0' }, ");
+
+      fprintf (stream, "{ ");  /* Upper mapping; elements that are 0 are
+                                  printed as a plain, padded 0.  */
+      for (j = 0; j < 3; j++)
+       {
+         if (j > 0)
+           fprintf (stream, ", ");
+         if (!(rule->upper_mapping[j] < 0x10000))
+           {
+             fprintf (stderr, "special rule #%u: upper mapping of code %u out of range\n", i, rule->code);
+             exit (1);
+           }
+         if (rule->upper_mapping[j] != 0)
+           fprintf (stream, "0x%04X", rule->upper_mapping[j]);
+         else
+           fprintf (stream, "     0");
+       }
+      fprintf (stream, " }, { ");  /* Lower mapping.  */
+      for (j = 0; j < 3; j++)
+       {
+         if (j > 0)
+           fprintf (stream, ", ");
+         if (!(rule->lower_mapping[j] < 0x10000))
+           {
+             fprintf (stderr, "special rule #%u: lower mapping of code %u out of range\n", i, rule->code);
+             exit (1);
+           }
+         if (rule->lower_mapping[j] != 0)
+           fprintf (stream, "0x%04X", rule->lower_mapping[j]);
+         else
+           fprintf (stream, "     0");
+       }
+      fprintf (stream, " }, { ");  /* Title mapping.  */
+      for (j = 0; j < 3; j++)
+       {
+         if (j > 0)
+           fprintf (stream, ", ");
+         if (!(rule->title_mapping[j] < 0x10000))
+           {
+             fprintf (stderr, "special rule #%u: title mapping of code %u out of range\n", i, rule->code);
+             exit (1);
+           }
+         if (rule->title_mapping[j] != 0)
+           fprintf (stream, "0x%04X", rule->title_mapping[j]);
+         else
+           fprintf (stream, "     0");
+       }
+      fprintf (stream, " }, { ");  /* Casefold mapping.  */
+      for (j = 0; j < 3; j++)
+       {
+         if (j > 0)
+           fprintf (stream, ", ");
+         if (!(rule->casefold_mapping[j] < 0x10000))
+           {
+             fprintf (stderr, "special rule #%u: casefold mapping of code %u out of range\n", i, rule->code);
+             exit (1);
+           }
+         if (rule->casefold_mapping[j] != 0)
+           fprintf (stream, "0x%04X", rule->casefold_mapping[j]);
+         else
+           fprintf (stream, "     0");
+       }
+      fprintf (stream, " }\n");
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
+int
+main (int argc, char * argv[])
+{
+  const char *unicodedata_filename;
+  const char *proplist_filename;
+  const char *derivedproplist_filename;
+  const char *scripts_filename;
+  const char *blocks_filename;
+  const char *proplist30_filename;
+  const char *eastasianwidth_filename;
+  const char *linebreak_filename;
+  const char *wordbreakproperty_filename;
+  const char *compositionexclusions_filename;
+  const char *specialcasing_filename;
+  const char *casefolding_filename;
+  const char *version;
+
+  if (argc != 14)  /* Exactly 13 arguments are required: the 12 input file
+                      names and the version, in the order of the usage
+                      message.  */
+    {
+      fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt LineBreak.txt WordBreakProperty.txt CompositionExclusions.txt SpecialCasing.txt CaseFolding.txt version\n",
+              argv[0]);
+      exit (1);
+    }
+
+  unicodedata_filename = argv[1];
+  proplist_filename = argv[2];
+  derivedproplist_filename = argv[3];
+  scripts_filename = argv[4];
+  blocks_filename = argv[5];
+  proplist30_filename = argv[6];
+  eastasianwidth_filename = argv[7];
+  linebreak_filename = argv[8];
+  wordbreakproperty_filename = argv[9];
+  compositionexclusions_filename = argv[10];
+  specialcasing_filename = argv[11];
+  casefolding_filename = argv[12];
+  version = argv[13];
+
+  fill_attributes (unicodedata_filename);  /* First, read the input files.  */
+  clear_properties ();
+  fill_properties (proplist_filename);
+  fill_properties (derivedproplist_filename);
+  fill_properties30 (proplist30_filename);
+  fill_scripts (scripts_filename);
+  fill_blocks (blocks_filename);
+  fill_width (eastasianwidth_filename);
+  fill_org_lbp (linebreak_filename);
+  fill_org_wbp (wordbreakproperty_filename);
+  fill_composition_exclusions (compositionexclusions_filename);
+  fill_casing_rules (specialcasing_filename);
+  fill_casefolding_rules (casefolding_filename);
+  redistribute_casefolding_rules ();  /* Uses the rules read by the two
+                                         preceding calls.  */
+  sort_casing_rules ();  /* Done after the merge, which may append rules;
+                            output_casing_rules relies on rules with the same
+                            code being adjacent.  */
+
+  output_categories (version);  /* From here on, write the output files.  */
+  output_category ("unictype/categ_of.h", version);
+  output_combclass ("unictype/combining.h", version);
+  output_bidi_category ("unictype/bidi_of.h", version);
+  output_decimal_digit_test ("../tests/unictype/test-decdigit.h", version);
+  output_decimal_digit ("unictype/decdigit.h", version);
+  output_digit_test ("../tests/unictype/test-digit.h", version);
+  output_digit ("unictype/digit.h", version);
+  output_numeric_test ("../tests/unictype/test-numeric.h", version);
+  output_numeric ("unictype/numeric.h", version);
+  output_mirror ("unictype/mirror.h", version);
+  output_properties (version);
+  output_scripts (version);
+  output_scripts_byname (version);
+  output_blocks (version);
+  output_ident_properties (version);
+  output_old_ctype (version);
+
+  debug_output_lbrk_tables ("unilbrk/lbrkprop.txt");
+  debug_output_org_lbrk_tables ("unilbrk/lbrkprop_org.txt");
+  output_lbrk_tables ("unilbrk/lbrkprop1.h", "unilbrk/lbrkprop2.h", version);
+
+  debug_output_wbrk_tables ("uniwbrk/wbrkprop.txt");
+  debug_output_org_wbrk_tables ("uniwbrk/wbrkprop_org.txt");
+  output_wbrk_tables ("uniwbrk/wbrkprop.h", version);
+
+  output_decomposition_tables ("uninorm/decomposition-table1.h", "uninorm/decomposition-table2.h", version);
+  debug_output_composition_tables ("uninorm/composition.txt");
+  output_composition_tables ("uninorm/composition-table.gperf", version);
+
+  output_simple_mapping_test ("../tests/unicase/test-uc_toupper.c", "uc_toupper", to_upper, version);
+  output_simple_mapping_test ("../tests/unicase/test-uc_tolower.c", "uc_tolower", to_lower, version);
+  output_simple_mapping_test ("../tests/unicase/test-uc_totitle.c", "uc_totitle", to_title, version);
+  output_simple_mapping ("unicase/toupper.h", to_upper, version);
+  output_simple_mapping ("unicase/tolower.h", to_lower, version);
+  output_simple_mapping ("unicase/totitle.h", to_title, version);
+  output_simple_mapping ("unicase/tocasefold.h", to_casefold, version);
+  output_casing_rules ("unicase/special-casing-table.gperf", version);
  
    return 0;
  }
@@ -6351,15 +8312,19 @@ main (int argc, char * argv[])
   * compile-command: "
     gcc -O -Wall gen-uni-tables.c -Iunictype -o gen-uni-tables && \
     ./gen-uni-tables \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/PropList.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/DerivedCoreProperties.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Scripts.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Blocks.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/UnicodeData.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/PropList.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/DerivedCoreProperties.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/Scripts.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/Blocks.txt \
          /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
-        5.0.0
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/EastAsianWidth.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/LineBreak.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/auxiliary/WordBreakProperty.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/CompositionExclusions.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/SpecialCasing.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/CaseFolding.txt \
+        5.1.0
     "
   * End:
   */