maint: update copyright
[gnulib.git] / lib / uniname / uniname.c
index cf96c67..8f4b32d 100644 (file)
@@ -1,20 +1,18 @@
 /* Association between Unicode characters and their names.
-   Copyright (C) 2000-2002, 2005-2007 Free Software Foundation, Inc.
+   Copyright (C) 2000-2002, 2005-2007, 2009-2014 Free Software Foundation, Inc.
 
-   This program is free software; you can redistribute it and/or modify it
-   under the terms of the GNU Library General Public License as published
-   by the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
 
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-   USA.  */
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include <config.h>
 
 
 /* Table of Unicode character names, derived from UnicodeData.txt.
    This table is generated in a way to minimize the memory footprint:
-     1. its compiled size is small (less than 300 KB),
+     1. its compiled size is small (less than 350 KB),
      2. it resides entirely in the text or read-only data segment of the
         executable or shared library: the table contains only immediate
         integers, no pointers, and the functions don't do heap allocation.
  */
 #include "uninames.h"
 /* It contains:
-  static const char unicode_name_words[34594] = ...;
-  #define UNICODE_CHARNAME_NUM_WORDS 5906
+  static const char unicode_name_words[36303] = ...;
+  #define UNICODE_CHARNAME_NUM_WORDS 6260
   static const struct { uint16_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[26] = ...;
-  #define UNICODE_CHARNAME_WORD_HANGUL 3624
-  #define UNICODE_CHARNAME_WORD_SYLLABLE 4654
-  #define UNICODE_CHARNAME_WORD_CJK 401
-  #define UNICODE_CHARNAME_WORD_COMPATIBILITY 5755
-  static const uint16_t unicode_names[62620] = ...;
-  static const struct { uint16_t code; uint16_t name; } unicode_name_to_code[15257] = ...;
-  static const struct { uint16_t code; uint16_t name; } unicode_code_to_name[15257] = ...;
+  #define UNICODE_CHARNAME_WORD_HANGUL 3902
+  #define UNICODE_CHARNAME_WORD_SYLLABLE 4978
+  #define UNICODE_CHARNAME_WORD_CJK 417
+  #define UNICODE_CHARNAME_WORD_COMPATIBILITY 6107
+  static const uint16_t unicode_names[68940] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_name_to_code[16626] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_code_to_name[16626] = ...;
   #define UNICODE_CHARNAME_MAX_LENGTH 83
   #define UNICODE_CHARNAME_MAX_WORDS 13
 */
@@ -75,16 +73,16 @@ unicode_name_word (unsigned int index, unsigned int *lengthp)
     {
       unsigned int i = (i1 + i2) >> 1;
       if (unicode_name_by_length[i].ind_offset <= index)
-       i1 = i;
+        i1 = i;
       else
-       i2 = i;
+        i2 = i;
     }
   i = i1;
   assert (unicode_name_by_length[i].ind_offset <= index
-         && index < unicode_name_by_length[i+1].ind_offset);
+          && index < unicode_name_by_length[i+1].ind_offset);
   *lengthp = i;
   return &unicode_name_words[unicode_name_by_length[i].extra_offset
-                            + (index-unicode_name_by_length[i].ind_offset)*i];
+                             + (index-unicode_name_by_length[i].ind_offset)*i];
 }
 
 /* Looks up the index of a word.  */
@@ -99,32 +97,32 @@ unicode_name_word_lookup (const char *word, unsigned int length)
       unsigned int i1 = i0;
       unsigned int i2 = unicode_name_by_length[length+1].ind_offset;
       while (i2 - i1 > 0)
-       {
-         unsigned int i = (i1 + i2) >> 1;
-         const char *p = &unicode_name_words[extra_offset + (i-i0)*length];
-         const char *w = word;
-         unsigned int n = length;
-         for (;;)
-           {
-             if (*p < *w)
-               {
-                 if (i1 == i)
-                   return -1;
-                 /* Note here: i1 < i < i2.  */
-                 i1 = i;
-                 break;
-               }
-             if (*p > *w)
-               {
-                 /* Note here: i1 <= i < i2.  */
-                 i2 = i;
-                 break;
-               }
-             p++; w++; n--;
-             if (n == 0)
-               return i;
-           }
-       }
+        {
+          unsigned int i = (i1 + i2) >> 1;
+          const char *p = &unicode_name_words[extra_offset + (i-i0)*length];
+          const char *w = word;
+          unsigned int n = length;
+          for (;;)
+            {
+              if (*p < *w)
+                {
+                  if (i1 == i)
+                    return -1;
+                  /* Note here: i1 < i < i2.  */
+                  i1 = i;
+                  break;
+                }
+              if (*p > *w)
+                {
+                  /* Note here: i1 <= i < i2.  */
+                  i2 = i;
+                  break;
+                }
+              p++; w++; n--;
+              if (n == 0)
+                return i;
+            }
+        }
     }
   return -1;
 }
@@ -173,21 +171,21 @@ unicode_character_name (ucs4_t c, char *buf)
 
       q = jamo_initial_short_name[index1];
       while (*q != '\0')
-       *ptr++ = *q++;
+        *ptr++ = *q++;
       q = jamo_medial_short_name[index2];
       while (*q != '\0')
-       *ptr++ = *q++;
+        *ptr++ = *q++;
       q = jamo_final_short_name[index3];
       while (*q != '\0')
-       *ptr++ = *q++;
+        *ptr++ = *q++;
       *ptr = '\0';
       return buf;
     }
   else if ((c >= 0xF900 && c <= 0xFA2D) || (c >= 0xFA30 && c <= 0xFA6A)
-          || (c >= 0xFA70 && c <= 0xFAD9) || (c >= 0x2F800 && c <= 0x2FA1D))
+           || (c >= 0xFA70 && c <= 0xFAD9) || (c >= 0x2F800 && c <= 0x2FA1D))
     {
       /* Special case for CJK compatibility ideographs. Keeps the tables
-        small.  */
+         small.  */
       char *ptr;
       int i;
 
@@ -196,10 +194,10 @@ unicode_character_name (ucs4_t c, char *buf)
       ptr = buf + 28;
 
       for (i = (c < 0x10000 ? 12 : 16); i >= 0; i -= 4)
-       {
-         unsigned int x = (c >> i) & 0xf;
-         *ptr++ = (x < 10 ? '0' : 'A' - 10) + x;
-       }
+        {
+          unsigned int x = (c >> i) & 0xf;
+          *ptr++ = (x < 10 ? '0' : 'A' - 10) + x;
+        }
       *ptr = '\0';
       return buf;
     }
@@ -209,85 +207,91 @@ unicode_character_name (ucs4_t c, char *buf)
 
       /* Transform the code so that it fits in 16 bits.  */
       switch (c >> 12)
-       {
-       case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
-         break;
-       case 0x0A:
-         c -= 0x05000;
-         break;
-       case 0x0F:
-         c -= 0x09000;
-         break;
-       case 0x10:
-         c -= 0x09000;
-         break;
-       case 0x1D:
-         c -= 0x15000;
-         break;
-       case 0x2F:
-         c -= 0x26000;
-         break;
-       case 0xE0:
-         c -= 0xD6000;
-         break;
-       default:
-         return NULL;
-       }
+        {
+        case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
+          break;
+        case 0x0A:
+          c -= 0x05000;
+          break;
+        case 0x0F:
+          c -= 0x09000;
+          break;
+        case 0x10:
+          c -= 0x09000;
+          break;
+        case 0x12:
+          c -= 0x0A000;
+          break;
+        case 0x1D:
+          c -= 0x14000;
+          break;
+        case 0x1F:
+          c -= 0x15000;
+          break;
+        case 0x2F:
+          c -= 0x24000;
+          break;
+        case 0xE0:
+          c -= 0xD4000;
+          break;
+        default:
+          return NULL;
+        }
 
       {
-       /* Binary search in unicode_code_to_name.  */
-       unsigned int i1 = 0;
-       unsigned int i2 = SIZEOF (unicode_code_to_name);
-       for (;;)
-         {
-           unsigned int i = (i1 + i2) >> 1;
-           if (unicode_code_to_name[i].code == c)
-             {
-               words = &unicode_names[unicode_code_to_name[i].name];
-               break;
-             }
-           else if (unicode_code_to_name[i].code < c)
-             {
-               if (i1 == i)
-                 {
-                   words = NULL;
-                   break;
-                 }
-               /* Note here: i1 < i < i2.  */
-               i1 = i;
-             }
-           else if (unicode_code_to_name[i].code > c)
-             {
-               if (i2 == i)
-                 {
-                   words = NULL;
-                   break;
-                 }
-               /* Note here: i1 <= i < i2.  */
-               i2 = i;
-             }
-         }
+        /* Binary search in unicode_code_to_name.  */
+        unsigned int i1 = 0;
+        unsigned int i2 = SIZEOF (unicode_code_to_name);
+        for (;;)
+          {
+            unsigned int i = (i1 + i2) >> 1;
+            if (unicode_code_to_name[i].code == c)
+              {
+                words = &unicode_names[unicode_code_to_name[i].name];
+                break;
+              }
+            else if (unicode_code_to_name[i].code < c)
+              {
+                if (i1 == i)
+                  {
+                    words = NULL;
+                    break;
+                  }
+                /* Note here: i1 < i < i2.  */
+                i1 = i;
+              }
+            else if (unicode_code_to_name[i].code > c)
+              {
+                if (i2 == i)
+                  {
+                    words = NULL;
+                    break;
+                  }
+                /* Note here: i1 <= i < i2.  */
+                i2 = i;
+              }
+          }
       }
       if (words != NULL)
-       {
-         /* Found it in unicode_code_to_name. Now concatenate the words.  */
-         /* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes.  */
-         char *ptr = buf;
-         for (;;)
-           {
-             unsigned int wordlen;
-             const char *word = unicode_name_word (*words>>1, &wordlen);
-             do
-               *ptr++ = *word++;
-             while (--wordlen > 0);
-             if ((*words & 1) == 0)
-               break;
-             *ptr++ = ' ';
-             words++;
-           }
-         *ptr = '\0';
-         return buf;
-       }
+        {
+          /* Found it in unicode_code_to_name. Now concatenate the words.  */
+          /* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes.  */
+          char *ptr = buf;
+          for (;;)
+            {
+              unsigned int wordlen;
+              const char *word = unicode_name_word (*words>>1, &wordlen);
+              do
+                *ptr++ = *word++;
+              while (--wordlen > 0);
+              if ((*words & 1) == 0)
+                break;
+              *ptr++ = ' ';
+              words++;
+            }
+          *ptr = '\0';
+          return buf;
+        }
       return NULL;
     }
 }
@@ -304,209 +308,209 @@ unicode_name_character (const char *name)
       char buf[UNICODE_CHARNAME_MAX_LENGTH];
       char *ptr = buf;
       for (;;)
-       {
-         char c = *name++;
-         if (!(c >= ' ' && c <= '~'))
-           break;
-         *ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
-         if (--len == 0)
-           goto filled_buf;
-       }
+        {
+          char c = *name++;
+          if (!(c >= ' ' && c <= '~'))
+            break;
+          *ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
+          if (--len == 0)
+            goto filled_buf;
+        }
       if (false)
       filled_buf:
-       {
-         /* Convert the constituents to uint16_t words.  */
-         uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
-         uint16_t *wordptr = words;
-         {
-           const char *p1 = buf;
-           for (;;)
-             {
-               {
-                 int word;
-                 const char *p2 = p1;
-                 while (p2 < ptr && *p2 != ' ')
-                   p2++;
-                 word = unicode_name_word_lookup (p1, p2 - p1);
-                 if (word < 0)
-                   break;
-                 if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
-                   break;
-                 *wordptr++ = word;
-                 if (p2 == ptr)
-                   goto filled_words;
-                 p1 = p2 + 1;
-               }
-               /* Special case for Hangul syllables. Keeps the tables small. */
-               if (wordptr == &words[2]
-                   && words[0] == UNICODE_CHARNAME_WORD_HANGUL
-                   && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
-                 {
-                   /* Split the last word [p1..ptr) into three parts:
-                        1) [BCDGHJKMNPRST]
-                        2) [AEIOUWY]
-                        3) [BCDGHIJKLMNPST]
-                    */
-                   const char *p2;
-                   const char *p3;
-                   const char *p4;
-
-                   p2 = p1;
-                   while (p2 < ptr
-                          && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
-                              || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
-                              || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
-                              || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
-                              || *p2 == 'T'))
-                     p2++;
-                   p3 = p2;
-                   while (p3 < ptr
-                          && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
-                              || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
-                              || *p3 == 'Y'))
-                     p3++;
-                   p4 = p3;
-                   while (p4 < ptr
-                          && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
-                              || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
-                              || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
-                              || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
-                              || *p4 == 'S' || *p4 == 'T'))
-                     p4++;
-                   if (p4 == ptr)
-                     {
-                       unsigned int n1 = p2 - p1;
-                       unsigned int n2 = p3 - p2;
-                       unsigned int n3 = p4 - p3;
-
-                       if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
-                         {
-                           unsigned int index1;
-
-                           for (index1 = 0; index1 < 19; index1++)
-                             if (memcmp(jamo_initial_short_name[index1], p1, n1) == 0
-                                 && jamo_initial_short_name[index1][n1] == '\0')
-                               {
-                                 unsigned int index2;
-
-                                 for (index2 = 0; index2 < 21; index2++)
-                                   if (memcmp(jamo_medial_short_name[index2], p2, n2) == 0
-                                       && jamo_medial_short_name[index2][n2] == '\0')
-                                     {
-                                       unsigned int index3;
-
-                                       for (index3 = 0; index3 < 28; index3++)
-                                         if (memcmp(jamo_final_short_name[index3], p3, n3) == 0
-                                             && jamo_final_short_name[index3][n3] == '\0')
-                                           {
-                                             return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
-                                           }
-                                       break;
-                                     }
-                                 break;
-                               }
-                         }
-                     }
-                 }
-               /* Special case for CJK compatibility ideographs. Keeps the
-                  tables small.  */
-               if (wordptr == &words[2]
-                   && words[0] == UNICODE_CHARNAME_WORD_CJK
-                   && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
-                   && p1 + 14 <= ptr
-                   && p1 + 15 >= ptr
-                   && memcmp (p1, "IDEOGRAPH-", 10) == 0)
-                 {
-                   const char *p2 = p1 + 10;
-
-                   if (*p2 != '0')
-                     {
-                       unsigned int c = 0;
-
-                       for (;;)
-                         {
-                           if (*p2 >= '0' && *p2 <= '9')
-                             c += (*p2 - '0');
-                           else if (*p2 >= 'A' && *p2 <= 'F')
-                             c += (*p2 - 'A' + 10);
-                           else
-                             break;
-                           p2++;
-                           if (p2 == ptr)
-                             {
-                               if ((c >= 0xF900 && c <= 0xFA2D)
-                                   || (c >= 0xFA30 && c <= 0xFA6A)
-                                   || (c >= 0xFA70 && c <= 0xFAD9)
-                                   || (c >= 0x2F800 && c <= 0x2FA1D))
-                                 return c;
-                               else
-                                 break;
-                             }
-                           c = c << 4;
-                         }
-                     }
-                 }
-             }
-         }
-         if (false)
-         filled_words:
-           {
-             /* Multiply by 2, to simplify later comparisons.  */
-             unsigned int words_length = wordptr - words;
-             {
-               int i = words_length - 1;
-               words[i] = 2 * words[i];
-               for (; --i >= 0; )
-                 words[i] = 2 * words[i] + 1;
-             }
-             /* Binary search in unicode_name_to_code.  */
-             {
-               unsigned int i1 = 0;
-               unsigned int i2 = SIZEOF (unicode_name_to_code);
-               for (;;)
-                 {
-                   unsigned int i = (i1 + i2) >> 1;
-                   const uint16_t *w = words;
-                   const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
-                   unsigned int n = words_length;
-                   for (;;)
-                     {
-                       if (*p < *w)
-                         {
-                           if (i1 == i)
-                             goto name_not_found;
-                           /* Note here: i1 < i < i2.  */
-                           i1 = i;
-                           break;
-                         }
-                       else if (*p > *w)
-                         {
-                           if (i2 == i)
-                             goto name_not_found;
-                           /* Note here: i1 <= i < i2.  */
-                           i2 = i;
-                           break;
-                         }
-                       p++; w++; n--;
-                       if (n == 0)
-                         {
-                           unsigned int c = unicode_name_to_code[i].code;
-
-                           /* Undo the transformation to 16-bit space.  */
-                           static const unsigned int offset[11] =
-                             {
-                               0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
-                               0x05000, 0x09000, 0x09000, 0x15000, 0x26000,
-                               0xD6000
-                             };
-                           return c + offset[c >> 12];
-                         }
-                     }
-                 }
-             }
-           name_not_found: ;
-           }
-       }
+        {
+          /* Convert the constituents to uint16_t words.  */
+          uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
+          uint16_t *wordptr = words;
+          {
+            const char *p1 = buf;
+            for (;;)
+              {
+                {
+                  int word;
+                  const char *p2 = p1;
+                  while (p2 < ptr && *p2 != ' ')
+                    p2++;
+                  word = unicode_name_word_lookup (p1, p2 - p1);
+                  if (word < 0)
+                    break;
+                  if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
+                    break;
+                  *wordptr++ = word;
+                  if (p2 == ptr)
+                    goto filled_words;
+                  p1 = p2 + 1;
+                }
+                /* Special case for Hangul syllables. Keeps the tables small. */
+                if (wordptr == &words[2]
+                    && words[0] == UNICODE_CHARNAME_WORD_HANGUL
+                    && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
+                  {
+                    /* Split the last word [p1..ptr) into three parts:
+                         1) [BCDGHJKMNPRST]
+                         2) [AEIOUWY]
+                         3) [BCDGHIJKLMNPST]
+                     */
+                    const char *p2;
+                    const char *p3;
+                    const char *p4;
+
+                    p2 = p1;
+                    while (p2 < ptr
+                           && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
+                               || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
+                               || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
+                               || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
+                               || *p2 == 'T'))
+                      p2++;
+                    p3 = p2;
+                    while (p3 < ptr
+                           && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
+                               || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
+                               || *p3 == 'Y'))
+                      p3++;
+                    p4 = p3;
+                    while (p4 < ptr
+                           && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
+                               || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
+                               || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
+                               || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
+                               || *p4 == 'S' || *p4 == 'T'))
+                      p4++;
+                    if (p4 == ptr)
+                      {
+                        unsigned int n1 = p2 - p1;
+                        unsigned int n2 = p3 - p2;
+                        unsigned int n3 = p4 - p3;
+
+                        if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
+                          {
+                            unsigned int index1;
+
+                            for (index1 = 0; index1 < 19; index1++)
+                              if (memcmp (jamo_initial_short_name[index1], p1, n1) == 0
+                                  && jamo_initial_short_name[index1][n1] == '\0')
+                                {
+                                  unsigned int index2;
+
+                                  for (index2 = 0; index2 < 21; index2++)
+                                    if (memcmp (jamo_medial_short_name[index2], p2, n2) == 0
+                                        && jamo_medial_short_name[index2][n2] == '\0')
+                                      {
+                                        unsigned int index3;
+
+                                        for (index3 = 0; index3 < 28; index3++)
+                                          if (memcmp (jamo_final_short_name[index3], p3, n3) == 0
+                                              && jamo_final_short_name[index3][n3] == '\0')
+                                            {
+                                              return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
+                                            }
+                                        break;
+                                      }
+                                  break;
+                                }
+                          }
+                      }
+                  }
+                /* Special case for CJK compatibility ideographs. Keeps the
+                   tables small.  */
+                if (wordptr == &words[2]
+                    && words[0] == UNICODE_CHARNAME_WORD_CJK
+                    && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
+                    && p1 + 14 <= ptr
+                    && p1 + 15 >= ptr
+                    && memcmp (p1, "IDEOGRAPH-", 10) == 0)
+                  {
+                    const char *p2 = p1 + 10;
+
+                    if (*p2 != '0')
+                      {
+                        unsigned int c = 0;
+
+                        for (;;)
+                          {
+                            if (*p2 >= '0' && *p2 <= '9')
+                              c += (*p2 - '0');
+                            else if (*p2 >= 'A' && *p2 <= 'F')
+                              c += (*p2 - 'A' + 10);
+                            else
+                              break;
+                            p2++;
+                            if (p2 == ptr)
+                              {
+                                if ((c >= 0xF900 && c <= 0xFA2D)
+                                    || (c >= 0xFA30 && c <= 0xFA6A)
+                                    || (c >= 0xFA70 && c <= 0xFAD9)
+                                    || (c >= 0x2F800 && c <= 0x2FA1D))
+                                  return c;
+                                else
+                                  break;
+                              }
+                            c = c << 4;
+                          }
+                      }
+                  }
+              }
+          }
+          if (false)
+          filled_words:
+            {
+              /* Multiply by 2, to simplify later comparisons.  */
+              unsigned int words_length = wordptr - words;
+              {
+                int i = words_length - 1;
+                words[i] = 2 * words[i];
+                for (; --i >= 0; )
+                  words[i] = 2 * words[i] + 1;
+              }
+              /* Binary search in unicode_name_to_code.  */
+              {
+                unsigned int i1 = 0;
+                unsigned int i2 = SIZEOF (unicode_name_to_code);
+                for (;;)
+                  {
+                    unsigned int i = (i1 + i2) >> 1;
+                    const uint16_t *w = words;
+                    const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
+                    unsigned int n = words_length;
+                    for (;;)
+                      {
+                        if (*p < *w)
+                          {
+                            if (i1 == i)
+                              goto name_not_found;
+                            /* Note here: i1 < i < i2.  */
+                            i1 = i;
+                            break;
+                          }
+                        else if (*p > *w)
+                          {
+                            if (i2 == i)
+                              goto name_not_found;
+                            /* Note here: i1 <= i < i2.  */
+                            i2 = i;
+                            break;
+                          }
+                        p++; w++; n--;
+                        if (n == 0)
+                          {
+                            unsigned int c = unicode_name_to_code[i].code;
+
+                            /* Undo the transformation to 16-bit space.  */
+                            static const unsigned int offset[13] =
+                              {
+                                0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
+                                0x05000, 0x09000, 0x09000, 0x0A000, 0x14000,
+                                0x15000, 0x24000, 0xD4000
+                              };
+                            return c + offset[c >> 12];
+                          }
+                      }
+                  }
+              }
+            name_not_found: ;
+            }
+        }
     }
   return UNINAME_INVALID;
 }