Adjust getdate's grammar to accept a slightly more regular language.

[gnulib.git] / lib / uniname / uniname.c
diff --git a/lib/uniname/uniname.c b/lib/uniname/uniname.c

index 371822c..37272a9 100644 (file)
--- a/lib/uniname/uniname.c
+++ b/lib/uniname/uniname.c
@@ -1,20 +1,18 @@
  /* Association between Unicode characters and their names.
     Copyright (C) 2000-2002, 2005-2007 Free Software Foundation, Inc.
  
-   This program is free software; you can redistribute it and/or modify it
-   under the terms of the GNU Library General Public License as published
-   by the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
  
     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
  
-   You should have received a copy of the GNU Library General Public
-   License along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-   USA.  */
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  
  #include <config.h>
  
@@ -30,19 +28,25 @@
  #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  
  
-/* Table of Unicode character names, derived from UnicodeData.txt.  */
+/* Table of Unicode character names, derived from UnicodeData.txt.
+   This table is generated so as to minimize its memory footprint:
+     1. its compiled size is small (less than 350 KB),
+     2. it resides entirely in the text or read-only data segment of the
+        executable or shared library: the table contains only immediate
+        integers, no pointers, and the functions don't do heap allocation.
+ */
  #include "uninames.h"
  /* It contains:
-  static const char unicode_name_words[34594] = ...;
-  #define UNICODE_CHARNAME_NUM_WORDS 5906
+  static const char unicode_name_words[36303] = ...;
+  #define UNICODE_CHARNAME_NUM_WORDS 6260
    static const struct { uint16_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[26] = ...;
-  #define UNICODE_CHARNAME_WORD_HANGUL 3624
-  #define UNICODE_CHARNAME_WORD_SYLLABLE 4654
-  #define UNICODE_CHARNAME_WORD_CJK 401
-  #define UNICODE_CHARNAME_WORD_COMPATIBILITY 5755
-  static const uint16_t unicode_names[62620] = ...;
-  static const struct { uint16_t code; uint16_t name; } unicode_name_to_code[15257] = ...;
-  static const struct { uint16_t code; uint16_t name; } unicode_code_to_name[15257] = ...;
+  #define UNICODE_CHARNAME_WORD_HANGUL 3902
+  #define UNICODE_CHARNAME_WORD_SYLLABLE 4978
+  #define UNICODE_CHARNAME_WORD_CJK 417
+  #define UNICODE_CHARNAME_WORD_COMPATIBILITY 6107
+  static const uint16_t unicode_names[68940] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_name_to_code[16626] = ...;
+  static const struct { uint16_t code; uint32_t name:24; } unicode_code_to_name[16626] = ...;
    #define UNICODE_CHARNAME_MAX_LENGTH 83
    #define UNICODE_CHARNAME_MAX_WORDS 13
  */
@@ -215,14 +219,17 @@ unicode_character_name (ucs4_t c, char *buf)
         case 0x10:
           c -= 0x09000;
           break;
+       case 0x12:
+         c -= 0x0A000;
+         break;
         case 0x1D:
-         c -= 0x15000;
+         c -= 0x14000;
           break;
         case 0x2F:
-         c -= 0x26000;
+         c -= 0x25000;
           break;
         case 0xE0:
-         c -= 0xD6000;
+         c -= 0xD5000;
           break;
         default:
           return NULL;
@@ -487,11 +494,11 @@ unicode_name_character (const char *name)
                             unsigned int c = unicode_name_to_code[i].code;
  
                             /* Undo the transformation to 16-bit space.  */
-                           static const unsigned int offset[11] =
+                           static const unsigned int offset[12] =
                               {
                                 0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
-                               0x05000, 0x09000, 0x09000, 0x15000, 0x26000,
-                               0xD6000
+                               0x05000, 0x09000, 0x09000, 0x0A000, 0x14000,
+                               0x25000, 0xD5000
                               };
                             return c + offset[c >> 12];
                           }