tests/uniname/test-uninames.c

   1 /* Test the Unicode character name functions.
   2    Copyright (C) 2000-2003, 2005, 2007 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 #include <config.h>
  18
  19 #include <stdio.h>
  20 #include <stdlib.h>
  21 #include <string.h>
  22
  23 #include "xalloc.h"
  24 #include "uniname.h"
  25
  26 /* The names according to the UnicodeData.txt file, modified to contain the
  27    Hangul syllable names, as described in the Unicode 3.0 book.  */
  28 const char * unicode_names [0x110000];
  29
  30 /* Maximum length of a field in the UnicodeData.txt file.  */
  31 #define FIELDLEN 120
  32
  33 /* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
  34    Reads up to (but excluding) DELIM.
  35    Returns 1 when a field was successfully read, otherwise 0.  */
  36 static int
  37 getfield (FILE *stream, char *buffer, int delim)
  38 {
  39   int count = 0;
  40   int c;
  41
  42   for (; (c = getc (stream)), (c != EOF && c != delim); )
  43     {
  44       /* Put c into the buffer.  */
  45       if (++count >= FIELDLEN - 1)
  46         {
  47           fprintf (stderr, "field too long\n");
  48           exit (EXIT_FAILURE);
  49         }
  50       *buffer++ = c;
  51     }
  52
  53   if (c == EOF)
  54     return 0;
  55
  56   *buffer = '\0';
  57   return 1;
  58 }
  59
  60 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
  61    file.  */
  62 static void
  63 fill_names (const char *unicodedata_filename)
  64 {
  65   unsigned int i;
  66   FILE *stream;
  67   char field0[FIELDLEN];
  68   char field1[FIELDLEN];
  69   int lineno = 0;
  70
  71   for (i = 0; i < 0x110000; i++)
  72     unicode_names[i] = NULL;
  73
  74   stream = fopen (unicodedata_filename, "r");
  75   if (stream == NULL)
  76     {
  77       fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
  78       exit (EXIT_FAILURE);
  79     }
  80
  81   for (;;)
  82     {
  83       int n;
  84       int c;
  85
  86       lineno++;
  87       n = getfield (stream, field0, ';');
  88       n += getfield (stream, field1, ';');
  89       if (n == 0)
  90         break;
  91       if (n != 2)
  92         {
  93           fprintf (stderr, "short line in '%s':%d\n",
  94                    unicodedata_filename, lineno);
  95           exit (EXIT_FAILURE);
  96         }
  97       for (; (c = getc (stream)), (c != EOF && c != '\n'); )
  98         ;
  99       i = strtoul (field0, NULL, 16);
 100       if (i >= 0x110000)
 101         {
 102           fprintf (stderr, "index too large\n");
 103           exit (EXIT_FAILURE);
 104         }
 105       unicode_names[i] = xstrdup (field1);
 106     }
 107   if (ferror (stream) || fclose (stream))
 108     {
 109       fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
 110       exit (1);
 111     }
 112 }
 113
 114 /* Perform an exhaustive test of the unicode_character_name function.  */
 115 static int
 116 test_name_lookup ()
 117 {
 118   int error = 0;
 119   unsigned int i;
 120   char buf[UNINAME_MAX];
 121
 122   for (i = 0; i < 0x11000; i++)
 123     {
 124       char *result = unicode_character_name (i, buf);
 125
 126       if (unicode_names[i] != NULL)
 127         {
 128           if (result == NULL)
 129             {
 130               fprintf (stderr, "\\u%04X name lookup failed!\n", i);
 131               error = 1;
 132             }
 133           else if (strcmp (result, unicode_names[i]) != 0)
 134             {
 135               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 136                                i, result);
 137               error = 1;
 138             }
 139         }
 140       else
 141         {
 142           if (result != NULL)
 143             {
 144               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 145                                i, result);
 146               error = 1;
 147             }
 148         }
 149     }
 150
 151   for (i = 0x110000; i < 0x1000000; i++)
 152     {
 153       char *result = unicode_character_name (i, buf);
 154
 155       if (result != NULL)
 156         {
 157           fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 158                            i, result);
 159           error = 1;
 160         }
 161     }
 162
 163   return error;
 164 }
 165
 166 /* Perform a test of the unicode_name_character function.  */
 167 static int
 168 test_inverse_lookup ()
 169 {
 170   int error = 0;
 171   unsigned int i;
 172
 173   /* First, verify all valid character names are recognized.  */
 174   for (i = 0; i < 0x110000; i++)
 175     if (unicode_names[i] != NULL)
 176       {
 177         unsigned int result = unicode_name_character (unicode_names[i]);
 178         if (result != i)
 179           {
 180             if (result == UNINAME_INVALID)
 181               fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
 182                        unicode_names[i]);
 183             else
 184               fprintf (stderr,
 185                        "inverse name lookup of \"%s\" returned 0x%04X\n",
 186                        unicode_names[i], result);
 187             error = 1;
 188           }
 189       }
 190
 191   /* Second, generate random but likely names and verify they are not
 192      recognized unless really valid.  */
 193   for (i = 0; i < 10000; i++)
 194     {
 195       unsigned int i1, i2;
 196       const char *s1;
 197       const char *s2;
 198       unsigned int l1, l2, j1, j2;
 199       char buf[2*UNINAME_MAX];
 200       unsigned int result;
 201
 202       do i1 = ((rand () % 0x11) << 16)
 203               + ((rand () & 0xff) << 8)
 204               + (rand () & 0xff);
 205       while (unicode_names[i1] == NULL);
 206
 207       do i2 = ((rand () % 0x11) << 16)
 208               + ((rand () & 0xff) << 8)
 209               + (rand () & 0xff);
 210       while (unicode_names[i2] == NULL);
 211
 212       s1 = unicode_names[i1];
 213       l1 = strlen (s1);
 214       s2 = unicode_names[i2];
 215       l2 = strlen (s2);
 216
 217       /* Concatenate a starting piece of s1 with an ending piece of s2.  */
 218       for (j1 = 1; j1 <= l1; j1++)
 219         if (j1 == l1 || s1[j1] == ' ')
 220           for (j2 = 0; j2 < l2; j2++)
 221             if (j2 == 0 || s2[j2-1] == ' ')
 222               {
 223                 memcpy (buf, s1, j1);
 224                 buf[j1] = ' ';
 225                 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
 226
 227                 result = unicode_name_character (buf);
 228                 if (result != UNINAME_INVALID
 229                     && !(unicode_names[result] != NULL
 230                          && strcmp (unicode_names[result], buf) == 0))
 231                   {
 232                     fprintf (stderr,
 233                              "inverse name lookup of \"%s\" returned 0x%04X\n",
 234                              unicode_names[i], result);
 235                     error = 1;
 236                   }
 237               }
 238     }
 239
 240   /* Third, some extreme case that used to loop.  */
 241   if (unicode_name_character ("A A") != UNINAME_INVALID)
 242     error = 1;
 243
 244   return error;
 245 }
 246
 247 int
 248 main (int argc, char *argv[])
 249 {
 250   int error = 0;
 251
 252   fill_names (argv[1]);
 253
 254   error |= test_name_lookup ();
 255   error |= test_inverse_lookup ();
 256
 257   return error;
 258 }