tests/uniname/test-uninames.c

   1 /* Test the Unicode character name functions.
   2    Copyright (C) 2000-2003, 2005, 2007, 2009-2014 Free Software Foundation,
   3    Inc.
   4
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #include <config.h>
  19
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23
  24 #include "xalloc.h"
  25 #include "uniname.h"
  26 #include "progname.h"
  27
  28 /* The names according to the UnicodeData.txt file, modified to contain the
  29    Hangul syllable names, as described in the Unicode 3.0 book.  */
  30 const char * unicode_names [0x110000];
  31
  32 /* Maximum length of a field in the UnicodeData.txt file.  */
  33 #define FIELDLEN 120
  34
  35 /* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
  36    Reads up to (but excluding) DELIM.
  37    Returns 1 when a field was successfully read, otherwise 0.  */
  38 static int
  39 getfield (FILE *stream, char *buffer, int delim)
  40 {
  41   int count = 0;
  42   int c;
  43
  44   for (; (c = getc (stream)), (c != EOF && c != delim); )
  45     {
  46       /* Put c into the buffer.  */
  47       if (++count >= FIELDLEN - 1)
  48         {
  49           fprintf (stderr, "field too long\n");
  50           exit (EXIT_FAILURE);
  51         }
  52       *buffer++ = c;
  53     }
  54
  55   if (c == EOF)
  56     return 0;
  57
  58   *buffer = '\0';
  59   return 1;
  60 }
  61
  62 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
  63    file.  */
  64 static void
  65 fill_names (const char *unicodedata_filename)
  66 {
  67   unsigned int i;
  68   FILE *stream;
  69   char field0[FIELDLEN];
  70   char field1[FIELDLEN];
  71   int lineno = 0;
  72
  73   for (i = 0; i < 0x110000; i++)
  74     unicode_names[i] = NULL;
  75
  76   stream = fopen (unicodedata_filename, "r");
  77   if (stream == NULL)
  78     {
  79       fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
  80       exit (EXIT_FAILURE);
  81     }
  82
  83   for (;;)
  84     {
  85       int n;
  86       int c;
  87
  88       lineno++;
  89       n = getfield (stream, field0, ';');
  90       n += getfield (stream, field1, ';');
  91       if (n == 0)
  92         break;
  93       if (n != 2)
  94         {
  95           fprintf (stderr, "short line in '%s':%d\n",
  96                    unicodedata_filename, lineno);
  97           exit (EXIT_FAILURE);
  98         }
  99       for (; (c = getc (stream)), (c != EOF && c != '\n'); )
 100         ;
 101       i = strtoul (field0, NULL, 16);
 102       if (i >= 0x110000)
 103         {
 104           fprintf (stderr, "index too large\n");
 105           exit (EXIT_FAILURE);
 106         }
 107       unicode_names[i] = xstrdup (field1);
 108     }
 109   if (ferror (stream) || fclose (stream))
 110     {
 111       fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
 112       exit (1);
 113     }
 114 }
 115
 116 /* Perform an exhaustive test of the unicode_character_name function.  */
 117 static int
 118 test_name_lookup ()
 119 {
 120   int error = 0;
 121   unsigned int i;
 122   char buf[UNINAME_MAX];
 123
 124   for (i = 0; i < 0x11000; i++)
 125     {
 126       char *result = unicode_character_name (i, buf);
 127
 128       if (unicode_names[i] != NULL)
 129         {
 130           if (result == NULL)
 131             {
 132               fprintf (stderr, "\\u%04X name lookup failed!\n", i);
 133               error = 1;
 134             }
 135           else if (strcmp (result, unicode_names[i]) != 0)
 136             {
 137               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 138                                i, result);
 139               error = 1;
 140             }
 141         }
 142       else
 143         {
 144           if (result != NULL)
 145             {
 146               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 147                                i, result);
 148               error = 1;
 149             }
 150         }
 151     }
 152
 153   for (i = 0x110000; i < 0x1000000; i++)
 154     {
 155       char *result = unicode_character_name (i, buf);
 156
 157       if (result != NULL)
 158         {
 159           fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 160                            i, result);
 161           error = 1;
 162         }
 163     }
 164
 165   return error;
 166 }
 167
 168 /* Perform a test of the unicode_name_character function.  */
 169 static int
 170 test_inverse_lookup ()
 171 {
 172   int error = 0;
 173   unsigned int i;
 174
 175   /* First, verify all valid character names are recognized.  */
 176   for (i = 0; i < 0x110000; i++)
 177     if (unicode_names[i] != NULL)
 178       {
 179         unsigned int result = unicode_name_character (unicode_names[i]);
 180         if (result != i)
 181           {
 182             if (result == UNINAME_INVALID)
 183               fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
 184                        unicode_names[i]);
 185             else
 186               fprintf (stderr,
 187                        "inverse name lookup of \"%s\" returned 0x%04X\n",
 188                        unicode_names[i], result);
 189             error = 1;
 190           }
 191       }
 192
 193   /* Second, generate random but likely names and verify they are not
 194      recognized unless really valid.  */
 195   for (i = 0; i < 10000; i++)
 196     {
 197       unsigned int i1, i2;
 198       const char *s1;
 199       const char *s2;
 200       unsigned int l1, l2, j1, j2;
 201       char buf[2*UNINAME_MAX];
 202       unsigned int result;
 203
 204       do i1 = ((rand () % 0x11) << 16)
 205               + ((rand () & 0xff) << 8)
 206               + (rand () & 0xff);
 207       while (unicode_names[i1] == NULL);
 208
 209       do i2 = ((rand () % 0x11) << 16)
 210               + ((rand () & 0xff) << 8)
 211               + (rand () & 0xff);
 212       while (unicode_names[i2] == NULL);
 213
 214       s1 = unicode_names[i1];
 215       l1 = strlen (s1);
 216       s2 = unicode_names[i2];
 217       l2 = strlen (s2);
 218
 219       /* Concatenate a starting piece of s1 with an ending piece of s2.  */
 220       for (j1 = 1; j1 <= l1; j1++)
 221         if (j1 == l1 || s1[j1] == ' ')
 222           for (j2 = 0; j2 < l2; j2++)
 223             if (j2 == 0 || s2[j2-1] == ' ')
 224               {
 225                 memcpy (buf, s1, j1);
 226                 buf[j1] = ' ';
 227                 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
 228
 229                 result = unicode_name_character (buf);
 230                 if (result != UNINAME_INVALID
 231                     && !(unicode_names[result] != NULL
 232                          && strcmp (unicode_names[result], buf) == 0))
 233                   {
 234                     fprintf (stderr,
 235                              "inverse name lookup of \"%s\" returned 0x%04X\n",
 236                              unicode_names[i], result);
 237                     error = 1;
 238                   }
 239               }
 240     }
 241
 242   /* Third, some extreme case that used to loop.  */
 243   if (unicode_name_character ("A A") != UNINAME_INVALID)
 244     error = 1;
 245
 246   return error;
 247 }
 248
 249 int
 250 main (int argc, char *argv[])
 251 {
 252   int error = 0;
 253
 254   set_program_name (argv[0]);
 255
 256   fill_names (argv[1]);
 257
 258   error |= test_name_lookup ();
 259   error |= test_inverse_lookup ();
 260
 261   return error;
 262 }