1 /* Test of Unicode compliance of normalization of UTF-32 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
#include "test-u32-normalize-big.h"

#include <stdio.h>
#include <stdlib.h>
24 #if GNULIB_UNINORM_U32_NORMALIZE
/* Verify that EXPR is true.  If it is not, print the location of the
   failing ASSERT invocation (this source file and line) to stderr, flush
   stderr and abort the program.
   The do ... while (0) wrapper makes an invocation followed by ';' behave
   as exactly one statement, so it is safe in an unbraced if/else.  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)

/* Like a plain assertion, but the diagnostic additionally names the data
   file FILE (a string) and the line LINE (printed with %u) whose contents
   made EXPR false.  On failure the message goes to stderr, stderr is
   flushed and the program is aborted.
   The do ... while (0) wrapper makes an invocation followed by ';' behave
   as exactly one statement.  */
#define ASSERT_WITH_LINE(expr, file, line) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed for %s:%u\n",            \
                   __FILE__, __LINE__, file, line);                          \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)

58 cmp_ucs4_t (const void *a, const void *b)
60 ucs4_t a_value = *(const ucs4_t *)a;
61 ucs4_t b_value = *(const ucs4_t *)b;
62 return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0);
66 read_normalization_test_file (const char *filename,
67 struct normalization_test_file *file)
72 struct normalization_test_line *lines;
74 size_t lines_allocated;
76 stream = fopen (filename, "r");
79 fprintf (stderr, "error during fopen of '%s'\n", filename);
83 for (part_index = 0; part_index < 4; part_index++)
85 file->parts[part_index].lines = NULL;
86 file->parts[part_index].lines_length = 0;
101 struct normalization_test_line line;
102 size_t sequence_index;
111 if (c == EOF || c == '\n')
115 while (ptr < buf + 1000);
120 /* Ignore empty lines and comment lines. */
121 if (buf[0] == '\0' || buf[0] == '#')
124 /* Handle lines that introduce a new part. */
127 /* Switch to the next part. */
131 (struct normalization_test_line *)
132 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
133 file->parts[part_index].lines = lines;
134 file->parts[part_index].lines_length = lines_length;
143 /* It's a line containing 5 sequences of Unicode characters.
144 Parse it and append it to the current part. */
145 if (!(part_index >= 0 && part_index < 4))
147 fprintf (stderr, "unexpected structure of '%s'\n", filename);
151 line.lineno = lineno;
152 for (sequence_index = 0; sequence_index < 5; sequence_index++)
153 line.sequences[sequence_index] = NULL;
154 for (sequence_index = 0; sequence_index < 5; sequence_index++)
156 uint32_t *sequence = XNMALLOC (1, uint32_t);
157 size_t sequence_length = 0;
164 uc = strtoul (ptr, &endptr, 16);
169 /* Append uc to the sequence. */
172 xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t));
173 sequence[sequence_length] = uc;
179 if (sequence_length == 0)
181 fprintf (stderr, "empty character sequence in '%s'\n", filename);
184 sequence[sequence_length] = 0; /* terminator */
186 line.sequences[sequence_index] = sequence;
190 fprintf (stderr, "error parsing '%s'\n", filename);
196 /* Append the line to the current part. */
197 if (lines_length == lines_allocated)
199 lines_allocated = 2 * lines_allocated;
200 if (lines_allocated < 7)
203 (struct normalization_test_line *)
204 xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line));
206 lines[lines_length] = line;
213 (struct normalization_test_line *)
214 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
215 file->parts[part_index].lines = lines;
216 file->parts[part_index].lines_length = lines_length;
220 /* Collect all c1 values from the part 1 in an array. */
221 const struct normalization_test_part *p = &file->parts[1];
222 ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t);
225 for (line_index = 0; line_index < p->lines_length; line_index++)
227 const uint32_t *sequence = p->lines[line_index].sequences[0];
228 /* In part 1, every sequences[0] consists of a single character. */
229 if (!(sequence[0] != 0 && sequence[1] == 0))
231 c1_array[line_index] = sequence[0];
234 /* Sort this array. */
235 qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t);
237 /* Add the sentinel at the end. */
238 c1_array[p->lines_length] = 0x110000;
240 file->part1_c1_sorted = c1_array;
243 file->filename = xstrdup (filename);
245 if (ferror (stream) || fclose (stream))
247 fprintf (stderr, "error reading from '%s'\n", filename);
253 test_specific (const struct normalization_test_file *file,
254 int (*check) (const uint32_t *c1, size_t c1_length,
255 const uint32_t *c2, size_t c2_length,
256 const uint32_t *c3, size_t c3_length,
257 const uint32_t *c4, size_t c4_length,
258 const uint32_t *c5, size_t c5_length))
262 for (part_index = 0; part_index < 4; part_index++)
264 const struct normalization_test_part *p = &file->parts[part_index];
267 for (line_index = 0; line_index < p->lines_length; line_index++)
269 const struct normalization_test_line *l = &p->lines[line_index];
271 ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]),
272 l->sequences[1], u32_strlen (l->sequences[1]),
273 l->sequences[2], u32_strlen (l->sequences[2]),
274 l->sequences[3], u32_strlen (l->sequences[3]),
275 l->sequences[4], u32_strlen (l->sequences[4]))
277 file->filename, l->lineno);
283 test_other (const struct normalization_test_file *file, uninorm_t nf)
285 /* Check that for every character not listed in part 1 of the
286 NormalizationTest.txt file, the character maps to itself in each
287 of the four normalization forms. */
288 const ucs4_t *p = file->part1_c1_sorted;
291 for (uc = 0; uc < 0x110000; uc++)
293 if (uc >= 0xD800 && uc < 0xE000)
295 /* A surrogate, not a character. Skip uc. */
309 result = u32_normalize (nf, input, 1, NULL, &length);
310 ASSERT (result != NULL && length == 1 && result[0] == uc);