1 /* Test of canonical decomposition of UTF-8 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_UNINORM_U8_NORMALIZE
/* Number of elements in ARRAY.  ARRAY must be an actual array object, not a
   pointer: the count is the total size divided by the size of element 0.  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* ASSERT (expr): of the macro body, only the failure report is visible here;
   it writes FILE:LINE followed by a fixed assertion-failed message to stderr.
   NOTE(review): the test of EXPR and whatever follows the report (most
   likely an abort) sit on continuation lines that are not shown - confirm.  */
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Normalize INPUT (INPUT_LENGTH bytes) to NFD with u8_normalize and compare
   the outcome against EXPECTED (EXPECTED_LENGTH bytes).  The call is made
   three times, once per result-buffer convention: no buffer at all, a buffer
   that is one byte too small, and a buffer of exactly the expected size.
   Callers in this file treat a return value of 0 as success.
   NOTE(review): the failure branches of the checks below, and any release
   of result / preallocated, are on lines not visible here - confirm the
   individual return codes and the cleanup there.  */
46 check (const uint8_t *input, size_t input_length,
47 const uint8_t *expected, size_t expected_length)
52 /* Test return conventions with resultbuf == NULL. */
53 result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
/* A result must exist, have the expected length, and match byte for byte.  */
54 if (!(result != NULL))
56 if (!(length == expected_length))
58 if (!(u8_cmp (result, expected, expected_length) == 0))
62 /* Test return conventions with resultbuf too small. */
/* Skipped for an empty expectation: expected_length - 1 would wrap around,
   since size_t is unsigned.  */
63 if (expected_length > 0)
65 uint8_t *preallocated;
/* Offer a buffer that is exactly one byte short of what is needed.  */
67 length = expected_length - 1;
68 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
69 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
70 if (!(result != NULL))
/* The undersized buffer must not be what comes back.  */
72 if (!(result != preallocated))
/* LENGTH must have been updated from the buffer size to the real length.  */
74 if (!(length == expected_length))
76 if (!(u8_cmp (result, expected, expected_length) == 0))
82 /* Test return conventions with resultbuf large enough. */
84 uint8_t *preallocated;
/* Offer a buffer of exactly the expected size.  */
86 length = expected_length;
87 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
88 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
89 if (!(result != NULL))
/* The supplied buffer must be reused.  A NULL preallocated is tolerated,
   which covers malloc having returned NULL (possible for a zero-size
   request when expected_length is 0).  */
91 if (!(preallocated == NULL || result == preallocated))
93 if (!(length == expected_length))
95 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Individual NFD test cases.  Each one hands check() a UTF-8 input and the
   byte sequence it is expected to normalize to, and asserts that check()
   returns 0.  Where the input array is passed as the expectation too, the
   string is expected to come back unchanged.  */
106 { /* Empty string. */
107 ASSERT (check (NULL, 0, NULL, 0) == 0);
/* U+0020 SPACE (byte 20); expected output equals the input.  */
110 static const uint8_t input[] = { 0x20 };
111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
114 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
115 static const uint8_t input[] = { 0xC3, 0x84 };
116 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
117 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
120 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
121 static const uint8_t input[] = { 0xC7, 0x9E };
122 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
123 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
126 { /* GREEK DIALYTIKA AND PERISPOMENI */
127 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
128 static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
129 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* From here up to the Hangul cases, every input is also its own
   expectation: these strings must pass through NFD unchanged.  */
132 { /* SCRIPT SMALL L */
133 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
134 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
137 { /* NO-BREAK SPACE */
138 static const uint8_t input[] = { 0xC2, 0xA0 };
139 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
142 { /* ARABIC LETTER VEH INITIAL FORM */
143 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
144 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
147 { /* ARABIC LETTER VEH MEDIAL FORM */
148 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
149 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
152 { /* ARABIC LETTER VEH FINAL FORM */
153 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
154 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
157 { /* ARABIC LETTER VEH ISOLATED FORM */
158 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
159 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
162 { /* CIRCLED NUMBER FIFTEEN */
163 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
164 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
167 { /* TRADE MARK SIGN */
168 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
169 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
172 { /* LATIN SUBSCRIPT SMALL LETTER I */
173 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
174 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
177 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
178 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
179 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
182 { /* FULLWIDTH LATIN CAPITAL LETTER A */
183 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
184 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
187 { /* HALFWIDTH IDEOGRAPHIC COMMA */
188 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
189 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
192 { /* SMALL IDEOGRAPHIC COMMA */
193 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
194 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* U+3392 SQUARE MHZ (bytes E3 8E 92); expected output equals the input.  */
198 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
199 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
202 { /* VULGAR FRACTION THREE EIGHTHS */
203 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
204 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* U+00B5 MICRO SIGN (bytes C2 B5); expected output equals the input.  */
208 static const uint8_t input[] = { 0xC2, 0xB5 };
209 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
212 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
213 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
214 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* Hangul syllables are expected to split into their conjoining jamo:
   three of them for GEUL, two for GEU.  */
217 { /* HANGUL SYLLABLE GEUL */
218 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
219 static const uint8_t expected[] =
220 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
221 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
224 { /* HANGUL SYLLABLE GEU */
225 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
226 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
227 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
230 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
/* Mixed-script sentence.  In the expectation, u with diaeresis (C3 BC)
   becomes 75 CC 88, Cyrillic short i (D0 B9) becomes D0 B8 CC 86, and the
   Hangul syllables become jamo; all other bytes are carried over as is.
   NOTE(review): expected holds the jamo for U+D55C (E1 84 92, E1 85 A1,
   E1 86 AB), but the input bytes for that syllable (ED 95 9C) are not on
   any line visible here - presumably on a line not shown; confirm.  */
231 static const uint8_t input[] =
232 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
233 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
234 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
235 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
236 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
237 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
238 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
240 0xEA, 0xB8, 0x80, '\n'
242 static const uint8_t expected[] =
243 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
244 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
245 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
246 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
247 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
248 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
249 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
250 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
251 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
253 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
257 /* Declare failure if test takes too long, by using default abort
258 caused by SIGALRM. */
/* Restore the default SIGALRM action.
   NOTE(review): the call that actually arms the timer is not on any line
   visible here - confirm it follows.  */
259 signal (SIGALRM, SIG_DFL);
263 /* Check that the sorting is not O(n²) but O(n log n). */
/* Three passes; m, m1 and repeat are set on lines not visible here.  */
266 for (pass = 0; pass < 3; pass++)
/* One allocation serves both strings: the input occupies the first
   2 * m - 1 bytes, and the expectation the 2 * m - 1 bytes after it.
   NOTE(review): no NULL check of the malloc result is visible here.  */
270 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
273 uint8_t *expected = input + (2 * m - 1);
/* m2 is half of m - 1, rounded down.  */
275 size_t m2 = (m - 1) / 2;
276 /* NB: m1 + m2 == m - 1. */
/* Fill loops over m1 and m2 items; their bodies are on lines not visible
   here, so what each loop writes cannot be described from this view.  */
285 for (i = 0; i < m1; i++)
290 for (i = 0; i < m2; i++)
298 for (i = 0; i < m2; i++)
303 for (i = 0; i < m1; i++)
311 for (i = 0; i < m2; i++)
331 for (i = 0; i < m1; i++)
336 for (i = 0; i < m2; i++)
/* Run the same comparison repeat times; both strings are 2 * m - 1 bytes
   long.  */
342 for (; repeat > 0; repeat--)
343 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);