1 /* Test of compatibility decomposition of UTF-8 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_UNINORM_U8_NORMALIZE
/* Number of elements in ARRAY: its total size divided by the size of its
   first element.  ARRAY must be a real array; sizeof on a pointer would give
   the pointer's size instead.  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
/* Assertion helper.  The visible part prints "<file>:<line>: assertion failed"
   to stderr.  NOTE(review): the test on EXPR and whatever follows the message
   are on lines not shown in this excerpt -- confirm against the full file.  */
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Run u8_normalize (UNINORM_NFKD, ...) on INPUT three times -- with no result
   buffer, with a buffer one unit too small, and with a buffer of exactly
   EXPECTED_LENGTH units -- and compare each outcome with EXPECTED.
   Callers in this file assert that the return value is 0.
   NOTE(review): the statements executed when a comparison fails, and any
   release of the allocated buffers, are on lines not shown in this excerpt.  */
46 check (const uint8_t *input, size_t input_length,
47 const uint8_t *expected, size_t expected_length)
52 /* Test return conventions with resultbuf == NULL. */
53 result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length);
54 if (!(result != NULL))
56 if (!(length == expected_length))
58 if (!(u8_cmp (result, expected, expected_length) == 0))
62 /* Test return conventions with resultbuf too small. */
/* Skipped for an empty expected result, since no smaller buffer exists.  */
63 if (expected_length > 0)
65 uint8_t *preallocated;
/* One unit short of the expected size, so the result cannot fit.  */
67 length = expected_length - 1;
/* NOTE(review): malloc's result is not checked on the visible lines.  */
68 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
69 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
70 if (!(result != NULL))
/* The returned pointer must not be the undersized buffer.  */
72 if (!(result != preallocated))
/* LENGTH must have been updated to the full result length.  */
74 if (!(length == expected_length))
76 if (!(u8_cmp (result, expected, expected_length) == 0))
82 /* Test return conventions with resultbuf large enough. */
84 uint8_t *preallocated;
/* Exactly the expected size.  */
86 length = expected_length;
/* NOTE(review): malloc's result is not checked on the visible lines.  */
87 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
88 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
89 if (!(result != NULL))
/* Here the returned pointer must be the caller's buffer itself.  */
91 if (!(result == preallocated))
93 if (!(length == expected_length))
95 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Single-character cases.  Each one passes the UTF-8 bytes of one character
   to check () together with the bytes expected after NFKD normalization, and
   asserts that check () returns 0.  */
/* SPACE (byte 0x20): the expected output is the input itself.  */
107 static const uint8_t input[] = { 0x20 };
108 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
111 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
112 static const uint8_t input[] = { 0xC3, 0x84 };
113 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
114 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
117 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
118 static const uint8_t input[] = { 0xC7, 0x9E };
119 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
120 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
123 { /* GREEK DIALYTIKA AND PERISPOMENI */
124 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
125 static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
126 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
129 { /* SCRIPT SMALL L */
130 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
131 static const uint8_t expected[] = { 0x6C };
132 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
135 { /* NO-BREAK SPACE */
136 static const uint8_t input[] = { 0xC2, 0xA0 };
137 static const uint8_t expected[] = { 0x20 };
138 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* The four presentation forms that follow (initial, medial, final, isolated)
   all expect the same two bytes DA A4, which encode U+06A4.  */
141 { /* ARABIC LETTER VEH INITIAL FORM */
142 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
143 static const uint8_t expected[] = { 0xDA, 0xA4 };
144 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
147 { /* ARABIC LETTER VEH MEDIAL FORM */
148 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
149 static const uint8_t expected[] = { 0xDA, 0xA4 };
150 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
153 { /* ARABIC LETTER VEH FINAL FORM */
154 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
155 static const uint8_t expected[] = { 0xDA, 0xA4 };
156 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
159 { /* ARABIC LETTER VEH ISOLATED FORM */
160 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
161 static const uint8_t expected[] = { 0xDA, 0xA4 };
162 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
165 { /* CIRCLED NUMBER FIFTEEN */
166 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
167 static const uint8_t expected[] = { 0x31, 0x35 };
168 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
171 { /* TRADE MARK SIGN */
172 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
173 static const uint8_t expected[] = { 0x54, 0x4D };
174 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
177 { /* LATIN SUBSCRIPT SMALL LETTER I */
178 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
179 static const uint8_t expected[] = { 0x69 };
180 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
183 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
184 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
185 static const uint8_t expected[] = { 0x28 };
186 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
189 { /* FULLWIDTH LATIN CAPITAL LETTER A */
190 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
191 static const uint8_t expected[] = { 0x41 };
192 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* The halfwidth and small comma cases both expect E3 80 81, i.e. U+3001.  */
195 { /* HALFWIDTH IDEOGRAPHIC COMMA */
196 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
197 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
198 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
201 { /* SMALL IDEOGRAPHIC COMMA */
202 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
203 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
204 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Input bytes E3 8E 92 encode U+3392 (SQUARE MHZ); the expected output is
   the three ASCII bytes "MHz".  */
208 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
209 static const uint8_t expected[] = { 0x4D, 0x48, 0x7A };
210 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
213 { /* VULGAR FRACTION THREE EIGHTHS */
214 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
215 static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
216 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Input bytes C2 B5 encode U+00B5 (MICRO SIGN); the expected bytes CE BC
   encode U+03BC (GREEK SMALL LETTER MU).  */
220 static const uint8_t input[] = { 0xC2, 0xB5 };
221 static const uint8_t expected[] = { 0xCE, 0xBC };
222 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
225 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
226 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
/* One input character expands to 33 bytes: four groups of two-byte
   characters separated by three 0x20 bytes.  */
227 static const uint8_t expected[] =
228 { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
229 0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
230 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
232 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
235 { /* HANGUL SYLLABLE GEUL */
236 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
/* Expected: three code points, U+1100 U+1173 U+11AF.  */
237 static const uint8_t expected[] =
238 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
239 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
242 { /* HANGUL SYLLABLE GEU */
243 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
/* Expected: two code points, U+1100 U+1173.  */
244 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
245 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
248 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
/* Mixed-script string.  Relative to the input, the expected bytes differ in
   these places:
     C3 BC (U+00FC)     ->  75 CC 88     ('u' followed by U+0308)
     D0 B9 (U+0439)     ->  D0 B8 CC 86  (U+0438 followed by U+0306)
     C2 B2 (U+00B2)     ->  32           ('2')
     EA B8 80 (U+AE00)  ->  E1 84 80 E1 85 B3 E1 86 AF
   C3 9F and C2 B1 appear unchanged in both arrays.
   NOTE(review): expected also contains E1 84 92 E1 85 A1 E1 86 AB before the
   last syllable; the corresponding input bytes are not on the lines shown in
   this excerpt -- confirm against the full file.  */
249 static const uint8_t input[] =
250 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
251 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
252 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
253 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
254 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
255 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
256 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
258 0xEA, 0xB8, 0x80, '\n'
260 static const uint8_t expected[] =
261 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
262 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
263 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
264 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
265 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
266 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
267 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
268 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
269 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
271 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
275 /* Declare failure if test takes too long, by using default abort
276 caused by SIGALRM. */
/* Restore the default SIGALRM disposition.  NOTE(review): the call that arms
   the timer is not on the lines shown in this excerpt.  */
277 signal (SIGALRM, SIG_DFL);
281 /* Check that the sorting is not O(n²) but O(n log n). */
/* Three passes.  NOTE(review): how m and repeat are chosen for each pass is
   not shown in this excerpt.  */
284 for (pass = 0; pass < 3; pass++)
/* A single allocation holds both strings: the first 2*m-1 bytes are the
   input and the following 2*m-1 bytes are the expected output.
   NOTE(review): malloc's result is not checked on the visible lines.  */
288 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
291 uint8_t *expected = input + (2 * m - 1);
293 size_t m2 = (m - 1) / 2;
294 /* NB: m1 + m2 == m - 1. */
/* The loops below iterate over m1 or m2 items each.  NOTE(review): their
   bodies, which presumably fill input and expected, are not shown here.  */
303 for (i = 0; i < m1; i++)
308 for (i = 0; i < m2; i++)
316 for (i = 0; i < m2; i++)
321 for (i = 0; i < m1; i++)
329 for (i = 0; i < m2; i++)
349 for (i = 0; i < m1; i++)
354 for (i = 0; i < m2; i++)
/* Run the same comparison `repeat' times on the generated strings.  */
360 for (; repeat > 0; repeat--)
361 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);