1 /* Test of canonical decomposition of UTF-8 strings.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* The code below is compiled only when GNULIB_UNINORM_U8_NORMALIZE
   evaluates to nonzero.  The matching #endif is not visible in this
   excerpt.  */
21 #if GNULIB_UNINORM_U8_NORMALIZE
/* Number of elements in ARRAY.  ARRAY must designate a true array object,
   not a pointer.  The argument is parenthesized before indexing so that an
   argument spelled as a cast or dereference expression still indexes the
   whole expression rather than only its last operand.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
/* ASSERT (expr): failure-reporting macro used by the tests below.  The
   visible part of its expansion writes "<file>:<line>: assertion failed" to
   stderr, taking the position from __FILE__ and __LINE__ at the expansion
   site.
   NOTE(review): only the first line and the fprintf line of this macro are
   visible here; presumably the message is printed when EXPR is false and
   the test is then terminated -- confirm against the full definition.  */
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
/* Normalize INPUT (INPUT_LENGTH bytes) with u8_normalize in UNINORM_NFD mode
   and compare the outcome with EXPECTED (EXPECTED_LENGTH bytes).  The call
   is made three times, once per result-buffer convention: no buffer, a
   buffer one element too small, and a buffer of exactly the expected size.
   Each time the returned pointer must be non-NULL, LENGTH must come back
   equal to EXPECTED_LENGTH, and the bytes must match EXPECTED per u8_cmp.
   Callers in this file treat a return value of 0 as success.
   NOTE(review): the statements executed when a check fails, and any release
   of RESULT / PREALLOCATED, are not visible in this excerpt.  */
46 check (const uint8_t *input, size_t input_length,
47 const uint8_t *expected, size_t expected_length)
52 /* Test return conventions with resultbuf == NULL. */
53 result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
54 if (!(result != NULL))
56 if (!(length == expected_length))
58 if (!(u8_cmp (result, expected, expected_length) == 0))
62 /* Test return conventions with resultbuf too small. */
/* Guarded because a buffer cannot be smaller than zero elements; the guard
   also keeps expected_length - 1 below from wrapping around.  */
63 if (expected_length > 0)
65 uint8_t *preallocated;
/* Offer one element fewer than the result needs.  */
67 length = expected_length - 1;
/* NOTE(review): the malloc result is not checked in the visible lines; for
   expected_length == 1 this is a zero-size request.  */
68 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
69 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
70 if (!(result != NULL))
/* The undersized buffer must not be handed back as the result.  */
72 if (!(result != preallocated))
74 if (!(length == expected_length))
76 if (!(u8_cmp (result, expected, expected_length) == 0))
82 /* Test return conventions with resultbuf large enough. */
84 uint8_t *preallocated;
/* Offer exactly as many elements as the expected result has.  */
86 length = expected_length;
87 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
88 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
89 if (!(result != NULL))
/* A sufficiently large buffer must be the one that is returned.
   NOTE(review): PREALLOCATED is not checked for NULL in the visible lines;
   if malloc returned NULL, this check and the non-NULL check above cannot
   both succeed.  */
91 if (!(result == preallocated))
93 if (!(length == expected_length))
95 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Single-character test cases.  Each one hands a short byte sequence to
   check () together with the byte sequence that NFD normalization must
   produce.  Where INPUT is passed as its own expected value, the sequence
   must come back unchanged.  */
/* U+0020 SPACE (byte 20): must come back unchanged.  */
107 static const uint8_t input[] = { 0x20 };
108 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* Expected output is 'A' (41) followed by the two-byte sequence CC 88.  */
111 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
112 static const uint8_t input[] = { 0xC3, 0x84 };
113 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
114 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Expected output is 'A' followed by two two-byte sequences, CC 88 then
   CC 84.  */
117 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
118 static const uint8_t input[] = { 0xC7, 0x9E };
119 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
120 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
123 { /* GREEK DIALYTIKA AND PERISPOMENI */
124 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
125 static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
126 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* From here down to the ARABIC LIGATURE case, every test passes INPUT as the
   expected result, i.e. NFD must leave the character untouched.
   NOTE(review): presumably these were chosen because they have only
   compatibility decompositions, not canonical ones -- confirm against the
   Unicode character database.  */
129 { /* SCRIPT SMALL L */
130 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
134 { /* NO-BREAK SPACE */
135 static const uint8_t input[] = { 0xC2, 0xA0 };
136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
139 { /* ARABIC LETTER VEH INITIAL FORM */
140 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
144 { /* ARABIC LETTER VEH MEDIAL FORM */
145 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
149 { /* ARABIC LETTER VEH FINAL FORM */
150 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
154 { /* ARABIC LETTER VEH ISOLATED FORM */
155 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
159 { /* CIRCLED NUMBER FIFTEEN */
160 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
164 { /* TRADE MARK SIGN */
165 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
169 { /* LATIN SUBSCRIPT SMALL LETTER I */
170 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
174 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
175 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
179 { /* FULLWIDTH LATIN CAPITAL LETTER A */
180 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
184 { /* HALFWIDTH IDEOGRAPHIC COMMA */
185 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
189 { /* SMALL IDEOGRAPHIC COMMA */
190 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* Bytes E3 8E 92 encode U+3392 (SQUARE MHZ); the line that named this case
   is not visible in this excerpt.  */
195 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
199 { /* VULGAR FRACTION THREE EIGHTHS */
200 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* Bytes C2 B5 encode U+00B5 (MICRO SIGN); the line that named this case is
   not visible in this excerpt.  */
205 static const uint8_t input[] = { 0xC2, 0xB5 };
206 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
209 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
210 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
211 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
/* The one three-byte input must expand to three three-byte sequences.  */
214 { /* HANGUL SYLLABLE GEUL */
215 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
216 static const uint8_t expected[] =
217 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
218 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Same first two sequences as GEUL above, without the third.  */
221 { /* HANGUL SYLLABLE GEU */
222 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
223 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
224 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Mixed-script sentence, terminated by '\n'.  Compared with INPUT, EXPECTED
   differs in these places: C3 BC becomes 75 CC 88, D0 B9 becomes
   D0 B8 CC 86, and the trailing Hangul is spelled as three-byte sequences.
   All other bytes are identical in both arrays.  */
227 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
228 static const uint8_t input[] =
229 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
230 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
231 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
232 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
233 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
234 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
235 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
/* NOTE(review): EXPECTED below contains a nine-byte run
   (E1 84 92 E1 85 A1 E1 86 AB) ahead of the final syllable, but no bytes
   corresponding to it appear in INPUT as shown.  A row of this initializer
   appears to be missing from the listing -- confirm against the original
   file.  */
237 0xEA, 0xB8, 0x80, '\n'
239 static const uint8_t expected[] =
240 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
241 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
242 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
243 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
244 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
245 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
246 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
247 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
/* Same nine bytes as the expected output of the HANGUL SYLLABLE GEUL case,
   then the newline.  */
248 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
250 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* Timeout guard followed by a performance test on long generated strings.
   NOTE(review): most of this section (the call that arms the alarm, the
   setup of m, m1 and repeat, and all loop bodies) is not visible in this
   excerpt; the comments below describe only what the visible lines show.  */
254 /* Declare failure if test takes too long, by using default abort
255 caused by SIGALRM. */
/* Restore the default disposition for SIGALRM.  */
256 signal (SIGALRM, SIG_DFL);
260 /* Check that the sorting is not O(n²) but O(n log n). */
/* Three passes over the generated-data test.  */
263 for (pass = 0; pass < 3; pass++)
/* One allocation of 2 * (2m - 1) elements holds both strings.
   NOTE(review): no NULL check on this malloc is visible here.  */
267 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
/* The expected string occupies the second half of that allocation.  */
270 uint8_t *expected = input + (2 * m - 1);
272 size_t m2 = (m - 1) / 2;
273 /* NB: m1 + m2 == m - 1. */
/* Loops over m1 and m2 elements; their bodies are not visible here, so what
   each one stores cannot be stated from this excerpt.  */
282 for (i = 0; i < m1; i++)
287 for (i = 0; i < m2; i++)
295 for (i = 0; i < m2; i++)
300 for (i = 0; i < m1; i++)
308 for (i = 0; i < m2; i++)
328 for (i = 0; i < m1; i++)
333 for (i = 0; i < m2; i++)
/* Run the same comparison REPEAT times; both strings are 2m - 1 elements
   long.  */
339 for (; repeat > 0; repeat--)
340 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);