1 /* Test of compatibility decomposition of UTF-8 strings.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
/* Normalize INPUT (INPUT_LENGTH bytes) to NFKD with u8_normalize and compare
   the outcome with EXPECTED (EXPECTED_LENGTH bytes).  The same call is made
   under three result-buffer conventions: no buffer, a buffer one byte too
   small, and a buffer of exactly the expected size.  Callers assert that
   the return value is 0.  */
33 check (const uint8_t *input, size_t input_length,
34 const uint8_t *expected, size_t expected_length)
39 /* Test return conventions with resultbuf == NULL. */
40 result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length);
/* The call must succeed and produce exactly the expected bytes.  */
41 if (!(result != NULL))
43 if (!(length == expected_length))
45 if (!(u8_cmp (result, expected, expected_length) == 0))
49 /* Test return conventions with resultbuf too small. */
/* Guarded so that expected_length - 1 below cannot wrap around.  */
50 if (expected_length > 0)
52 uint8_t *preallocated;
54 length = expected_length - 1;
55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
57 if (!(result != NULL))
/* The supplied buffer holds one byte too few, so the result must not be
   returned in it.  */
59 if (!(result != preallocated))
/* On return, length must report the full size, not the size passed in.  */
61 if (!(length == expected_length))
63 if (!(u8_cmp (result, expected, expected_length) == 0))
69 /* Test return conventions with resultbuf large enough. */
71 uint8_t *preallocated;
73 length = expected_length;
74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
76 if (!(result != NULL))
/* The result is expected in the supplied buffer; a NULL preallocated
   (malloc may return NULL, e.g. for a zero-size request) is tolerated.  */
78 if (!(preallocated == NULL || result == preallocated))
80 if (!(length == expected_length))
82 if (!(u8_cmp (result, expected, expected_length) == 0))
/* Empty input must normalize to empty output.  */
94 ASSERT (check (NULL, 0, NULL, 0) == 0);
/* U+0020 (SPACE) is expected to come back unchanged: the input array
   doubles as the expected result.  */
97 static const uint8_t input[] = { 0x20 };
98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
/* U+00C4 -> U+0041 U+0308 */
102 static const uint8_t input[] = { 0xC3, 0x84 };
103 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
/* U+01DE -> U+0041 U+0308 U+0304 */
108 static const uint8_t input[] = { 0xC7, 0x9E };
109 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
113 { /* GREEK DIALYTIKA AND PERISPOMENI */
/* U+1FC1 -> U+0020 U+0308 U+0342 */
114 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
115 static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
119 { /* SCRIPT SMALL L */
/* U+2113 -> U+006C */
120 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
121 static const uint8_t expected[] = { 0x6C };
122 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
125 { /* NO-BREAK SPACE */
/* U+00A0 -> U+0020 */
126 static const uint8_t input[] = { 0xC2, 0xA0 };
127 static const uint8_t expected[] = { 0x20 };
128 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
131 { /* ARABIC LETTER VEH INITIAL FORM */
/* U+FB6C -> U+06A4 */
132 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
133 static const uint8_t expected[] = { 0xDA, 0xA4 };
134 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
137 { /* ARABIC LETTER VEH MEDIAL FORM */
/* U+FB6D -> U+06A4 */
138 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
139 static const uint8_t expected[] = { 0xDA, 0xA4 };
140 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
143 { /* ARABIC LETTER VEH FINAL FORM */
/* U+FB6B -> U+06A4 */
144 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
145 static const uint8_t expected[] = { 0xDA, 0xA4 };
146 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
149 { /* ARABIC LETTER VEH ISOLATED FORM */
/* U+FB6A -> U+06A4 */
150 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
151 static const uint8_t expected[] = { 0xDA, 0xA4 };
152 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
155 { /* CIRCLED NUMBER FIFTEEN */
/* U+246E -> "15" */
156 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
157 static const uint8_t expected[] = { 0x31, 0x35 };
158 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
161 { /* TRADE MARK SIGN */
/* U+2122 -> "TM" */
162 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
163 static const uint8_t expected[] = { 0x54, 0x4D };
164 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
167 { /* LATIN SUBSCRIPT SMALL LETTER I */
/* U+1D62 -> U+0069 */
168 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
169 static const uint8_t expected[] = { 0x69 };
170 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
173 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
/* U+FE35 -> U+0028 */
174 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
175 static const uint8_t expected[] = { 0x28 };
176 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
179 { /* FULLWIDTH LATIN CAPITAL LETTER A */
/* U+FF21 -> U+0041 */
180 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
181 static const uint8_t expected[] = { 0x41 };
182 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
185 { /* HALFWIDTH IDEOGRAPHIC COMMA */
/* U+FF64 -> U+3001 */
186 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
187 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
188 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
191 { /* SMALL IDEOGRAPHIC COMMA */
/* U+FE51 -> U+3001 */
192 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
193 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
194 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* U+3392 (SQUARE MHZ) -> "MHz" */
198 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
199 static const uint8_t expected[] = { 0x4D, 0x48, 0x7A };
200 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
203 { /* VULGAR FRACTION THREE EIGHTHS */
/* U+215C -> U+0033 U+2044 U+0038 */
204 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
205 static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
206 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
/* U+00B5 (MICRO SIGN) -> U+03BC (GREEK SMALL LETTER MU) */
210 static const uint8_t input[] = { 0xC2, 0xB5 };
211 static const uint8_t expected[] = { 0xCE, 0xBC };
212 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
215 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
/* U+FDFA: a single code point whose expected result is a multi-word
   sequence, with words separated by U+0020.  */
216 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
217 static const uint8_t expected[] =
218 { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
219 0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
220 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
222 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
225 { /* HANGUL SYLLABLE GEUL */
/* U+AE00 -> U+1100 U+1173 U+11AF */
226 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
227 static const uint8_t expected[] =
228 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
229 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
232 { /* HANGUL SYLLABLE GEU */
/* U+ADF8 -> U+1100 U+1173 */
233 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
234 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
235 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
238 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
/* Within the sentence: u with diaeresis (0xC3 0xBC) becomes 'u' plus
   U+0308, Cyrillic short i (0xD0 0xB9) becomes 0xD0 0xB8 plus U+0306, the
   superscript two (0xC2 0xB2) becomes the digit '2', and the final Hangul
   syllable (0xEA 0xB8 0x80) becomes three jamo; sharp s (0xC3 0x9F) and
   the CJK ideographs are carried over unchanged.  */
239 static const uint8_t input[] =
240 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
241 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
242 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
243 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
244 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
245 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
246 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
248 0xEA, 0xB8, 0x80, '\n'
250 static const uint8_t expected[] =
251 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
252 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
253 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
254 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
255 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
256 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
257 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
258 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
259 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
261 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
265 /* Declare failure if test takes too long, by using default abort
266 caused by SIGALRM. */
/* Select the default action for SIGALRM.  */
267 signal (SIGALRM, SIG_DFL);
271 /* Check that the sorting is not O(n²) but O(n log n). */
/* Three passes are made over the construction below.  */
274 for (pass = 0; pass < 3; pass++)
/* One allocation holds both byte arrays, each 2 * m - 1 bytes long: the
   input comes first and the expected output follows it directly.  */
278 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
281 uint8_t *expected = input + (2 * m - 1);
283 size_t m2 = (m - 1) / 2;
284 /* NB: m1 + m2 == m - 1. */
/* Each of the loops below runs over either m1 or m2 elements.  */
293 for (i = 0; i < m1; i++)
298 for (i = 0; i < m2; i++)
306 for (i = 0; i < m2; i++)
311 for (i = 0; i < m1; i++)
319 for (i = 0; i < m2; i++)
339 for (i = 0; i < m1; i++)
344 for (i = 0; i < m2; i++)
/* Run the same comparison once per remaining repeat count, on the full
   2 * m - 1 bytes of each array.  */
350 for (; repeat > 0; repeat--)
351 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);