1 /* Test of canonical decomposition of UTF-8 strings.
2 Copyright (C) 2009-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
33 check (const uint8_t *input, size_t input_length,
34 const uint8_t *expected, size_t expected_length)
39 /* Test return conventions with resultbuf == NULL. */
40 result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
41 if (!(result != NULL))
43 if (!(length == expected_length))
45 if (!(u8_cmp (result, expected, expected_length) == 0))
49 /* Test return conventions with resultbuf too small. */
50 if (expected_length > 0)
52 uint8_t *preallocated;
54 length = expected_length - 1;
55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
57 if (!(result != NULL))
59 if (!(result != preallocated))
61 if (!(length == expected_length))
63 if (!(u8_cmp (result, expected, expected_length) == 0))
69 /* Test return conventions with resultbuf large enough. */
71 uint8_t *preallocated;
73 length = expected_length;
74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
76 if (!(result != NULL))
78 if (!(preallocated == NULL || result == preallocated))
80 if (!(length == expected_length))
82 if (!(u8_cmp (result, expected, expected_length) == 0))
94 ASSERT (check (NULL, 0, NULL, 0) == 0);
97 static const uint8_t input[] = { 0x20 };
98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
102 static const uint8_t input[] = { 0xC3, 0x84 };
103 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
108 static const uint8_t input[] = { 0xC7, 0x9E };
109 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
113 { /* GREEK DIALYTIKA AND PERISPOMENI */
114 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
115 static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
119 { /* SCRIPT SMALL L */
120 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
121 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
124 { /* NO-BREAK SPACE */
125 static const uint8_t input[] = { 0xC2, 0xA0 };
126 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
129 { /* ARABIC LETTER VEH INITIAL FORM */
130 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
134 { /* ARABIC LETTER VEH MEDIAL FORM */
135 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
139 { /* ARABIC LETTER VEH FINAL FORM */
140 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
144 { /* ARABIC LETTER VEH ISOLATED FORM */
145 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
149 { /* CIRCLED NUMBER FIFTEEN */
150 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
154 { /* TRADE MARK SIGN */
155 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
159 { /* LATIN SUBSCRIPT SMALL LETTER I */
160 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
164 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
165 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
169 { /* FULLWIDTH LATIN CAPITAL LETTER A */
170 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
174 { /* HALFWIDTH IDEOGRAPHIC COMMA */
175 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
179 { /* SMALL IDEOGRAPHIC COMMA */
180 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
185 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
189 { /* VULGAR FRACTION THREE EIGHTHS */
190 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
195 static const uint8_t input[] = { 0xC2, 0xB5 };
196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
199 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
200 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
204 { /* HANGUL SYLLABLE GEUL */
205 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
206 static const uint8_t expected[] =
207 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
208 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
211 { /* HANGUL SYLLABLE GEU */
212 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
213 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
214 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
217 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
218 static const uint8_t input[] =
219 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
220 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
221 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
222 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
223 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
224 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
225 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
227 0xEA, 0xB8, 0x80, '\n'
229 static const uint8_t expected[] =
230 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
231 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
232 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
233 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
234 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
235 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
236 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
237 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
238 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
240 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
244 /* Declare failure if test takes too long, by using default abort
245 caused by SIGALRM. */
246 signal (SIGALRM, SIG_DFL);
250 /* Check that the sorting is not O(n²) but O(n log n). */
253 for (pass = 0; pass < 3; pass++)
257 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
260 uint8_t *expected = input + (2 * m - 1);
262 size_t m2 = (m - 1) / 2;
263 /* NB: m1 + m2 == m - 1. */
272 for (i = 0; i < m1; i++)
277 for (i = 0; i < m2; i++)
285 for (i = 0; i < m2; i++)
290 for (i = 0; i < m1; i++)
298 for (i = 0; i < m2; i++)
318 for (i = 0; i < m1; i++)
323 for (i = 0; i < m2; i++)
329 for (; repeat > 0; repeat--)
330 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);