1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
24 #include "striconveha.h"
/* Number of elements in ARRAY.  ARRAY must be a real array object, not a
   pointer; the argument is parenthesized so that any array-valued
   expression can be passed.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))

/* Abort the program when EXPR evaluates to false.
   The body is wrapped in do { ... } while (0) and carries no trailing
   semicolon, so that "ASSERT (x);" is exactly one statement.  This keeps
   it safe as the unbraced body of a 'for' loop and in front of an 'else'.  */
#define ASSERT(expr) \
  do \
    { \
      if (!(expr)) \
        abort (); \
    } \
  while (0)

/* Magic number for detecting bounds violations.  The tests compare it
   against the offsets element one past the last input position.  */
#define MAGIC 0x1983EFF1
41 new_offsets (size_t n)
43 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
51 static enum iconv_ilseq_handler handlers[] =
52 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
58 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
59 ISO-8859-2, and UTF-8. */
61 /* ------------------------- Test mem_iconveha() ------------------------- */
63 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
64 for (h = 0; h < SIZEOF (handlers); h++)
66 enum iconv_ilseq_handler handler = handlers[h];
67 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
68 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
69 for (o = 0; o < 2; o++)
71 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
74 int retval = mem_iconveha (input, strlen (input),
75 "ISO-8859-2", "ISO-8859-1",
80 ASSERT (length == strlen (expected));
81 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
84 for (i = 0; i < 37; i++)
85 ASSERT (offsets[i] == i);
86 ASSERT (offsets[37] == MAGIC);
93 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
94 for (h = 0; h < SIZEOF (handlers); h++)
96 enum iconv_ilseq_handler handler = handlers[h];
97 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
98 for (o = 0; o < 2; o++)
100 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
103 int retval = mem_iconveha (input, strlen (input),
104 "ISO-8859-2", "ISO-8859-1",
111 ASSERT (retval == -1 && errno == EILSEQ);
112 ASSERT (result == NULL);
116 case iconveh_question_mark:
118 static const char expected[] = "Rafa? Maszkowski";
119 ASSERT (retval == 0);
120 ASSERT (length == strlen (expected));
121 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
124 for (i = 0; i < 16; i++)
125 ASSERT (offsets[i] == i);
126 ASSERT (offsets[16] == MAGIC);
132 case iconveh_escape_sequence:
134 static const char expected[] = "Rafa\\u0142 Maszkowski";
135 ASSERT (retval == 0);
136 ASSERT (length == strlen (expected));
137 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
140 for (i = 0; i < 16; i++)
141 ASSERT (offsets[i] == (i < 5 ? i :
143 ASSERT (offsets[16] == MAGIC);
153 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
154 for (h = 0; h < SIZEOF (handlers); h++)
156 enum iconv_ilseq_handler handler = handlers[h];
157 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
158 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
159 for (o = 0; o < 2; o++)
161 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
164 int retval = mem_iconveha (input, strlen (input),
165 "ISO-8859-1", "UTF-8",
169 ASSERT (retval == 0);
170 ASSERT (length == strlen (expected));
171 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
174 for (i = 0; i < 37; i++)
175 ASSERT (offsets[i] == (i < 1 ? i :
179 ASSERT (offsets[37] == MAGIC);
186 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
187 for (h = 0; h < SIZEOF (handlers); h++)
189 enum iconv_ilseq_handler handler = handlers[h];
190 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
191 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
192 for (o = 0; o < 2; o++)
194 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
197 int retval = mem_iconveha (input, strlen (input),
198 "UTF-8", "ISO-8859-1",
202 ASSERT (retval == 0);
203 ASSERT (length == strlen (expected));
204 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
207 for (i = 0; i < 41; i++)
208 ASSERT (offsets[i] == (i < 1 ? i :
209 i == 1 ? (size_t)(-1) :
211 i == 13 ? (size_t)(-1) :
213 i == 20 ? (size_t)(-1) :
216 ASSERT (offsets[41] == MAGIC);
223 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
224 for (h = 0; h < SIZEOF (handlers); h++)
226 enum iconv_ilseq_handler handler = handlers[h];
227 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
228 for (o = 0; o < 2; o++)
230 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
233 int retval = mem_iconveha (input, strlen (input),
234 "UTF-8", "ISO-8859-1",
241 ASSERT (retval == -1 && errno == EILSEQ);
242 ASSERT (result == NULL);
246 case iconveh_question_mark:
248 static const char expected[] = "Rafa? Maszkowski";
249 ASSERT (retval == 0);
250 ASSERT (length == strlen (expected));
251 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
254 for (i = 0; i < 17; i++)
255 ASSERT (offsets[i] == (i < 5 ? i :
256 i == 5 ? (size_t)(-1) :
258 ASSERT (offsets[17] == MAGIC);
264 case iconveh_escape_sequence:
266 static const char expected[] = "Rafa\\u0142 Maszkowski";
267 ASSERT (retval == 0);
268 ASSERT (length == strlen (expected));
269 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
272 for (i = 0; i < 17; i++)
273 ASSERT (offsets[i] == (i < 5 ? i :
274 i == 5 ? (size_t)(-1) :
276 ASSERT (offsets[17] == MAGIC);
286 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
287 for (h = 0; h < SIZEOF (handlers); h++)
289 enum iconv_ilseq_handler handler = handlers[h];
290 static const char input[] = "\342";
291 for (o = 0; o < 2; o++)
293 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
296 int retval = mem_iconveha (input, strlen (input),
297 "UTF-8", "ISO-8859-1",
301 ASSERT (retval == 0);
302 ASSERT (length == 0);
305 ASSERT (offsets[0] == 0);
306 ASSERT (offsets[1] == MAGIC);
314 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
315 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
316 /* Test conversions from autodetect_jp to UTF-8. */
317 for (h = 0; h < SIZEOF (handlers); h++)
319 enum iconv_ilseq_handler handler = handlers[h];
320 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
321 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
322 for (o = 0; o < 2; o++)
324 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
327 int retval = mem_iconveha (input, strlen (input),
328 "autodetect_jp", "UTF-8",
332 ASSERT (retval == 0);
333 ASSERT (length == strlen (expected));
334 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
337 for (i = 0; i < 10; i++)
338 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
339 ASSERT (offsets[10] == MAGIC);
345 for (h = 0; h < SIZEOF (handlers); h++)
347 enum iconv_ilseq_handler handler = handlers[h];
348 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
349 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
350 for (o = 0; o < 2; o++)
352 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
355 int retval = mem_iconveha (input, strlen (input),
356 "autodetect_jp", "UTF-8",
360 ASSERT (retval == 0);
361 ASSERT (length == strlen (expected));
362 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
365 for (i = 0; i < 10; i++)
366 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
367 ASSERT (offsets[10] == MAGIC);
373 for (h = 0; h < SIZEOF (handlers); h++)
375 enum iconv_ilseq_handler handler = handlers[h];
376 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
377 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
378 for (o = 0; o < 2; o++)
380 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
383 int retval = mem_iconveha (input, strlen (input),
384 "autodetect_jp", "UTF-8",
388 ASSERT (retval == 0);
389 ASSERT (length == strlen (expected));
390 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
393 for (i = 0; i < 16; i++)
394 ASSERT (offsets[i] == (i == 0 ? 0 :
401 ASSERT (offsets[16] == MAGIC);
409 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
410 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
411 for (h = 0; h < SIZEOF (handlers); h++)
413 enum iconv_ilseq_handler handler = handlers[h];
414 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
415 static const char expected[] = "Costs: 27 EUR";
416 for (o = 0; o < 2; o++)
418 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
421 int retval = mem_iconveha (input, strlen (input),
422 "UTF-8", "ISO-8859-1",
426 ASSERT (retval == 0);
427 ASSERT (length == strlen (expected));
428 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
431 for (i = 0; i < 13; i++)
432 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
433 ASSERT (offsets[13] == MAGIC);
441 /* ------------------------- Test str_iconveha() ------------------------- */
443 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
444 for (h = 0; h < SIZEOF (handlers); h++)
446 enum iconv_ilseq_handler handler = handlers[h];
447 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
448 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
449 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
450 ASSERT (result != NULL);
451 ASSERT (strcmp (result, expected) == 0);
455 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
456 for (h = 0; h < SIZEOF (handlers); h++)
458 enum iconv_ilseq_handler handler = handlers[h];
459 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
460 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
464 ASSERT (result == NULL && errno == EILSEQ);
466 case iconveh_question_mark:
468 static const char expected[] = "Rafa? Maszkowski";
469 ASSERT (result != NULL);
470 ASSERT (strcmp (result, expected) == 0);
474 case iconveh_escape_sequence:
476 static const char expected[] = "Rafa\\u0142 Maszkowski";
477 ASSERT (result != NULL);
478 ASSERT (strcmp (result, expected) == 0);
485 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
486 for (h = 0; h < SIZEOF (handlers); h++)
488 enum iconv_ilseq_handler handler = handlers[h];
489 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
490 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
491 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
492 ASSERT (result != NULL);
493 ASSERT (strcmp (result, expected) == 0);
497 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
498 for (h = 0; h < SIZEOF (handlers); h++)
500 enum iconv_ilseq_handler handler = handlers[h];
501 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
502 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
503 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
504 ASSERT (result != NULL);
505 ASSERT (strcmp (result, expected) == 0);
509 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
510 for (h = 0; h < SIZEOF (handlers); h++)
512 enum iconv_ilseq_handler handler = handlers[h];
513 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
514 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
518 ASSERT (result == NULL && errno == EILSEQ);
520 case iconveh_question_mark:
522 static const char expected[] = "Costs: 27 ?";
523 ASSERT (result != NULL);
524 ASSERT (strcmp (result, expected) == 0);
528 case iconveh_escape_sequence:
530 static const char expected[] = "Costs: 27 \\u20AC";
531 ASSERT (result != NULL);
532 ASSERT (strcmp (result, expected) == 0);
539 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
540 for (h = 0; h < SIZEOF (handlers); h++)
542 enum iconv_ilseq_handler handler = handlers[h];
543 static const char input[] = "\342";
544 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
545 ASSERT (result != NULL);
546 ASSERT (strcmp (result, "") == 0);
550 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
551 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
552 /* Test conversions from autodetect_jp to UTF-8. */
553 for (h = 0; h < SIZEOF (handlers); h++)
555 enum iconv_ilseq_handler handler = handlers[h];
556 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
557 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
558 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
559 ASSERT (result != NULL);
560 ASSERT (strcmp (result, expected) == 0);
563 for (h = 0; h < SIZEOF (handlers); h++)
565 enum iconv_ilseq_handler handler = handlers[h];
566 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
567 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
568 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
569 ASSERT (result != NULL);
570 ASSERT (strcmp (result, expected) == 0);
573 for (h = 0; h < SIZEOF (handlers); h++)
575 enum iconv_ilseq_handler handler = handlers[h];
576 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
577 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
578 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
579 ASSERT (result != NULL);
580 ASSERT (strcmp (result, expected) == 0);
585 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
586 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
587 for (h = 0; h < SIZEOF (handlers); h++)
589 enum iconv_ilseq_handler handler = handlers[h];
590 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
591 static const char expected[] = "Costs: 27 EUR";
592 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
593 ASSERT (result != NULL);
594 ASSERT (strcmp (result, expected) == 0);