1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveha.h"
/* Number of elements of ARRAY.  ARRAY must be a true array object, not a
   pointer (in particular not an array-typed function parameter, which has
   decayed to a pointer).  The argument is parenthesized in the expansion
   so that any array-valued expression may be passed.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
44 /* Magic number for detecting bounds violations.
   Each offsets check below asserts that the array element just past the
   last input position still equals MAGIC after the conversion call, i.e.
   that nothing was written beyond the entries belonging to the input.
   NOTE(review): the statement that stores MAGIC into that element is not
   visible in this excerpt -- presumably new_offsets() does it; confirm.  */
45 #define MAGIC 0x1983EFF1
48 new_offsets (size_t n)
50 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The illegal-sequence handlers under test.  Every test loop below iterates
   h over SIZEOF (handlers) and passes handlers[h] to the conversion
   function.  In the EILSEQ tests the expected result depends on the
   handler: iconveh_question_mark expects the unconvertible character to
   come out as "?", and iconveh_escape_sequence expects a "\uXXXX" escape.
   NOTE(review): the failure expectation (-1 / NULL with errno == EILSEQ)
   presumably belongs to iconveh_error; its case label is not visible in
   this excerpt -- confirm.  */
58 static enum iconv_ilseq_handler handlers[] =
59 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
65 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
66 ISO-8859-2, and UTF-8. */
68 /* ------------------------- Test mem_iconveha() ------------------------- */
70 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
71 for (h = 0; h < SIZEOF (handlers); h++)
73 enum iconv_ilseq_handler handler = handlers[h];
74 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
75 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
76 for (o = 0; o < 2; o++)
78 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
81 int retval = mem_iconveha (input, strlen (input),
82 "ISO-8859-2", "ISO-8859-1",
87 ASSERT (length == strlen (expected));
88 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
91 for (i = 0; i < 37; i++)
92 ASSERT (offsets[i] == i);
93 ASSERT (offsets[37] == MAGIC);
100 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
101 for (h = 0; h < SIZEOF (handlers); h++)
103 enum iconv_ilseq_handler handler = handlers[h];
104 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
105 for (o = 0; o < 2; o++)
107 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
110 int retval = mem_iconveha (input, strlen (input),
111 "ISO-8859-2", "ISO-8859-1",
118 ASSERT (retval == -1 && errno == EILSEQ);
119 ASSERT (result == NULL);
123 case iconveh_question_mark:
125 static const char expected[] = "Rafa? Maszkowski";
126 ASSERT (retval == 0);
127 ASSERT (length == strlen (expected));
128 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
131 for (i = 0; i < 16; i++)
132 ASSERT (offsets[i] == i);
133 ASSERT (offsets[16] == MAGIC);
139 case iconveh_escape_sequence:
141 static const char expected[] = "Rafa\\u0142 Maszkowski";
142 ASSERT (retval == 0);
143 ASSERT (length == strlen (expected));
144 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
147 for (i = 0; i < 16; i++)
148 ASSERT (offsets[i] == (i < 5 ? i :
150 ASSERT (offsets[16] == MAGIC);
160 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
161 for (h = 0; h < SIZEOF (handlers); h++)
163 enum iconv_ilseq_handler handler = handlers[h];
164 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
165 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
166 for (o = 0; o < 2; o++)
168 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
171 int retval = mem_iconveha (input, strlen (input),
172 "ISO-8859-1", "UTF-8",
176 ASSERT (retval == 0);
177 ASSERT (length == strlen (expected));
178 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
181 for (i = 0; i < 37; i++)
182 ASSERT (offsets[i] == (i < 1 ? i :
186 ASSERT (offsets[37] == MAGIC);
193 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
194 for (h = 0; h < SIZEOF (handlers); h++)
196 enum iconv_ilseq_handler handler = handlers[h];
197 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
198 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
199 for (o = 0; o < 2; o++)
201 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
204 int retval = mem_iconveha (input, strlen (input),
205 "UTF-8", "ISO-8859-1",
209 ASSERT (retval == 0);
210 ASSERT (length == strlen (expected));
211 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
214 for (i = 0; i < 41; i++)
215 ASSERT (offsets[i] == (i < 1 ? i :
216 i == 1 ? (size_t)(-1) :
218 i == 13 ? (size_t)(-1) :
220 i == 20 ? (size_t)(-1) :
223 ASSERT (offsets[41] == MAGIC);
230 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
231 for (h = 0; h < SIZEOF (handlers); h++)
233 enum iconv_ilseq_handler handler = handlers[h];
234 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
235 for (o = 0; o < 2; o++)
237 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
240 int retval = mem_iconveha (input, strlen (input),
241 "UTF-8", "ISO-8859-1",
248 ASSERT (retval == -1 && errno == EILSEQ);
249 ASSERT (result == NULL);
253 case iconveh_question_mark:
255 static const char expected[] = "Rafa? Maszkowski";
256 ASSERT (retval == 0);
257 ASSERT (length == strlen (expected));
258 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
261 for (i = 0; i < 17; i++)
262 ASSERT (offsets[i] == (i < 5 ? i :
263 i == 5 ? (size_t)(-1) :
265 ASSERT (offsets[17] == MAGIC);
271 case iconveh_escape_sequence:
273 static const char expected[] = "Rafa\\u0142 Maszkowski";
274 ASSERT (retval == 0);
275 ASSERT (length == strlen (expected));
276 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
279 for (i = 0; i < 17; i++)
280 ASSERT (offsets[i] == (i < 5 ? i :
281 i == 5 ? (size_t)(-1) :
283 ASSERT (offsets[17] == MAGIC);
293 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
294 for (h = 0; h < SIZEOF (handlers); h++)
296 enum iconv_ilseq_handler handler = handlers[h];
297 static const char input[] = "\342";
298 for (o = 0; o < 2; o++)
300 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
303 int retval = mem_iconveha (input, strlen (input),
304 "UTF-8", "ISO-8859-1",
308 ASSERT (retval == 0);
309 ASSERT (length == 0);
312 ASSERT (offsets[0] == 0);
313 ASSERT (offsets[1] == MAGIC);
321 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
322 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
323 /* Test conversions from autodetect_jp to UTF-8. */
324 for (h = 0; h < SIZEOF (handlers); h++)
326 enum iconv_ilseq_handler handler = handlers[h];
327 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
328 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
329 for (o = 0; o < 2; o++)
331 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
334 int retval = mem_iconveha (input, strlen (input),
335 "autodetect_jp", "UTF-8",
339 ASSERT (retval == 0);
340 ASSERT (length == strlen (expected));
341 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
344 for (i = 0; i < 10; i++)
345 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
346 ASSERT (offsets[10] == MAGIC);
352 for (h = 0; h < SIZEOF (handlers); h++)
354 enum iconv_ilseq_handler handler = handlers[h];
355 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
356 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
357 for (o = 0; o < 2; o++)
359 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
362 int retval = mem_iconveha (input, strlen (input),
363 "autodetect_jp", "UTF-8",
367 ASSERT (retval == 0);
368 ASSERT (length == strlen (expected));
369 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
372 for (i = 0; i < 10; i++)
373 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
374 ASSERT (offsets[10] == MAGIC);
380 for (h = 0; h < SIZEOF (handlers); h++)
382 enum iconv_ilseq_handler handler = handlers[h];
383 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
384 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
385 for (o = 0; o < 2; o++)
387 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
390 int retval = mem_iconveha (input, strlen (input),
391 "autodetect_jp", "UTF-8",
395 ASSERT (retval == 0);
396 ASSERT (length == strlen (expected));
397 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
400 for (i = 0; i < 16; i++)
401 ASSERT (offsets[i] == (i == 0 ? 0 :
408 ASSERT (offsets[16] == MAGIC);
416 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
417 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
418 for (h = 0; h < SIZEOF (handlers); h++)
420 enum iconv_ilseq_handler handler = handlers[h];
421 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
422 static const char expected[] = "Costs: 27 EUR";
423 for (o = 0; o < 2; o++)
425 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
428 int retval = mem_iconveha (input, strlen (input),
429 "UTF-8", "ISO-8859-1",
433 ASSERT (retval == 0);
434 ASSERT (length == strlen (expected));
435 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
438 for (i = 0; i < 13; i++)
439 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
440 ASSERT (offsets[13] == MAGIC);
448 /* ------------------------- Test str_iconveha() ------------------------- */
450 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
451 for (h = 0; h < SIZEOF (handlers); h++)
453 enum iconv_ilseq_handler handler = handlers[h];
454 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
455 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
456 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
457 ASSERT (result != NULL);
458 ASSERT (strcmp (result, expected) == 0);
462 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
463 for (h = 0; h < SIZEOF (handlers); h++)
465 enum iconv_ilseq_handler handler = handlers[h];
466 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
467 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
471 ASSERT (result == NULL && errno == EILSEQ);
473 case iconveh_question_mark:
475 static const char expected[] = "Rafa? Maszkowski";
476 ASSERT (result != NULL);
477 ASSERT (strcmp (result, expected) == 0);
481 case iconveh_escape_sequence:
483 static const char expected[] = "Rafa\\u0142 Maszkowski";
484 ASSERT (result != NULL);
485 ASSERT (strcmp (result, expected) == 0);
492 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
493 for (h = 0; h < SIZEOF (handlers); h++)
495 enum iconv_ilseq_handler handler = handlers[h];
496 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
497 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
498 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
499 ASSERT (result != NULL);
500 ASSERT (strcmp (result, expected) == 0);
504 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
505 for (h = 0; h < SIZEOF (handlers); h++)
507 enum iconv_ilseq_handler handler = handlers[h];
508 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
509 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
510 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
511 ASSERT (result != NULL);
512 ASSERT (strcmp (result, expected) == 0);
516 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
517 for (h = 0; h < SIZEOF (handlers); h++)
519 enum iconv_ilseq_handler handler = handlers[h];
520 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
521 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
525 ASSERT (result == NULL && errno == EILSEQ);
527 case iconveh_question_mark:
529 static const char expected[] = "Costs: 27 ?";
530 ASSERT (result != NULL);
531 ASSERT (strcmp (result, expected) == 0);
535 case iconveh_escape_sequence:
537 static const char expected[] = "Costs: 27 \\u20AC";
538 ASSERT (result != NULL);
539 ASSERT (strcmp (result, expected) == 0);
546 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
547 for (h = 0; h < SIZEOF (handlers); h++)
549 enum iconv_ilseq_handler handler = handlers[h];
550 static const char input[] = "\342";
551 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
552 ASSERT (result != NULL);
553 ASSERT (strcmp (result, "") == 0);
557 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
558 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
559 /* Test conversions from autodetect_jp to UTF-8. */
560 for (h = 0; h < SIZEOF (handlers); h++)
562 enum iconv_ilseq_handler handler = handlers[h];
563 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
564 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
565 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
566 ASSERT (result != NULL);
567 ASSERT (strcmp (result, expected) == 0);
570 for (h = 0; h < SIZEOF (handlers); h++)
572 enum iconv_ilseq_handler handler = handlers[h];
573 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
574 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
575 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
576 ASSERT (result != NULL);
577 ASSERT (strcmp (result, expected) == 0);
580 for (h = 0; h < SIZEOF (handlers); h++)
582 enum iconv_ilseq_handler handler = handlers[h];
583 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
584 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
585 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
586 ASSERT (result != NULL);
587 ASSERT (strcmp (result, expected) == 0);
592 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
593 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
594 for (h = 0; h < SIZEOF (handlers); h++)
596 enum iconv_ilseq_handler handler = handlers[h];
597 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
598 static const char expected[] = "Costs: 27 EUR";
599 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
600 ASSERT (result != NULL);
601 ASSERT (strcmp (result, expected) == 0);