1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007-2008 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveha.h"
/* Number of elements in ARRAY.  ARRAY must be a true array object, not a
   pointer.  The parameter is parenthesized at every use so that an argument
   which is itself an expression (for example a cast followed by a
   dereference) is indexed as a whole.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
45 /* Magic number for detecting bounds violations. */
46 #define MAGIC 0x1983EFF1
49 new_offsets (size_t n)
51 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
59 static enum iconv_ilseq_handler handlers[] =
60 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
66 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
67 ISO-8859-2, and UTF-8. */
69 /* ------------------------- Test mem_iconveha() ------------------------- */
71 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
72 for (h = 0; h < SIZEOF (handlers); h++)
74 enum iconv_ilseq_handler handler = handlers[h];
75 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
76 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
77 for (o = 0; o < 2; o++)
79 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
82 int retval = mem_iconveha (input, strlen (input),
83 "ISO-8859-2", "ISO-8859-1",
88 ASSERT (length == strlen (expected));
89 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
92 for (i = 0; i < 37; i++)
93 ASSERT (offsets[i] == i);
94 ASSERT (offsets[37] == MAGIC);
101 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
102 for (h = 0; h < SIZEOF (handlers); h++)
104 enum iconv_ilseq_handler handler = handlers[h];
105 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
106 for (o = 0; o < 2; o++)
108 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
111 int retval = mem_iconveha (input, strlen (input),
112 "ISO-8859-2", "ISO-8859-1",
119 ASSERT (retval == -1 && errno == EILSEQ);
120 ASSERT (result == NULL);
124 case iconveh_question_mark:
126 static const char expected[] = "Rafa? Maszkowski";
127 ASSERT (retval == 0);
128 ASSERT (length == strlen (expected));
129 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
132 for (i = 0; i < 16; i++)
133 ASSERT (offsets[i] == i);
134 ASSERT (offsets[16] == MAGIC);
140 case iconveh_escape_sequence:
142 static const char expected[] = "Rafa\\u0142 Maszkowski";
143 ASSERT (retval == 0);
144 ASSERT (length == strlen (expected));
145 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
148 for (i = 0; i < 16; i++)
149 ASSERT (offsets[i] == (i < 5 ? i :
151 ASSERT (offsets[16] == MAGIC);
161 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
162 for (h = 0; h < SIZEOF (handlers); h++)
164 enum iconv_ilseq_handler handler = handlers[h];
165 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
166 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
167 for (o = 0; o < 2; o++)
169 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
172 int retval = mem_iconveha (input, strlen (input),
173 "ISO-8859-1", "UTF-8",
177 ASSERT (retval == 0);
178 ASSERT (length == strlen (expected));
179 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
182 for (i = 0; i < 37; i++)
183 ASSERT (offsets[i] == (i < 1 ? i :
187 ASSERT (offsets[37] == MAGIC);
194 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
195 for (h = 0; h < SIZEOF (handlers); h++)
197 enum iconv_ilseq_handler handler = handlers[h];
198 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
199 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
200 for (o = 0; o < 2; o++)
202 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
205 int retval = mem_iconveha (input, strlen (input),
206 "UTF-8", "ISO-8859-1",
210 ASSERT (retval == 0);
211 ASSERT (length == strlen (expected));
212 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
215 for (i = 0; i < 41; i++)
216 ASSERT (offsets[i] == (i < 1 ? i :
217 i == 1 ? (size_t)(-1) :
219 i == 13 ? (size_t)(-1) :
221 i == 20 ? (size_t)(-1) :
224 ASSERT (offsets[41] == MAGIC);
231 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
232 for (h = 0; h < SIZEOF (handlers); h++)
234 enum iconv_ilseq_handler handler = handlers[h];
235 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
236 for (o = 0; o < 2; o++)
238 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
241 int retval = mem_iconveha (input, strlen (input),
242 "UTF-8", "ISO-8859-1",
249 ASSERT (retval == -1 && errno == EILSEQ);
250 ASSERT (result == NULL);
254 case iconveh_question_mark:
256 static const char expected[] = "Rafa? Maszkowski";
257 ASSERT (retval == 0);
258 ASSERT (length == strlen (expected));
259 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
262 for (i = 0; i < 17; i++)
263 ASSERT (offsets[i] == (i < 5 ? i :
264 i == 5 ? (size_t)(-1) :
266 ASSERT (offsets[17] == MAGIC);
272 case iconveh_escape_sequence:
274 static const char expected[] = "Rafa\\u0142 Maszkowski";
275 ASSERT (retval == 0);
276 ASSERT (length == strlen (expected));
277 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
280 for (i = 0; i < 17; i++)
281 ASSERT (offsets[i] == (i < 5 ? i :
282 i == 5 ? (size_t)(-1) :
284 ASSERT (offsets[17] == MAGIC);
294 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
295 for (h = 0; h < SIZEOF (handlers); h++)
297 enum iconv_ilseq_handler handler = handlers[h];
298 static const char input[] = "\342";
299 for (o = 0; o < 2; o++)
301 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
304 int retval = mem_iconveha (input, strlen (input),
305 "UTF-8", "ISO-8859-1",
309 ASSERT (retval == 0);
310 ASSERT (length == 0);
313 ASSERT (offsets[0] == 0);
314 ASSERT (offsets[1] == MAGIC);
321 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
322 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
323 /* Test conversions from autodetect_jp to UTF-8. */
324 for (h = 0; h < SIZEOF (handlers); h++)
326 enum iconv_ilseq_handler handler = handlers[h];
327 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
328 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
329 for (o = 0; o < 2; o++)
331 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
334 int retval = mem_iconveha (input, strlen (input),
335 "autodetect_jp", "UTF-8",
339 ASSERT (retval == 0);
340 ASSERT (length == strlen (expected));
341 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
344 for (i = 0; i < 10; i++)
345 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
346 ASSERT (offsets[10] == MAGIC);
352 for (h = 0; h < SIZEOF (handlers); h++)
354 enum iconv_ilseq_handler handler = handlers[h];
355 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
356 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
357 for (o = 0; o < 2; o++)
359 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
362 int retval = mem_iconveha (input, strlen (input),
363 "autodetect_jp", "UTF-8",
367 ASSERT (retval == 0);
368 ASSERT (length == strlen (expected));
369 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
372 for (i = 0; i < 10; i++)
373 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
374 ASSERT (offsets[10] == MAGIC);
380 for (h = 0; h < SIZEOF (handlers); h++)
382 enum iconv_ilseq_handler handler = handlers[h];
383 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
384 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
385 for (o = 0; o < 2; o++)
387 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
390 int retval = mem_iconveha (input, strlen (input),
391 "autodetect_jp", "UTF-8",
395 ASSERT (retval == 0);
396 ASSERT (length == strlen (expected));
397 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
400 for (i = 0; i < 16; i++)
401 ASSERT (offsets[i] == (i == 0 ? 0 :
408 ASSERT (offsets[16] == MAGIC);
416 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
417 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
418 for (h = 0; h < SIZEOF (handlers); h++)
420 enum iconv_ilseq_handler handler = handlers[h];
421 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
422 static const char expected[] = "Costs: 27 EUR";
423 for (o = 0; o < 2; o++)
425 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
428 int retval = mem_iconveha (input, strlen (input),
429 "UTF-8", "ISO-8859-1",
433 ASSERT (retval == 0);
434 ASSERT (length == strlen (expected));
435 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
438 for (i = 0; i < 13; i++)
439 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
440 ASSERT (offsets[13] == MAGIC);
448 /* ------------------------- Test str_iconveha() ------------------------- */
450 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
451 for (h = 0; h < SIZEOF (handlers); h++)
453 enum iconv_ilseq_handler handler = handlers[h];
454 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
455 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
456 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
457 ASSERT (result != NULL);
458 ASSERT (strcmp (result, expected) == 0);
462 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
463 for (h = 0; h < SIZEOF (handlers); h++)
465 enum iconv_ilseq_handler handler = handlers[h];
466 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
467 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
471 ASSERT (result == NULL && errno == EILSEQ);
473 case iconveh_question_mark:
475 static const char expected[] = "Rafa? Maszkowski";
476 ASSERT (result != NULL);
477 ASSERT (strcmp (result, expected) == 0);
481 case iconveh_escape_sequence:
483 static const char expected[] = "Rafa\\u0142 Maszkowski";
484 ASSERT (result != NULL);
485 ASSERT (strcmp (result, expected) == 0);
492 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
493 for (h = 0; h < SIZEOF (handlers); h++)
495 enum iconv_ilseq_handler handler = handlers[h];
496 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
497 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
498 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
499 ASSERT (result != NULL);
500 ASSERT (strcmp (result, expected) == 0);
504 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
505 for (h = 0; h < SIZEOF (handlers); h++)
507 enum iconv_ilseq_handler handler = handlers[h];
508 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
509 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
510 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
511 ASSERT (result != NULL);
512 ASSERT (strcmp (result, expected) == 0);
516 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
517 for (h = 0; h < SIZEOF (handlers); h++)
519 enum iconv_ilseq_handler handler = handlers[h];
520 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
521 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
525 ASSERT (result == NULL && errno == EILSEQ);
527 case iconveh_question_mark:
529 static const char expected[] = "Costs: 27 ?";
530 ASSERT (result != NULL);
531 ASSERT (strcmp (result, expected) == 0);
535 case iconveh_escape_sequence:
537 static const char expected[] = "Costs: 27 \\u20AC";
538 ASSERT (result != NULL);
539 ASSERT (strcmp (result, expected) == 0);
546 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
547 for (h = 0; h < SIZEOF (handlers); h++)
549 enum iconv_ilseq_handler handler = handlers[h];
550 static const char input[] = "\342";
551 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
552 ASSERT (result != NULL);
553 ASSERT (strcmp (result, "") == 0);
557 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
558 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
559 /* Test conversions from autodetect_jp to UTF-8. */
560 for (h = 0; h < SIZEOF (handlers); h++)
562 enum iconv_ilseq_handler handler = handlers[h];
563 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
564 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
565 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
566 ASSERT (result != NULL);
567 ASSERT (strcmp (result, expected) == 0);
570 for (h = 0; h < SIZEOF (handlers); h++)
572 enum iconv_ilseq_handler handler = handlers[h];
573 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
574 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
575 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
576 ASSERT (result != NULL);
577 ASSERT (strcmp (result, expected) == 0);
580 for (h = 0; h < SIZEOF (handlers); h++)
582 enum iconv_ilseq_handler handler = handlers[h];
583 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
584 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
585 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
586 ASSERT (result != NULL);
587 ASSERT (strcmp (result, expected) == 0);
592 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
593 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
594 for (h = 0; h < SIZEOF (handlers); h++)
596 enum iconv_ilseq_handler handler = handlers[h];
597 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
598 static const char expected[] = "Costs: 27 EUR";
599 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
600 ASSERT (result != NULL);
601 ASSERT (strcmp (result, expected) == 0);