1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
24 #include "striconveh.h"
/* Number of elements in ARRAY.  Only meaningful for true array objects;
   applied to a pointer it silently yields the wrong value.  The argument
   is parenthesized so that any array-valued expression may be passed.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))

/* Abort the program if EXPR evaluates to false.
   The expansion is wrapped in do { ... } while (0) so that it forms exactly
   one statement: the caller supplies the terminating semicolon, and the
   macro can be used as the unbraced body of an if/else or a loop without
   capturing a following 'else' or leaving a stray empty statement.  */
#define ASSERT(expr) \
  do                 \
    {                \
      if (!(expr))   \
        abort ();    \
    }                \
  while (0)
37 /* Magic number for detecting bounds violations. */
38 #define MAGIC 0x1983EFF1
41 new_offsets (size_t n)
43 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The handler values exercised by the tests: each test case below loops
   over this table with an index h and runs once per entry.  */
51 static enum iconv_ilseq_handler handlers[] =
52 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
58 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
59 ISO-8859-2, and UTF-8. */
60 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
61 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
62 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
63 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
64 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
65 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
67 ASSERT (cd_88591_to_88592 != (iconv_t)(-1));
68 ASSERT (cd_88592_to_88591 != (iconv_t)(-1));
69 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
70 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
71 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
72 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
74 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
76 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
77 for (h = 0; h < SIZEOF (handlers); h++)
79 enum iconv_ilseq_handler handler = handlers[h];
80 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
81 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
82 for (o = 0; o < 2; o++)
84 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
87 int retval = mem_cd_iconveh (input, strlen (input),
89 cd_88592_to_utf8, cd_utf8_to_88591,
94 ASSERT (length == strlen (expected));
95 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
98 for (i = 0; i < 37; i++)
99 ASSERT (offsets[i] == i);
100 ASSERT (offsets[37] == MAGIC);
107 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
108 for (h = 0; h < SIZEOF (handlers); h++)
110 enum iconv_ilseq_handler handler = handlers[h];
111 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
112 for (o = 0; o < 2; o++)
114 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
117 int retval = mem_cd_iconveh (input, strlen (input),
119 cd_88592_to_utf8, cd_utf8_to_88591,
126 ASSERT (retval == -1 && errno == EILSEQ);
127 ASSERT (result == NULL);
131 case iconveh_question_mark:
133 static const char expected[] = "Rafa? Maszkowski";
134 ASSERT (retval == 0);
135 ASSERT (length == strlen (expected));
136 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
139 for (i = 0; i < 16; i++)
140 ASSERT (offsets[i] == i);
141 ASSERT (offsets[16] == MAGIC);
147 case iconveh_escape_sequence:
149 static const char expected[] = "Rafa\\u0142 Maszkowski";
150 ASSERT (retval == 0);
151 ASSERT (length == strlen (expected));
152 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
155 for (i = 0; i < 16; i++)
156 ASSERT (offsets[i] == (i < 5 ? i :
158 ASSERT (offsets[16] == MAGIC);
168 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
169 for (h = 0; h < SIZEOF (handlers); h++)
171 enum iconv_ilseq_handler handler = handlers[h];
172 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
173 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
174 for (o = 0; o < 2; o++)
176 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
179 int retval = mem_cd_iconveh (input, strlen (input),
181 cd_88591_to_utf8, (iconv_t)(-1),
185 ASSERT (retval == 0);
186 ASSERT (length == strlen (expected));
187 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
190 for (i = 0; i < 37; i++)
191 ASSERT (offsets[i] == (i < 1 ? i :
195 ASSERT (offsets[37] == MAGIC);
202 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
203 for (h = 0; h < SIZEOF (handlers); h++)
205 enum iconv_ilseq_handler handler = handlers[h];
206 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
207 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
208 for (o = 0; o < 2; o++)
210 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
213 int retval = mem_cd_iconveh (input, strlen (input),
215 (iconv_t)(-1), cd_utf8_to_88591,
219 ASSERT (retval == 0);
220 ASSERT (length == strlen (expected));
221 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
224 for (i = 0; i < 41; i++)
225 ASSERT (offsets[i] == (i < 1 ? i :
226 i == 1 ? (size_t)(-1) :
228 i == 13 ? (size_t)(-1) :
230 i == 20 ? (size_t)(-1) :
233 ASSERT (offsets[41] == MAGIC);
240 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
241 for (h = 0; h < SIZEOF (handlers); h++)
243 enum iconv_ilseq_handler handler = handlers[h];
244 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
245 for (o = 0; o < 2; o++)
247 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
250 int retval = mem_cd_iconveh (input, strlen (input),
252 (iconv_t)(-1), cd_utf8_to_88591,
259 ASSERT (retval == -1 && errno == EILSEQ);
260 ASSERT (result == NULL);
264 case iconveh_question_mark:
266 static const char expected[] = "Rafa? Maszkowski";
267 ASSERT (retval == 0);
268 ASSERT (length == strlen (expected));
269 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
272 for (i = 0; i < 17; i++)
273 ASSERT (offsets[i] == (i < 5 ? i :
274 i == 5 ? (size_t)(-1) :
276 ASSERT (offsets[17] == MAGIC);
282 case iconveh_escape_sequence:
284 static const char expected[] = "Rafa\\u0142 Maszkowski";
285 ASSERT (retval == 0);
286 ASSERT (length == strlen (expected));
287 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
290 for (i = 0; i < 17; i++)
291 ASSERT (offsets[i] == (i < 5 ? i :
292 i == 5 ? (size_t)(-1) :
294 ASSERT (offsets[17] == MAGIC);
304 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
305 for (h = 0; h < SIZEOF (handlers); h++)
307 enum iconv_ilseq_handler handler = handlers[h];
308 static const char input[] = "\342";
309 for (o = 0; o < 2; o++)
311 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
314 int retval = mem_cd_iconveh (input, strlen (input),
316 (iconv_t)(-1), cd_utf8_to_88591,
320 ASSERT (retval == 0);
321 ASSERT (length == 0);
324 ASSERT (offsets[0] == 0);
325 ASSERT (offsets[1] == MAGIC);
333 /* ------------------------ Test str_cd_iconveh() ------------------------ */
335 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
336 for (h = 0; h < SIZEOF (handlers); h++)
338 enum iconv_ilseq_handler handler = handlers[h];
339 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
340 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
341 char *result = str_cd_iconveh (input,
343 cd_88592_to_utf8, cd_utf8_to_88591,
345 ASSERT (result != NULL);
346 ASSERT (strcmp (result, expected) == 0);
350 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
351 for (h = 0; h < SIZEOF (handlers); h++)
353 enum iconv_ilseq_handler handler = handlers[h];
354 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
355 char *result = str_cd_iconveh (input,
357 cd_88592_to_utf8, cd_utf8_to_88591,
362 ASSERT (result == NULL && errno == EILSEQ);
364 case iconveh_question_mark:
366 static const char expected[] = "Rafa? Maszkowski";
367 ASSERT (result != NULL);
368 ASSERT (strcmp (result, expected) == 0);
372 case iconveh_escape_sequence:
374 static const char expected[] = "Rafa\\u0142 Maszkowski";
375 ASSERT (result != NULL);
376 ASSERT (strcmp (result, expected) == 0);
383 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
384 for (h = 0; h < SIZEOF (handlers); h++)
386 enum iconv_ilseq_handler handler = handlers[h];
387 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
388 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
389 char *result = str_cd_iconveh (input,
391 cd_88591_to_utf8, (iconv_t)(-1),
393 ASSERT (result != NULL);
394 ASSERT (strcmp (result, expected) == 0);
398 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
399 for (h = 0; h < SIZEOF (handlers); h++)
401 enum iconv_ilseq_handler handler = handlers[h];
402 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
403 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
404 char *result = str_cd_iconveh (input,
406 (iconv_t)(-1), cd_utf8_to_88591,
408 ASSERT (result != NULL);
409 ASSERT (strcmp (result, expected) == 0);
413 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
414 for (h = 0; h < SIZEOF (handlers); h++)
416 enum iconv_ilseq_handler handler = handlers[h];
417 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
418 char *result = str_cd_iconveh (input,
420 (iconv_t)(-1), cd_utf8_to_88591,
425 ASSERT (result == NULL && errno == EILSEQ);
427 case iconveh_question_mark:
429 static const char expected[] = "Costs: 27 ?";
430 ASSERT (result != NULL);
431 ASSERT (strcmp (result, expected) == 0);
435 case iconveh_escape_sequence:
437 static const char expected[] = "Costs: 27 \\u20AC";
438 ASSERT (result != NULL);
439 ASSERT (strcmp (result, expected) == 0);
446 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
447 for (h = 0; h < SIZEOF (handlers); h++)
449 enum iconv_ilseq_handler handler = handlers[h];
450 static const char input[] = "\342";
451 char *result = str_cd_iconveh (input,
453 (iconv_t)(-1), cd_utf8_to_88591,
455 ASSERT (result != NULL);
456 ASSERT (strcmp (result, "") == 0);
460 iconv_close (cd_88591_to_88592);
461 iconv_close (cd_88592_to_88591);
462 iconv_close (cd_88591_to_utf8);
463 iconv_close (cd_utf8_to_88591);
464 iconv_close (cd_88592_to_utf8);
465 iconv_close (cd_utf8_to_88592);
467 /* ------------------------- Test mem_iconveh() ------------------------- */
469 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
470 for (h = 0; h < SIZEOF (handlers); h++)
472 enum iconv_ilseq_handler handler = handlers[h];
473 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
474 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
475 for (o = 0; o < 2; o++)
477 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
480 int retval = mem_iconveh (input, strlen (input),
481 "ISO-8859-2", "ISO-8859-1",
485 ASSERT (retval == 0);
486 ASSERT (length == strlen (expected));
487 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
490 for (i = 0; i < 37; i++)
491 ASSERT (offsets[i] == i);
492 ASSERT (offsets[37] == MAGIC);
499 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
500 for (h = 0; h < SIZEOF (handlers); h++)
502 enum iconv_ilseq_handler handler = handlers[h];
503 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
504 for (o = 0; o < 2; o++)
506 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
509 int retval = mem_iconveh (input, strlen (input),
510 "ISO-8859-2", "ISO-8859-1",
517 ASSERT (retval == -1 && errno == EILSEQ);
518 ASSERT (result == NULL);
522 case iconveh_question_mark:
524 static const char expected[] = "Rafa? Maszkowski";
525 ASSERT (retval == 0);
526 ASSERT (length == strlen (expected));
527 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
530 for (i = 0; i < 16; i++)
531 ASSERT (offsets[i] == i);
532 ASSERT (offsets[16] == MAGIC);
538 case iconveh_escape_sequence:
540 static const char expected[] = "Rafa\\u0142 Maszkowski";
541 ASSERT (retval == 0);
542 ASSERT (length == strlen (expected));
543 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
546 for (i = 0; i < 16; i++)
547 ASSERT (offsets[i] == (i < 5 ? i :
549 ASSERT (offsets[16] == MAGIC);
559 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
560 for (h = 0; h < SIZEOF (handlers); h++)
562 enum iconv_ilseq_handler handler = handlers[h];
563 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
564 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
565 for (o = 0; o < 2; o++)
567 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
570 int retval = mem_iconveh (input, strlen (input),
571 "ISO-8859-1", "UTF-8",
575 ASSERT (retval == 0);
576 ASSERT (length == strlen (expected));
577 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
580 for (i = 0; i < 37; i++)
581 ASSERT (offsets[i] == (i < 1 ? i :
585 ASSERT (offsets[37] == MAGIC);
592 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
593 for (h = 0; h < SIZEOF (handlers); h++)
595 enum iconv_ilseq_handler handler = handlers[h];
596 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
597 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
598 for (o = 0; o < 2; o++)
600 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
603 int retval = mem_iconveh (input, strlen (input),
604 "UTF-8", "ISO-8859-1",
608 ASSERT (retval == 0);
609 ASSERT (length == strlen (expected));
610 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
613 for (i = 0; i < 41; i++)
614 ASSERT (offsets[i] == (i < 1 ? i :
615 i == 1 ? (size_t)(-1) :
617 i == 13 ? (size_t)(-1) :
619 i == 20 ? (size_t)(-1) :
622 ASSERT (offsets[41] == MAGIC);
629 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
630 for (h = 0; h < SIZEOF (handlers); h++)
632 enum iconv_ilseq_handler handler = handlers[h];
633 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
634 for (o = 0; o < 2; o++)
636 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
639 int retval = mem_iconveh (input, strlen (input),
640 "UTF-8", "ISO-8859-1",
647 ASSERT (retval == -1 && errno == EILSEQ);
648 ASSERT (result == NULL);
652 case iconveh_question_mark:
654 static const char expected[] = "Rafa? Maszkowski";
655 ASSERT (retval == 0);
656 ASSERT (length == strlen (expected));
657 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
660 for (i = 0; i < 17; i++)
661 ASSERT (offsets[i] == (i < 5 ? i :
662 i == 5 ? (size_t)(-1) :
664 ASSERT (offsets[17] == MAGIC);
670 case iconveh_escape_sequence:
672 static const char expected[] = "Rafa\\u0142 Maszkowski";
673 ASSERT (retval == 0);
674 ASSERT (length == strlen (expected));
675 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
678 for (i = 0; i < 17; i++)
679 ASSERT (offsets[i] == (i < 5 ? i :
680 i == 5 ? (size_t)(-1) :
682 ASSERT (offsets[17] == MAGIC);
692 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
693 for (h = 0; h < SIZEOF (handlers); h++)
695 enum iconv_ilseq_handler handler = handlers[h];
696 static const char input[] = "\342";
697 for (o = 0; o < 2; o++)
699 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
702 int retval = mem_iconveh (input, strlen (input),
703 "UTF-8", "ISO-8859-1",
707 ASSERT (retval == 0);
708 ASSERT (length == 0);
711 ASSERT (offsets[0] == 0);
712 ASSERT (offsets[1] == MAGIC);
720 /* ------------------------- Test str_iconveh() ------------------------- */
722 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
723 for (h = 0; h < SIZEOF (handlers); h++)
725 enum iconv_ilseq_handler handler = handlers[h];
726 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
727 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
728 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
729 ASSERT (result != NULL);
730 ASSERT (strcmp (result, expected) == 0);
734 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
735 for (h = 0; h < SIZEOF (handlers); h++)
737 enum iconv_ilseq_handler handler = handlers[h];
738 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
739 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
743 ASSERT (result == NULL && errno == EILSEQ);
745 case iconveh_question_mark:
747 static const char expected[] = "Rafa? Maszkowski";
748 ASSERT (result != NULL);
749 ASSERT (strcmp (result, expected) == 0);
753 case iconveh_escape_sequence:
755 static const char expected[] = "Rafa\\u0142 Maszkowski";
756 ASSERT (result != NULL);
757 ASSERT (strcmp (result, expected) == 0);
764 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
765 for (h = 0; h < SIZEOF (handlers); h++)
767 enum iconv_ilseq_handler handler = handlers[h];
768 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
769 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
770 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
771 ASSERT (result != NULL);
772 ASSERT (strcmp (result, expected) == 0);
776 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
777 for (h = 0; h < SIZEOF (handlers); h++)
779 enum iconv_ilseq_handler handler = handlers[h];
780 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
781 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
782 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
783 ASSERT (result != NULL);
784 ASSERT (strcmp (result, expected) == 0);
788 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
789 for (h = 0; h < SIZEOF (handlers); h++)
791 enum iconv_ilseq_handler handler = handlers[h];
792 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
793 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
797 ASSERT (result == NULL && errno == EILSEQ);
799 case iconveh_question_mark:
801 static const char expected[] = "Costs: 27 ?";
802 ASSERT (result != NULL);
803 ASSERT (strcmp (result, expected) == 0);
807 case iconveh_escape_sequence:
809 static const char expected[] = "Costs: 27 \\u20AC";
810 ASSERT (result != NULL);
811 ASSERT (strcmp (result, expected) == 0);
818 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
819 for (h = 0; h < SIZEOF (handlers); h++)
821 enum iconv_ilseq_handler handler = handlers[h];
822 static const char input[] = "\342";
823 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
824 ASSERT (result != NULL);
825 ASSERT (strcmp (result, "") == 0);