1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
/* Number of elements in ARRAY: total size divided by the size of its first
   element.  Used below as the upper bound when looping over handlers[].  */
32 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
33 #define ASSERT(expr) \
38 fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
44 /* Magic number for detecting bounds violations.  After each conversion
   call that was given an offsets array, the tests assert that the slot just
   past the last input position (e.g. offsets[37] for the 37-byte input)
   equals MAGIC.
   NOTE(review): the store that places MAGIC in that slot is not visible in
   this listing -- presumably done by new_offsets(), which allocates n + 1
   entries; confirm.  */
45 #define MAGIC 0x1983EFF1
48 new_offsets (size_t n)
50 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
/* The illegal-sequence handlers under test.  Every test section below loops
   over this table with index h and uses handlers[h] as the handler for the
   conversion, then checks the result expected for that handler.  */
58 static enum iconv_ilseq_handler handlers[] =
59 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
65 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
66 ISO-8859-2, and UTF-8. */
67 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
68 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
69 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
70 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
71 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
72 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
74 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
75 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
76 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
77 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
79 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
81 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
82 for (h = 0; h < SIZEOF (handlers); h++)
84 enum iconv_ilseq_handler handler = handlers[h];
85 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
86 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
87 for (o = 0; o < 2; o++)
89 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
92 int retval = mem_cd_iconveh (input, strlen (input),
94 cd_88592_to_utf8, cd_utf8_to_88591,
99 ASSERT (length == strlen (expected));
100 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
103 for (i = 0; i < 37; i++)
104 ASSERT (offsets[i] == i);
105 ASSERT (offsets[37] == MAGIC);
112 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
113 for (h = 0; h < SIZEOF (handlers); h++)
115 enum iconv_ilseq_handler handler = handlers[h];
116 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
117 for (o = 0; o < 2; o++)
119 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
122 int retval = mem_cd_iconveh (input, strlen (input),
124 cd_88592_to_utf8, cd_utf8_to_88591,
131 ASSERT (retval == -1 && errno == EILSEQ);
132 ASSERT (result == NULL);
136 case iconveh_question_mark:
138 static const char expected[] = "Rafa? Maszkowski";
139 ASSERT (retval == 0);
140 ASSERT (length == strlen (expected));
141 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
144 for (i = 0; i < 16; i++)
145 ASSERT (offsets[i] == i);
146 ASSERT (offsets[16] == MAGIC);
152 case iconveh_escape_sequence:
154 static const char expected[] = "Rafa\\u0142 Maszkowski";
155 ASSERT (retval == 0);
156 ASSERT (length == strlen (expected));
157 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
160 for (i = 0; i < 16; i++)
161 ASSERT (offsets[i] == (i < 5 ? i :
163 ASSERT (offsets[16] == MAGIC);
173 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
174 for (h = 0; h < SIZEOF (handlers); h++)
176 enum iconv_ilseq_handler handler = handlers[h];
177 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
178 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
179 for (o = 0; o < 2; o++)
181 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
184 int retval = mem_cd_iconveh (input, strlen (input),
186 cd_88591_to_utf8, (iconv_t)(-1),
190 ASSERT (retval == 0);
191 ASSERT (length == strlen (expected));
192 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
195 for (i = 0; i < 37; i++)
196 ASSERT (offsets[i] == (i < 1 ? i :
200 ASSERT (offsets[37] == MAGIC);
207 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
208 for (h = 0; h < SIZEOF (handlers); h++)
210 enum iconv_ilseq_handler handler = handlers[h];
211 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
212 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
213 for (o = 0; o < 2; o++)
215 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
218 int retval = mem_cd_iconveh (input, strlen (input),
220 (iconv_t)(-1), cd_utf8_to_88591,
224 ASSERT (retval == 0);
225 ASSERT (length == strlen (expected));
226 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
229 for (i = 0; i < 41; i++)
230 ASSERT (offsets[i] == (i < 1 ? i :
231 i == 1 ? (size_t)(-1) :
233 i == 13 ? (size_t)(-1) :
235 i == 20 ? (size_t)(-1) :
238 ASSERT (offsets[41] == MAGIC);
245 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
246 for (h = 0; h < SIZEOF (handlers); h++)
248 enum iconv_ilseq_handler handler = handlers[h];
249 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
250 for (o = 0; o < 2; o++)
252 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
255 int retval = mem_cd_iconveh (input, strlen (input),
257 (iconv_t)(-1), cd_utf8_to_88591,
264 ASSERT (retval == -1 && errno == EILSEQ);
265 ASSERT (result == NULL);
269 case iconveh_question_mark:
271 static const char expected[] = "Rafa? Maszkowski";
272 ASSERT (retval == 0);
273 ASSERT (length == strlen (expected));
274 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
277 for (i = 0; i < 17; i++)
278 ASSERT (offsets[i] == (i < 5 ? i :
279 i == 5 ? (size_t)(-1) :
281 ASSERT (offsets[17] == MAGIC);
287 case iconveh_escape_sequence:
289 static const char expected[] = "Rafa\\u0142 Maszkowski";
290 ASSERT (retval == 0);
291 ASSERT (length == strlen (expected));
292 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
295 for (i = 0; i < 17; i++)
296 ASSERT (offsets[i] == (i < 5 ? i :
297 i == 5 ? (size_t)(-1) :
299 ASSERT (offsets[17] == MAGIC);
309 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
310 for (h = 0; h < SIZEOF (handlers); h++)
312 enum iconv_ilseq_handler handler = handlers[h];
313 static const char input[] = "\342";
314 for (o = 0; o < 2; o++)
316 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
319 int retval = mem_cd_iconveh (input, strlen (input),
321 (iconv_t)(-1), cd_utf8_to_88591,
325 ASSERT (retval == 0);
326 ASSERT (length == 0);
329 ASSERT (offsets[0] == 0);
330 ASSERT (offsets[1] == MAGIC);
338 /* ------------------------ Test str_cd_iconveh() ------------------------ */
340 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
341 for (h = 0; h < SIZEOF (handlers); h++)
343 enum iconv_ilseq_handler handler = handlers[h];
344 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
345 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
346 char *result = str_cd_iconveh (input,
348 cd_88592_to_utf8, cd_utf8_to_88591,
350 ASSERT (result != NULL);
351 ASSERT (strcmp (result, expected) == 0);
355 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
356 for (h = 0; h < SIZEOF (handlers); h++)
358 enum iconv_ilseq_handler handler = handlers[h];
359 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
360 char *result = str_cd_iconveh (input,
362 cd_88592_to_utf8, cd_utf8_to_88591,
367 ASSERT (result == NULL && errno == EILSEQ);
369 case iconveh_question_mark:
371 static const char expected[] = "Rafa? Maszkowski";
372 ASSERT (result != NULL);
373 ASSERT (strcmp (result, expected) == 0);
377 case iconveh_escape_sequence:
379 static const char expected[] = "Rafa\\u0142 Maszkowski";
380 ASSERT (result != NULL);
381 ASSERT (strcmp (result, expected) == 0);
388 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
389 for (h = 0; h < SIZEOF (handlers); h++)
391 enum iconv_ilseq_handler handler = handlers[h];
392 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
393 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
394 char *result = str_cd_iconveh (input,
396 cd_88591_to_utf8, (iconv_t)(-1),
398 ASSERT (result != NULL);
399 ASSERT (strcmp (result, expected) == 0);
403 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
404 for (h = 0; h < SIZEOF (handlers); h++)
406 enum iconv_ilseq_handler handler = handlers[h];
407 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
408 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
409 char *result = str_cd_iconveh (input,
411 (iconv_t)(-1), cd_utf8_to_88591,
413 ASSERT (result != NULL);
414 ASSERT (strcmp (result, expected) == 0);
418 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
419 for (h = 0; h < SIZEOF (handlers); h++)
421 enum iconv_ilseq_handler handler = handlers[h];
422 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
423 char *result = str_cd_iconveh (input,
425 (iconv_t)(-1), cd_utf8_to_88591,
430 ASSERT (result == NULL && errno == EILSEQ);
432 case iconveh_question_mark:
434 static const char expected[] = "Costs: 27 ?";
435 ASSERT (result != NULL);
436 ASSERT (strcmp (result, expected) == 0);
440 case iconveh_escape_sequence:
442 static const char expected[] = "Costs: 27 \\u20AC";
443 ASSERT (result != NULL);
444 ASSERT (strcmp (result, expected) == 0);
451 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
452 for (h = 0; h < SIZEOF (handlers); h++)
454 enum iconv_ilseq_handler handler = handlers[h];
455 static const char input[] = "\342";
456 char *result = str_cd_iconveh (input,
458 (iconv_t)(-1), cd_utf8_to_88591,
460 ASSERT (result != NULL);
461 ASSERT (strcmp (result, "") == 0);
465 if (cd_88591_to_88592 != (iconv_t)(-1))
466 iconv_close (cd_88591_to_88592);
467 if (cd_88592_to_88591 != (iconv_t)(-1))
468 iconv_close (cd_88592_to_88591);
469 iconv_close (cd_88591_to_utf8);
470 iconv_close (cd_utf8_to_88591);
471 iconv_close (cd_88592_to_utf8);
472 iconv_close (cd_utf8_to_88592);
474 /* ------------------------- Test mem_iconveh() ------------------------- */
476 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
477 for (h = 0; h < SIZEOF (handlers); h++)
479 enum iconv_ilseq_handler handler = handlers[h];
480 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
481 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
482 for (o = 0; o < 2; o++)
484 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
487 int retval = mem_iconveh (input, strlen (input),
488 "ISO-8859-2", "ISO-8859-1",
492 ASSERT (retval == 0);
493 ASSERT (length == strlen (expected));
494 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
497 for (i = 0; i < 37; i++)
498 ASSERT (offsets[i] == i);
499 ASSERT (offsets[37] == MAGIC);
506 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
507 for (h = 0; h < SIZEOF (handlers); h++)
509 enum iconv_ilseq_handler handler = handlers[h];
510 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
511 for (o = 0; o < 2; o++)
513 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
516 int retval = mem_iconveh (input, strlen (input),
517 "ISO-8859-2", "ISO-8859-1",
524 ASSERT (retval == -1 && errno == EILSEQ);
525 ASSERT (result == NULL);
529 case iconveh_question_mark:
531 static const char expected[] = "Rafa? Maszkowski";
532 ASSERT (retval == 0);
533 ASSERT (length == strlen (expected));
534 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
537 for (i = 0; i < 16; i++)
538 ASSERT (offsets[i] == i);
539 ASSERT (offsets[16] == MAGIC);
545 case iconveh_escape_sequence:
547 static const char expected[] = "Rafa\\u0142 Maszkowski";
548 ASSERT (retval == 0);
549 ASSERT (length == strlen (expected));
550 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
553 for (i = 0; i < 16; i++)
554 ASSERT (offsets[i] == (i < 5 ? i :
556 ASSERT (offsets[16] == MAGIC);
566 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
567 for (h = 0; h < SIZEOF (handlers); h++)
569 enum iconv_ilseq_handler handler = handlers[h];
570 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
571 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
572 for (o = 0; o < 2; o++)
574 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
577 int retval = mem_iconveh (input, strlen (input),
578 "ISO-8859-1", "UTF-8",
582 ASSERT (retval == 0);
583 ASSERT (length == strlen (expected));
584 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
587 for (i = 0; i < 37; i++)
588 ASSERT (offsets[i] == (i < 1 ? i :
592 ASSERT (offsets[37] == MAGIC);
599 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
600 for (h = 0; h < SIZEOF (handlers); h++)
602 enum iconv_ilseq_handler handler = handlers[h];
603 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
604 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
605 for (o = 0; o < 2; o++)
607 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
610 int retval = mem_iconveh (input, strlen (input),
611 "UTF-8", "ISO-8859-1",
615 ASSERT (retval == 0);
616 ASSERT (length == strlen (expected));
617 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
620 for (i = 0; i < 41; i++)
621 ASSERT (offsets[i] == (i < 1 ? i :
622 i == 1 ? (size_t)(-1) :
624 i == 13 ? (size_t)(-1) :
626 i == 20 ? (size_t)(-1) :
629 ASSERT (offsets[41] == MAGIC);
636 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
637 for (h = 0; h < SIZEOF (handlers); h++)
639 enum iconv_ilseq_handler handler = handlers[h];
640 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
641 for (o = 0; o < 2; o++)
643 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
646 int retval = mem_iconveh (input, strlen (input),
647 "UTF-8", "ISO-8859-1",
654 ASSERT (retval == -1 && errno == EILSEQ);
655 ASSERT (result == NULL);
659 case iconveh_question_mark:
661 static const char expected[] = "Rafa? Maszkowski";
662 ASSERT (retval == 0);
663 ASSERT (length == strlen (expected));
664 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
667 for (i = 0; i < 17; i++)
668 ASSERT (offsets[i] == (i < 5 ? i :
669 i == 5 ? (size_t)(-1) :
671 ASSERT (offsets[17] == MAGIC);
677 case iconveh_escape_sequence:
679 static const char expected[] = "Rafa\\u0142 Maszkowski";
680 ASSERT (retval == 0);
681 ASSERT (length == strlen (expected));
682 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
685 for (i = 0; i < 17; i++)
686 ASSERT (offsets[i] == (i < 5 ? i :
687 i == 5 ? (size_t)(-1) :
689 ASSERT (offsets[17] == MAGIC);
699 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
700 for (h = 0; h < SIZEOF (handlers); h++)
702 enum iconv_ilseq_handler handler = handlers[h];
703 static const char input[] = "\342";
704 for (o = 0; o < 2; o++)
706 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
709 int retval = mem_iconveh (input, strlen (input),
710 "UTF-8", "ISO-8859-1",
714 ASSERT (retval == 0);
715 ASSERT (length == 0);
718 ASSERT (offsets[0] == 0);
719 ASSERT (offsets[1] == MAGIC);
727 /* ------------------------- Test str_iconveh() ------------------------- */
729 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
730 for (h = 0; h < SIZEOF (handlers); h++)
732 enum iconv_ilseq_handler handler = handlers[h];
733 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
734 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
735 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
736 ASSERT (result != NULL);
737 ASSERT (strcmp (result, expected) == 0);
741 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
742 for (h = 0; h < SIZEOF (handlers); h++)
744 enum iconv_ilseq_handler handler = handlers[h];
745 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
746 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
750 ASSERT (result == NULL && errno == EILSEQ);
752 case iconveh_question_mark:
754 static const char expected[] = "Rafa? Maszkowski";
755 ASSERT (result != NULL);
756 ASSERT (strcmp (result, expected) == 0);
760 case iconveh_escape_sequence:
762 static const char expected[] = "Rafa\\u0142 Maszkowski";
763 ASSERT (result != NULL);
764 ASSERT (strcmp (result, expected) == 0);
771 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
772 for (h = 0; h < SIZEOF (handlers); h++)
774 enum iconv_ilseq_handler handler = handlers[h];
775 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
776 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
777 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
778 ASSERT (result != NULL);
779 ASSERT (strcmp (result, expected) == 0);
783 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
784 for (h = 0; h < SIZEOF (handlers); h++)
786 enum iconv_ilseq_handler handler = handlers[h];
787 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
788 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
789 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
790 ASSERT (result != NULL);
791 ASSERT (strcmp (result, expected) == 0);
795 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
796 for (h = 0; h < SIZEOF (handlers); h++)
798 enum iconv_ilseq_handler handler = handlers[h];
799 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
800 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
804 ASSERT (result == NULL && errno == EILSEQ);
806 case iconveh_question_mark:
808 static const char expected[] = "Costs: 27 ?";
809 ASSERT (result != NULL);
810 ASSERT (strcmp (result, expected) == 0);
814 case iconveh_escape_sequence:
816 static const char expected[] = "Costs: 27 \\u20AC";
817 ASSERT (result != NULL);
818 ASSERT (strcmp (result, expected) == 0);
825 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
826 for (h = 0; h < SIZEOF (handlers); h++)
828 enum iconv_ilseq_handler handler = handlers[h];
829 static const char input[] = "\342";
830 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
831 ASSERT (result != NULL);
832 ASSERT (strcmp (result, "") == 0);