1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
33 /* Magic number for detecting bounds violations. */
34 #define MAGIC 0x1983EFF1
37 new_offsets (size_t n)
39 size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
47 static enum iconv_ilseq_handler handlers[] =
48 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
55 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
56 ISO-8859-2, and UTF-8. */
57 iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
58 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
59 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
60 iconv_t cd_ascii_to_utf8 = iconv_open ("UTF-8", "ASCII");
61 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
62 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
63 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
64 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
65 iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
66 iconveh_t cdeh_ascii_to_88591;
67 iconveh_t cdeh_ascii_to_88591_indirectly;
68 iconveh_t cdeh_88592_to_88591;
69 iconveh_t cdeh_88592_to_88591_indirectly;
70 iconveh_t cdeh_ascii_to_utf8;
71 iconveh_t cdeh_88591_to_utf8;
72 iconveh_t cdeh_utf8_to_88591;
73 iconveh_t cdeh_utf7_to_utf8;
75 ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
76 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
77 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
78 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
79 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
81 cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
82 cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
83 cdeh_ascii_to_88591.cd2 = cd_utf8_to_88591;
85 cdeh_ascii_to_88591_indirectly.cd = (iconv_t)(-1);
86 cdeh_ascii_to_88591_indirectly.cd1 = cd_ascii_to_utf8;
87 cdeh_ascii_to_88591_indirectly.cd2 = cd_utf8_to_88591;
89 cdeh_88592_to_88591.cd = cd_88592_to_88591;
90 cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
91 cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
93 cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1);
94 cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
95 cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
97 cdeh_ascii_to_utf8.cd = cd_ascii_to_utf8;
98 cdeh_ascii_to_utf8.cd1 = cd_ascii_to_utf8;
99 cdeh_ascii_to_utf8.cd2 = (iconv_t)(-1);
101 cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
102 cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
103 cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
105 cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
106 cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
107 cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
109 cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
110 cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
111 cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
113 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
115 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
116 for (indirect = 0; indirect <= 1; indirect++)
118 for (h = 0; h < SIZEOF (handlers); h++)
120 enum iconv_ilseq_handler handler = handlers[h];
121 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
122 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
123 for (o = 0; o < 2; o++)
125 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
128 int retval = mem_cd_iconveh (input, strlen (input),
130 ? &cdeh_88592_to_88591_indirectly
131 : &cdeh_88592_to_88591),
135 ASSERT (retval == 0);
136 ASSERT (length == strlen (expected));
137 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
140 for (i = 0; i < 37; i++)
141 ASSERT (offsets[i] == i);
142 ASSERT (offsets[37] == MAGIC);
150 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
151 for (indirect = 0; indirect <= 1; indirect++)
153 for (h = 0; h < SIZEOF (handlers); h++)
155 enum iconv_ilseq_handler handler = handlers[h];
156 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
157 for (o = 0; o < 2; o++)
159 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
162 int retval = mem_cd_iconveh (input, strlen (input),
164 ? &cdeh_ascii_to_88591_indirectly
165 : &cdeh_ascii_to_88591),
172 ASSERT (retval == -1 && errno == EILSEQ);
173 ASSERT (result == NULL);
177 case iconveh_question_mark:
178 case iconveh_escape_sequence:
180 static const char expected[] = "Rafa? Maszkowski";
181 ASSERT (retval == 0);
182 ASSERT (length == strlen (expected));
183 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
186 for (i = 0; i < 16; i++)
187 ASSERT (offsets[i] == i);
188 ASSERT (offsets[16] == MAGIC);
199 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
200 for (indirect = 0; indirect <= 1; indirect++)
202 for (h = 0; h < SIZEOF (handlers); h++)
204 enum iconv_ilseq_handler handler = handlers[h];
205 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
206 for (o = 0; o < 2; o++)
208 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
211 int retval = mem_cd_iconveh (input, strlen (input),
213 ? &cdeh_88592_to_88591_indirectly
214 : &cdeh_88592_to_88591),
221 ASSERT (retval == -1 && errno == EILSEQ);
222 ASSERT (result == NULL);
226 case iconveh_question_mark:
228 static const char expected[] = "Rafa? Maszkowski";
229 ASSERT (retval == 0);
230 ASSERT (length == strlen (expected));
231 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
234 for (i = 0; i < 16; i++)
235 ASSERT (offsets[i] == i);
236 ASSERT (offsets[16] == MAGIC);
242 case iconveh_escape_sequence:
244 static const char expected[] = "Rafa\\u0142 Maszkowski";
245 ASSERT (retval == 0);
246 ASSERT (length == strlen (expected));
247 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
250 for (i = 0; i < 16; i++)
251 ASSERT (offsets[i] == (i < 5 ? i :
253 ASSERT (offsets[16] == MAGIC);
264 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
265 for (h = 0; h < SIZEOF (handlers); h++)
267 enum iconv_ilseq_handler handler = handlers[h];
268 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
269 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
270 for (o = 0; o < 2; o++)
272 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
275 int retval = mem_cd_iconveh (input, strlen (input),
280 ASSERT (retval == 0);
281 ASSERT (length == strlen (expected));
282 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
285 for (i = 0; i < 37; i++)
286 ASSERT (offsets[i] == (i < 1 ? i :
290 ASSERT (offsets[37] == MAGIC);
297 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
298 for (h = 0; h < SIZEOF (handlers); h++)
300 enum iconv_ilseq_handler handler = handlers[h];
301 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
302 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
303 for (o = 0; o < 2; o++)
305 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
308 int retval = mem_cd_iconveh (input, strlen (input),
313 ASSERT (retval == 0);
314 ASSERT (length == strlen (expected));
315 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
318 for (i = 0; i < 41; i++)
319 ASSERT (offsets[i] == (i < 1 ? i :
320 i == 1 ? (size_t)(-1) :
322 i == 13 ? (size_t)(-1) :
324 i == 20 ? (size_t)(-1) :
327 ASSERT (offsets[41] == MAGIC);
334 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
335 for (h = 0; h < SIZEOF (handlers); h++)
337 enum iconv_ilseq_handler handler = handlers[h];
338 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
339 for (o = 0; o < 2; o++)
341 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
344 int retval = mem_cd_iconveh (input, strlen (input),
352 ASSERT (retval == -1 && errno == EILSEQ);
353 ASSERT (result == NULL);
357 case iconveh_question_mark:
358 case iconveh_escape_sequence:
360 static const char expected[] = "Rafa? Maszkowski";
361 ASSERT (retval == 0);
362 ASSERT (length == strlen (expected));
363 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
366 for (i = 0; i < 16; i++)
367 ASSERT (offsets[i] == i);
368 ASSERT (offsets[16] == MAGIC);
378 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
379 for (h = 0; h < SIZEOF (handlers); h++)
381 enum iconv_ilseq_handler handler = handlers[h];
382 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
383 for (o = 0; o < 2; o++)
385 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
388 int retval = mem_cd_iconveh (input, strlen (input),
396 ASSERT (retval == -1 && errno == EILSEQ);
397 ASSERT (result == NULL);
401 case iconveh_question_mark:
403 static const char expected[] = "Rafa? Maszkowski";
404 ASSERT (retval == 0);
405 ASSERT (length == strlen (expected));
406 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
409 for (i = 0; i < 17; i++)
410 ASSERT (offsets[i] == (i < 5 ? i :
411 i == 5 ? (size_t)(-1) :
413 ASSERT (offsets[17] == MAGIC);
419 case iconveh_escape_sequence:
421 static const char expected[] = "Rafa\\u0142 Maszkowski";
422 ASSERT (retval == 0);
423 ASSERT (length == strlen (expected));
424 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
427 for (i = 0; i < 17; i++)
428 ASSERT (offsets[i] == (i < 5 ? i :
429 i == 5 ? (size_t)(-1) :
431 ASSERT (offsets[17] == MAGIC);
441 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
442 for (h = 0; h < SIZEOF (handlers); h++)
444 enum iconv_ilseq_handler handler = handlers[h];
445 static const char input[] = "\342";
446 for (o = 0; o < 2; o++)
448 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
451 int retval = mem_cd_iconveh (input, strlen (input),
456 ASSERT (retval == 0);
457 ASSERT (length == 0);
460 ASSERT (offsets[0] == 0);
461 ASSERT (offsets[1] == MAGIC);
468 if (cd_utf7_to_utf8 != (iconv_t)(-1))
470 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
471 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
472 # if !(defined __sun && !defined _LIBICONV_VERSION)
473 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
474 for (h = 0; h < SIZEOF (handlers); h++)
476 enum iconv_ilseq_handler handler = handlers[h];
477 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
478 convert to U+5432 U+D83F U+D840, but the latter two are Unicode surrogates. */
479 static const char input[] = "+VDLYP9hA";
480 static const char expected1[] = "\345\220\262"; /* 吲 glibc */
481 static const char expected2[] = ""; /* libiconv */
484 int retval = mem_cd_iconveh (input, 7,
489 ASSERT (retval == 0);
490 ASSERT (length == strlen (expected1) || length == strlen (expected2));
491 ASSERT (result != NULL);
492 if (length == strlen (expected1))
493 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
495 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
499 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
500 for (h = 0; h < SIZEOF (handlers); h++)
502 enum iconv_ilseq_handler handler = handlers[h];
503 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
504 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
505 static const char input[] = "+2D/YQNhB";
508 int retval = mem_cd_iconveh (input, strlen (input),
516 ASSERT (retval == -1 && errno == EILSEQ);
517 ASSERT (result == NULL);
519 case iconveh_question_mark:
520 case iconveh_escape_sequence:
523 static const char expected1[] = "?????";
524 /* libiconv <= 1.12 result */
525 static const char expected2[] = "?2D/YQNhB";
526 /* libiconv behaviour changed in version 1.13: the result is
527 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
528 by 6 bits. */
529 static const char expected3[] = "?\340\277\266\341\200\266";
530 ASSERT (retval == 0);
531 ASSERT (length == strlen (expected1)
532 || length == strlen (expected2)
533 || length == strlen (expected3));
534 ASSERT (result != NULL);
535 if (length == strlen (expected1))
536 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
537 else if (length == strlen (expected2))
538 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
540 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
549 /* ------------------------ Test str_cd_iconveh() ------------------------ */
551 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
552 for (indirect = 0; indirect <= 1; indirect++)
554 for (h = 0; h < SIZEOF (handlers); h++)
556 enum iconv_ilseq_handler handler = handlers[h];
557 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
558 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
559 char *result = str_cd_iconveh (input,
561 ? &cdeh_88592_to_88591_indirectly
562 : &cdeh_88592_to_88591),
564 ASSERT (result != NULL);
565 ASSERT (strcmp (result, expected) == 0);
570 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
571 for (indirect = 0; indirect <= 1; indirect++)
573 for (h = 0; h < SIZEOF (handlers); h++)
575 enum iconv_ilseq_handler handler = handlers[h];
576 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
577 char *result = str_cd_iconveh (input,
579 ? &cdeh_ascii_to_88591_indirectly
580 : &cdeh_ascii_to_88591),
585 ASSERT (result == NULL && errno == EILSEQ);
587 case iconveh_question_mark:
588 case iconveh_escape_sequence:
590 static const char expected[] = "Rafa? Maszkowski";
591 ASSERT (result != NULL);
592 ASSERT (strcmp (result, expected) == 0);
600 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
601 for (indirect = 0; indirect <= 1; indirect++)
603 for (h = 0; h < SIZEOF (handlers); h++)
605 enum iconv_ilseq_handler handler = handlers[h];
606 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
607 char *result = str_cd_iconveh (input,
609 ? &cdeh_88592_to_88591_indirectly
610 : &cdeh_88592_to_88591),
615 ASSERT (result == NULL && errno == EILSEQ);
617 case iconveh_question_mark:
619 static const char expected[] = "Rafa? Maszkowski";
620 ASSERT (result != NULL);
621 ASSERT (strcmp (result, expected) == 0);
625 case iconveh_escape_sequence:
627 static const char expected[] = "Rafa\\u0142 Maszkowski";
628 ASSERT (result != NULL);
629 ASSERT (strcmp (result, expected) == 0);
637 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
638 for (h = 0; h < SIZEOF (handlers); h++)
640 enum iconv_ilseq_handler handler = handlers[h];
641 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
642 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
643 char *result = str_cd_iconveh (input,
646 ASSERT (result != NULL);
647 ASSERT (strcmp (result, expected) == 0);
651 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
652 for (h = 0; h < SIZEOF (handlers); h++)
654 enum iconv_ilseq_handler handler = handlers[h];
655 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
656 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
657 char *result = str_cd_iconveh (input,
660 ASSERT (result != NULL);
661 ASSERT (strcmp (result, expected) == 0);
665 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
666 for (h = 0; h < SIZEOF (handlers); h++)
668 enum iconv_ilseq_handler handler = handlers[h];
669 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
670 char *result = str_cd_iconveh (input,
676 ASSERT (result == NULL && errno == EILSEQ);
678 case iconveh_question_mark:
679 case iconveh_escape_sequence:
681 static const char expected[] = "Rafa? Maszkowski";
682 ASSERT (result != NULL);
683 ASSERT (strcmp (result, expected) == 0);
690 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
691 for (h = 0; h < SIZEOF (handlers); h++)
693 enum iconv_ilseq_handler handler = handlers[h];
694 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
695 char *result = str_cd_iconveh (input,
701 ASSERT (result == NULL && errno == EILSEQ);
703 case iconveh_question_mark:
705 static const char expected[] = "Costs: 27 ?";
706 ASSERT (result != NULL);
707 ASSERT (strcmp (result, expected) == 0);
711 case iconveh_escape_sequence:
713 static const char expected[] = "Costs: 27 \\u20AC";
714 ASSERT (result != NULL);
715 ASSERT (strcmp (result, expected) == 0);
722 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
723 for (h = 0; h < SIZEOF (handlers); h++)
725 enum iconv_ilseq_handler handler = handlers[h];
726 static const char input[] = "\342";
727 char *result = str_cd_iconveh (input,
730 ASSERT (result != NULL);
731 ASSERT (strcmp (result, "") == 0);
735 if (cd_88591_to_88592 != (iconv_t)(-1))
736 iconv_close (cd_88591_to_88592);
737 if (cd_88592_to_88591 != (iconv_t)(-1))
738 iconv_close (cd_88592_to_88591);
739 iconv_close (cd_88591_to_utf8);
740 iconv_close (cd_utf8_to_88591);
741 iconv_close (cd_88592_to_utf8);
742 iconv_close (cd_utf8_to_88592);
744 /* ------------------------- Test mem_iconveh() ------------------------- */
746 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
747 for (h = 0; h < SIZEOF (handlers); h++)
749 enum iconv_ilseq_handler handler = handlers[h];
750 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
751 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
752 for (o = 0; o < 2; o++)
754 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
757 int retval = mem_iconveh (input, strlen (input),
758 "ISO-8859-2", "ISO-8859-1",
762 ASSERT (retval == 0);
763 ASSERT (length == strlen (expected));
764 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
767 for (i = 0; i < 37; i++)
768 ASSERT (offsets[i] == i);
769 ASSERT (offsets[37] == MAGIC);
776 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
777 for (h = 0; h < SIZEOF (handlers); h++)
779 enum iconv_ilseq_handler handler = handlers[h];
780 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
781 for (o = 0; o < 2; o++)
783 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
786 int retval = mem_iconveh (input, strlen (input),
787 "ISO-8859-2", "ISO-8859-1",
794 ASSERT (retval == -1 && errno == EILSEQ);
795 ASSERT (result == NULL);
799 case iconveh_question_mark:
801 static const char expected[] = "Rafa? Maszkowski";
802 ASSERT (retval == 0);
803 ASSERT (length == strlen (expected));
804 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
807 for (i = 0; i < 16; i++)
808 ASSERT (offsets[i] == i);
809 ASSERT (offsets[16] == MAGIC);
815 case iconveh_escape_sequence:
817 static const char expected[] = "Rafa\\u0142 Maszkowski";
818 ASSERT (retval == 0);
819 ASSERT (length == strlen (expected));
820 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
823 for (i = 0; i < 16; i++)
824 ASSERT (offsets[i] == (i < 5 ? i :
826 ASSERT (offsets[16] == MAGIC);
836 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
837 for (h = 0; h < SIZEOF (handlers); h++)
839 enum iconv_ilseq_handler handler = handlers[h];
840 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
841 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
842 for (o = 0; o < 2; o++)
844 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
847 int retval = mem_iconveh (input, strlen (input),
848 "ISO-8859-1", "UTF-8",
852 ASSERT (retval == 0);
853 ASSERT (length == strlen (expected));
854 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
857 for (i = 0; i < 37; i++)
858 ASSERT (offsets[i] == (i < 1 ? i :
862 ASSERT (offsets[37] == MAGIC);
869 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
870 for (h = 0; h < SIZEOF (handlers); h++)
872 enum iconv_ilseq_handler handler = handlers[h];
873 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
874 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
875 for (o = 0; o < 2; o++)
877 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
880 int retval = mem_iconveh (input, strlen (input),
881 "UTF-8", "ISO-8859-1",
885 ASSERT (retval == 0);
886 ASSERT (length == strlen (expected));
887 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
890 for (i = 0; i < 41; i++)
891 ASSERT (offsets[i] == (i < 1 ? i :
892 i == 1 ? (size_t)(-1) :
894 i == 13 ? (size_t)(-1) :
896 i == 20 ? (size_t)(-1) :
899 ASSERT (offsets[41] == MAGIC);
906 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
907 for (h = 0; h < SIZEOF (handlers); h++)
909 enum iconv_ilseq_handler handler = handlers[h];
910 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
911 for (o = 0; o < 2; o++)
913 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
916 int retval = mem_iconveh (input, strlen (input),
917 "UTF-8", "ISO-8859-1",
924 ASSERT (retval == -1 && errno == EILSEQ);
925 ASSERT (result == NULL);
929 case iconveh_question_mark:
931 static const char expected[] = "Rafa? Maszkowski";
932 ASSERT (retval == 0);
933 ASSERT (length == strlen (expected));
934 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
937 for (i = 0; i < 17; i++)
938 ASSERT (offsets[i] == (i < 5 ? i :
939 i == 5 ? (size_t)(-1) :
941 ASSERT (offsets[17] == MAGIC);
947 case iconveh_escape_sequence:
949 static const char expected[] = "Rafa\\u0142 Maszkowski";
950 ASSERT (retval == 0);
951 ASSERT (length == strlen (expected));
952 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
955 for (i = 0; i < 17; i++)
956 ASSERT (offsets[i] == (i < 5 ? i :
957 i == 5 ? (size_t)(-1) :
959 ASSERT (offsets[17] == MAGIC);
969 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
970 for (h = 0; h < SIZEOF (handlers); h++)
972 enum iconv_ilseq_handler handler = handlers[h];
973 static const char input[] = "\342";
974 for (o = 0; o < 2; o++)
976 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
979 int retval = mem_iconveh (input, strlen (input),
980 "UTF-8", "ISO-8859-1",
984 ASSERT (retval == 0);
985 ASSERT (length == 0);
988 ASSERT (offsets[0] == 0);
989 ASSERT (offsets[1] == MAGIC);
996 /* ------------------------- Test str_iconveh() ------------------------- */
998 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
999 for (h = 0; h < SIZEOF (handlers); h++)
1001 enum iconv_ilseq_handler handler = handlers[h];
1002 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1003 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1004 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1005 ASSERT (result != NULL);
1006 ASSERT (strcmp (result, expected) == 0);
1010 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1011 for (h = 0; h < SIZEOF (handlers); h++)
1013 enum iconv_ilseq_handler handler = handlers[h];
1014 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1015 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1019 ASSERT (result == NULL && errno == EILSEQ);
1021 case iconveh_question_mark:
1023 static const char expected[] = "Rafa? Maszkowski";
1024 ASSERT (result != NULL);
1025 ASSERT (strcmp (result, expected) == 0);
1029 case iconveh_escape_sequence:
1031 static const char expected[] = "Rafa\\u0142 Maszkowski";
1032 ASSERT (result != NULL);
1033 ASSERT (strcmp (result, expected) == 0);
1040 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1041 for (h = 0; h < SIZEOF (handlers); h++)
1043 enum iconv_ilseq_handler handler = handlers[h];
1044 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1045 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1046 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
1047 ASSERT (result != NULL);
1048 ASSERT (strcmp (result, expected) == 0);
1052 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1053 for (h = 0; h < SIZEOF (handlers); h++)
1055 enum iconv_ilseq_handler handler = handlers[h];
1056 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1057 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1058 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1059 ASSERT (result != NULL);
1060 ASSERT (strcmp (result, expected) == 0);
1064 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1065 for (h = 0; h < SIZEOF (handlers); h++)
1067 enum iconv_ilseq_handler handler = handlers[h];
1068 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1069 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1073 ASSERT (result == NULL && errno == EILSEQ);
1075 case iconveh_question_mark:
1077 static const char expected[] = "Costs: 27 ?";
1078 ASSERT (result != NULL);
1079 ASSERT (strcmp (result, expected) == 0);
1083 case iconveh_escape_sequence:
1085 static const char expected[] = "Costs: 27 \\u20AC";
1086 ASSERT (result != NULL);
1087 ASSERT (strcmp (result, expected) == 0);
1094 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1095 for (h = 0; h < SIZEOF (handlers); h++)
1097 enum iconv_ilseq_handler handler = handlers[h];
1098 static const char input[] = "\342";
1099 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1100 ASSERT (result != NULL);
1101 ASSERT (strcmp (result, "") == 0);