X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=tests%2Ftest-striconveh.c;h=22b48757298df84639f15512123f4d4737c205d7;hb=711f4a0cd352147964e22b129cf506bd128b10dd;hp=2557baa48ea2200186bf69da69c3e568c9a2a7c3;hpb=e220635fc952c63b533873f2af39ec76aed4b4e3;p=gnulib.git diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c index 2557baa48..22b487572 100644 --- a/tests/test-striconveh.c +++ b/tests/test-striconveh.c @@ -1,5 +1,5 @@ /* Test of character set conversion with error handling. - Copyright (C) 2007-2008 Free Software Foundation, Inc. + Copyright (C) 2007-2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -58,6 +58,7 @@ main () { static enum iconv_ilseq_handler handlers[] = { iconveh_error, iconveh_question_mark, iconveh_escape_sequence }; + size_t indirect; size_t h; size_t o; size_t i; @@ -71,102 +72,136 @@ main () iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8"); iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2"); iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8"); + iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7"); + iconveh_t cdeh_88592_to_88591; + iconveh_t cdeh_88592_to_88591_indirectly; + iconveh_t cdeh_88591_to_utf8; + iconveh_t cdeh_utf8_to_88591; + iconveh_t cdeh_utf7_to_utf8; ASSERT (cd_88591_to_utf8 != (iconv_t)(-1)); ASSERT (cd_utf8_to_88591 != (iconv_t)(-1)); ASSERT (cd_88592_to_utf8 != (iconv_t)(-1)); ASSERT (cd_utf8_to_88592 != (iconv_t)(-1)); + cdeh_88592_to_88591.cd = cd_88592_to_88591; + cdeh_88592_to_88591.cd1 = cd_88592_to_utf8; + cdeh_88592_to_88591.cd2 = cd_utf8_to_88591; + + cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1); + cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8; + cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591; + + cdeh_88591_to_utf8.cd = cd_88591_to_utf8; + cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8; + cdeh_88591_to_utf8.cd2 = (iconv_t)(-1); + + cdeh_utf8_to_88591.cd = cd_utf8_to_88591; + cdeh_utf8_to_88591.cd1 = (iconv_t)(-1); + cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591; + + cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8; + cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8; + cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1); + /* ------------------------ Test mem_cd_iconveh() ------------------------ */ /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */ - for (h = 0; h < SIZEOF (handlers); h++) + for (indirect = 0; indirect <= 1; indirect++) { - enum iconv_ilseq_handler handler = handlers[h]; - static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - for (o = 0; o < 2; o++) + for (h = 0; h < SIZEOF (handlers); h++) { - size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler, - offsets, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - if (o) + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + for (o = 0; o < 2; o++) { - for (i = 0; i < 37; i++) - ASSERT (offsets[i] == i); - ASSERT (offsets[37] == MAGIC); - free (offsets); + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + (indirect + ? &cdeh_88592_to_88591_indirectly + : &cdeh_88592_to_88591), + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); } - free (result); } } /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */ - for (h = 0; h < SIZEOF (handlers); h++) + for (indirect = 0; indirect <= 1; indirect++) { - enum iconv_ilseq_handler handler = handlers[h]; - static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ - for (o = 0; o < 2; o++) + for (h = 0; h < SIZEOF (handlers); h++) { - size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler, - offsets, - &result, &length); - switch (handler) + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ + for (o = 0; o < 2; o++) { - case iconveh_error: - ASSERT (retval == -1 && errno == EILSEQ); - ASSERT (result == NULL); - if (o) - free (offsets); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - if (o) - { - for (i = 0; i < 16; i++) - ASSERT (offsets[i] == i); - ASSERT (offsets[16] == MAGIC); + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + (indirect + ? &cdeh_88592_to_88591_indirectly + : &cdeh_88592_to_88591), + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) free (offsets); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); } - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - if (o) + break; + case iconveh_escape_sequence: { - for (i = 0; i < 16; i++) - ASSERT (offsets[i] == (i < 5 ? i : - i + 5)); - ASSERT (offsets[16] == MAGIC); - free (offsets); + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i + 5)); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); } - free (result); - } - break; + break; + } } } } @@ -183,8 +218,7 @@ main () char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh (input, strlen (input), - cd_88591_to_utf8, - cd_88591_to_utf8, (iconv_t)(-1), + &cdeh_88591_to_utf8, handler, offsets, &result, &length); @@ -217,8 +251,7 @@ main () char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler, offsets, &result, &length); @@ -254,8 +287,7 @@ main () char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler, offsets, &result, &length); @@ -318,8 +350,7 @@ main () char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler, offsets, &result, &length); @@ -335,53 +366,142 @@ main () } } + if (cd_utf7_to_utf8 != (iconv_t)(-1)) + { + /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns + -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */ +# if !(defined __sun && !defined _LIBICONV_VERSION) + /* Test conversion from UTF-7 to UTF-8 with EINVAL. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would + convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */ + static const char input[] = "+VDLYP9hA"; + static const char expected1[] = "\345\220\262"; /* 吲 glibc */ + static const char expected2[] = ""; /* libiconv */ + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, 7, + &cdeh_utf7_to_utf8, + handler, + NULL, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) || length == strlen (expected2)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); + free (result); + } + + /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would + convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */ + static const char input[] = "+2D/YQNhB"; + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + &cdeh_utf7_to_utf8, + handler, + NULL, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + break; + case iconveh_question_mark: + case iconveh_escape_sequence: + { + /* glibc result */ + static const char expected1[] = "?????"; + /* libiconv <= 1.12 result */ + static const char expected2[] = "?2D/YQNhB"; + /* libiconv behaviour changed in version 1.13: the result is + '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left + by 6 bits. */ + static const char expected3[] = "?\340\277\266\341\200\266"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) + || length == strlen (expected2) + || length == strlen (expected3)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else if (length == strlen (expected2)) + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); + else + ASSERT (memcmp (result, expected3, strlen (expected3)) == 0); + free (result); + } + break; + } + } +# endif + } + /* ------------------------ Test str_cd_iconveh() ------------------------ */ /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */ - for (h = 0; h < SIZEOF (handlers); h++) + for (indirect = 0; indirect <= 1; indirect++) { - enum iconv_ilseq_handler handler = handlers[h]; - static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char *result = str_cd_iconveh (input, - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler); - ASSERT (result != NULL); - ASSERT (strcmp (result, expected) == 0); - free (result); + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + char *result = str_cd_iconveh (input, + (indirect + ? &cdeh_88592_to_88591_indirectly + : &cdeh_88592_to_88591), + handler); + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } } /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */ - for (h = 0; h < SIZEOF (handlers); h++) + for (indirect = 0; indirect <= 1; indirect++) { - enum iconv_ilseq_handler handler = handlers[h]; - static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ - char *result = str_cd_iconveh (input, - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler); - switch (handler) + for (h = 0; h < SIZEOF (handlers); h++) { - case iconveh_error: - ASSERT (result == NULL && errno == EILSEQ); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (result != NULL); - ASSERT (strcmp (result, expected) == 0); - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (result != NULL); - ASSERT (strcmp (result, expected) == 0); - free (result); - } - break; + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ + char *result = str_cd_iconveh (input, + (indirect + ? &cdeh_88592_to_88591_indirectly + : &cdeh_88592_to_88591), + handler); + switch (handler) + { + case iconveh_error: + ASSERT (result == NULL && errno == EILSEQ); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } + break; + } } } @@ -392,8 +512,7 @@ main () static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; char *result = str_cd_iconveh (input, - cd_88591_to_utf8, - cd_88591_to_utf8, (iconv_t)(-1), + &cdeh_88591_to_utf8, handler); ASSERT (result != NULL); ASSERT (strcmp (result, expected) == 0); @@ -407,8 +526,7 @@ main () static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; char *result = str_cd_iconveh (input, - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler); ASSERT (result != NULL); ASSERT (strcmp (result, expected) == 0); @@ -421,8 +539,7 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */ char *result = str_cd_iconveh (input, - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler); switch (handler) { @@ -454,8 +571,7 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\342"; char *result = str_cd_iconveh (input, - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, + &cdeh_utf8_to_88591, handler); ASSERT (result != NULL); ASSERT (strcmp (result, "") == 0);