From 826fd3d7e36c3538595e9eacad8517040812b99b Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 21 Jan 2007 22:04:33 +0000 Subject: [PATCH] New function mem_iconveh. --- lib/striconveh.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++ lib/striconveh.h | 17 ++++++ tests/test-striconveh.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 314 insertions(+) diff --git a/lib/striconveh.c b/lib/striconveh.c index 07e8e2a3e..c27ef98eb 100644 --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -766,6 +766,154 @@ str_cd_iconveh (const char *src, #endif +int +mem_iconveh (const char *src, size_t srclen, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler, + char **resultp, size_t *lengthp) +{ + if (c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result; + + if (*resultp != NULL && *lengthp >= srclen) + result = *resultp; + else + { + result = (char *) malloc (srclen); + if (result == NULL) + { + errno = ENOMEM; + return -1; + } + } + memcpy (result, src, srclen); + *resultp = result; + *lengthp = srclen; + return 0; + } + else + { +#if HAVE_ICONV + iconv_t cd; + iconv_t cd1; + iconv_t cd2; + char *result; + size_t length; + int retval; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (c_strcasecmp (from_codeset, "EUC-KR") == 0 + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return -1; + } +# endif + + cd = iconv_open (to_codeset, from_codeset); + if (cd == (iconv_t)(-1)) + return -1; + + if (c_strcasecmp (from_codeset, "UTF-8") == 0) + cd1 = (iconv_t)(-1); + else + { + cd1 = iconv_open ("UTF-8", from_codeset); + if (cd1 == (iconv_t)(-1)) + { + int saved_errno = errno; + iconv_close (cd); + errno = saved_errno; + return -1; + } + } + + if (c_strcasecmp (to_codeset, "UTF-8") == 0) + cd2 = (iconv_t)(-1); + else + { + cd2 = iconv_open (to_codeset, "UTF-8"); + if (cd2 == (iconv_t)(-1)) + { + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + iconv_close (cd); + errno = saved_errno; + return -1; + } + } + + result = *resultp; + length = *lengthp; + retval = + mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length); + + if (retval < 0) + { + /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + if (cd2 != (iconv_t)(-1)) + iconv_close (cd2); + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + iconv_close (cd); + errno = saved_errno; + } + else + { + if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (cd1 != (iconv_t)(-1)) + iconv_close (cd1); + iconv_close (cd); + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + iconv_close (cd); + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + if (iconv_close (cd) < 0) + { + /* Return -1, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + if (result != *resultp && result != NULL) + free (result); + errno = saved_errno; + return -1; + } + *resultp = result; + *lengthp = length; + } + return retval; +#else + /* This is a different error code than if iconv_open existed but didn't + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ + errno = ENOSYS; + return -1; +#endif + } +} + char * str_iconveh (const char *src, const char *from_codeset, const char *to_codeset, diff --git a/lib/striconveh.h b/lib/striconveh.h index 2ea7d540f..3dc153955 100644 --- a/lib/striconveh.h +++ b/lib/striconveh.h @@ -80,6 +80,23 @@ extern char * #endif /* Convert an entire string from one encoding to another, using iconv. + The original string is at [SRC,...,SRC+SRCLEN-1]. + Both the "from" and the "to" encoding must use a single NUL byte at the + end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). + *RESULTP and *LENGTH should initially be a scratch buffer and its size, + or *RESULTP can initially be NULL. + May erase the contents of the memory at *RESULTP. + Return value: 0 if successful, otherwise -1 and errno set. + If successful: The resulting string is stored in *RESULTP and its length + in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is + unchanged if no dynamic memory allocation was necessary. */ +extern int + mem_iconveh (const char *src, size_t srclen, + const char *from_codeset, const char *to_codeset, + enum iconv_ilseq_handler handler, + char **resultp, size_t *lengthp); + +/* Convert an entire string from one encoding to another, using iconv. The original string is the NUL-terminated string starting at SRC. Both the "from" and the "to" encoding must use a single NUL byte at the end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c index 1243fcdfe..7d4527372 100644 --- a/tests/test-striconveh.c +++ b/tests/test-striconveh.c @@ -347,6 +347,155 @@ main () iconv_close (cd_88592_to_utf8); iconv_close (cd_utf8_to_88592); + /* ------------------------- Test mem_iconveh() ------------------------- */ + + /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-2", "ISO-8859-1", + handler, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + + /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-2", "ISO-8859-1", + handler, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + break; + } + } + + /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-1", "UTF-8", + handler, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + + /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; + static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + + /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */ + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + free (result); + } + break; + } + } + + /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\342"; + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == 0); + if (result != NULL) + free (result); + } + /* ------------------------- Test str_iconveh() ------------------------- */ /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */ -- 2.11.0