X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fstriconv.c;h=ba546a4333e98505433af30362901494ef1288c4;hb=ab624c1a8f744419fdb653b77250153a3563203f;hp=4ca983914f269bddffe16c4f7239215efb58cf76;hpb=771ffe34fdff1d8f2c40d697cb363829d01ff995;p=gnulib.git diff --git a/lib/striconv.c b/lib/striconv.c index 4ca983914..ba546a433 100644 --- a/lib/striconv.c +++ b/lib/striconv.c @@ -1,5 +1,5 @@ /* Charset conversion. - Copyright (C) 2001-2006 Free Software Foundation, Inc. + Copyright (C) 2001-2007, 2010-2012 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software; you can redistribute it and/or modify @@ -13,12 +13,9 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program; if not, see . */ -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif +#include /* Specification. */ #include "striconv.h" @@ -33,7 +30,6 @@ # include #endif -#include "strdup.h" #include "c-strcase.h" #ifndef SIZE_MAX @@ -45,7 +41,7 @@ int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, - char **resultp, size_t *lengthp) + char **resultp, size_t *lengthp) { # define tmpbufsize 4096 size_t length; @@ -53,7 +49,8 @@ mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) /* Set to the initial state. */ iconv (cd, NULL, NULL, NULL, NULL); # endif @@ -61,51 +58,59 @@ mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, /* Determine the length we need. */ { size_t count = 0; - char tmpbuf[tmpbufsize]; + /* The alignment is needed when converting e.g. to glibc's WCHAR_T or + libiconv's UCS-4-INTERNAL encoding. */ + union { unsigned int align; char buf[tmpbufsize]; } tmp; +# define tmpbuf tmp.buf const char *inptr = src; size_t insize = srclen; while (insize > 0) { - char *outptr = tmpbuf; - size_t outsize = tmpbufsize; - size_t res = iconv (cd, - (ICONV_CONST char **) &inptr, &insize, - &outptr, &outsize); - - if (res == (size_t)(-1)) - { - if (errno == E2BIG) - ; - else if (errno == EINVAL) - break; - else - return -1; - } -# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) - /* Irix iconv() inserts a NUL byte if it cannot convert. */ - else if (res > 0) - { - errno = EILSEQ; - return -1; - } + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + ; + else if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + else if (res > 0) + { + errno = EILSEQ; + return -1; + } # endif - count += outptr - tmpbuf; + count += outptr - tmpbuf; } /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) { char *outptr = tmpbuf; size_t outsize = tmpbufsize; size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); if (res == (size_t)(-1)) - return -1; + return -1; count += outptr - tmpbuf; } # endif length = count; +# undef tmpbuf } if (length == 0) @@ -113,18 +118,22 @@ mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, *lengthp = 0; return 0; } - result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length)); - if (result == NULL) + if (*resultp != NULL && *lengthp >= length) + result = *resultp; + else { - errno = ENOMEM; - return -1; + result = (char *) malloc (length); + if (result == NULL) + { + errno = ENOMEM; + return -1; + } } - *resultp = result; - *lengthp = length; /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) /* Return to the initial state. */ iconv (cd, NULL, NULL, NULL, NULL); # endif @@ -138,41 +147,59 @@ mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, while (insize > 0) { - size_t res = iconv (cd, - (ICONV_CONST char **) &inptr, &insize, - &outptr, &outsize); - - if (res == (size_t)(-1)) - { - if (errno == EINVAL) - break; - else - return -1; - } -# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) - /* Irix iconv() inserts a NUL byte if it cannot convert. */ - else if (res > 0) - { - errno = EILSEQ; - return -1; - } + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else + goto fail; + } +# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + else if (res > 0) + { + errno = EILSEQ; + goto fail; + } # endif } /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) { size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); if (res == (size_t)(-1)) - return -1; + goto fail; } # endif if (outsize != 0) abort (); } + *resultp = result; + *lengthp = length; + return 0; + + fail: + { + if (result != *resultp) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } # undef tmpbufsize } @@ -183,21 +210,24 @@ str_cd_iconv (const char *src, iconv_t cd) to a trailing NUL byte in the output. But not for UTF-7. So that this function is usable for UTF-7, we have to exclude the NUL byte from the conversion and add it by hand afterwards. */ -# if PROBABLY_SLOWER +# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. For other iconv() implementations, + we have to look at the number of irreversible conversions returned; + but this information is lost when iconv() returns for an E2BIG reason. + Therefore we cannot use the second, faster algorithm. */ char *result = NULL; - size_t length; + size_t length = 0; int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); char *final_result; if (retval < 0) { if (result != NULL) - { - int saved_errno = errno; - free (result); - errno = saved_errno; - } + abort (); return NULL; } @@ -206,8 +236,7 @@ str_cd_iconv (const char *src, iconv_t cd) (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); if (final_result == NULL) { - if (result != NULL) - free (result); + free (result); errno = ENOMEM; return NULL; } @@ -216,7 +245,10 @@ str_cd_iconv (const char *src, iconv_t cd) return final_result; # else - + /* This algorithm is likely faster than the one above. But it may produce + iconv() returns for an E2BIG reason, when the output size guess is too + small. Therefore it can only be used when we don't need the number of + irreversible conversions performed. */ char *result; size_t result_size; size_t length; @@ -243,7 +275,8 @@ str_cd_iconv (const char *src, iconv_t cd) /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) /* Set to the initial state. */ iconv (cd, NULL, NULL, NULL, NULL); # endif @@ -255,89 +288,82 @@ str_cd_iconv (const char *src, iconv_t cd) for (;;) { - /* Here inptr + inbytes_remaining = src + strlen (src), - outptr + outbytes_remaining = result + result_size - 1. */ - size_t res = iconv (cd, - (ICONV_CONST char **) &inptr, &inbytes_remaining, - &outptr, &outbytes_remaining); - - if (res == (size_t)(-1)) - { - if (errno == EINVAL) - break; - else if (errno == E2BIG) - { - size_t used = outptr - result; - size_t newsize = result_size * 2; - char *newresult; - - if (!(newsize > result_size)) - { - errno = ENOMEM; - goto failed; - } - newresult = (char *) realloc (result, newsize); - if (newresult == NULL) - { - errno = ENOMEM; - goto failed; - } - result = newresult; - result_size = newsize; - outptr = result + used; - outbytes_remaining = result_size - 1 - used; - } - else - goto failed; - } -# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) - /* Irix iconv() inserts a NUL byte if it cannot convert. */ - else if (res > 0) - { - errno = EILSEQ; - goto failed; - } -# endif - else - break; + /* Here inptr + inbytes_remaining = src + strlen (src), + outptr + outbytes_remaining = result + result_size - 1. */ + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytes_remaining, + &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } + else + break; } /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + || defined __sun) for (;;) { - /* Here outptr + outbytes_remaining = result + result_size - 1. */ - size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); - - if (res == (size_t)(-1)) - { - if (errno == E2BIG) - { - size_t used = outptr - result; - size_t newsize = result_size * 2; - char *newresult; - - if (!(newsize > result_size)) - { - errno = ENOMEM; - goto failed; - } - newresult = (char *) realloc (result, newsize); - if (newresult == NULL) - { - errno = ENOMEM; - goto failed; - } - result = newresult; - result_size = newsize; - outptr = result + used; - outbytes_remaining = result_size - 1 - used; - } - else - goto failed; - } - else - break; + /* Here outptr + outbytes_remaining = result + result_size - 1. */ + size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } + else + break; } # endif @@ -353,7 +379,7 @@ str_cd_iconv (const char *src, iconv_t cd) char *smaller_result = (char *) realloc (result, length); if (smaller_result != NULL) - result = smaller_result; + result = smaller_result; } return result; @@ -374,8 +400,14 @@ str_cd_iconv (const char *src, iconv_t cd) char * str_iconv (const char *src, const char *from_codeset, const char *to_codeset) { - if (c_strcasecmp (from_codeset, to_codeset) == 0) - return strdup (src); + if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result = strdup (src); + + if (result == NULL) + errno = ENOMEM; + return result; + } else { #if HAVE_ICONV @@ -383,46 +415,47 @@ str_iconv (const char *src, const char *from_codeset, const char *to_codeset) char *result; /* Avoid glibc-2.1 bug with EUC-KR. */ -# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION +# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + && !defined _LIBICONV_VERSION if (c_strcasecmp (from_codeset, "EUC-KR") == 0 - || c_strcasecmp (to_codeset, "EUC-KR") == 0) - { - errno = EINVAL; - return NULL; - } + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return NULL; + } # endif cd = iconv_open (to_codeset, from_codeset); if (cd == (iconv_t) -1) - return NULL; + return NULL; result = str_cd_iconv (src, cd); if (result == NULL) - { - /* Close cd, but preserve the errno from str_cd_iconv. */ - int saved_errno = errno; - iconv_close (cd); - errno = saved_errno; - } + { + /* Close cd, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + iconv_close (cd); + errno = saved_errno; + } else - { - if (iconv_close (cd) < 0) - { - /* Return NULL, but free the allocated memory, and while doing - that, preserve the errno from iconv_close. */ - int saved_errno = errno; - free (result); - errno = saved_errno; - return NULL; - } - } + { + if (iconv_close (cd) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + } return result; #else /* This is a different error code than if iconv_open existed but didn't - support from_codeset and to_codeset, so that the caller can emit - an error message such as - "iconv() is not supported. Installing GNU libiconv and - then reinstalling this package would fix this." */ + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ errno = ENOSYS; return NULL; #endif