X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fstriconveh.c;h=99bbe18d29eca1caacee330c49b2fe588db79dc5;hb=c532b2a8ef48d317a1bc348584c00880ea7c5ba6;hp=56c0c04782accb0d2edeb448375d27f07e685c50;hpb=6f18b53d9342b4e55364e0c081928f65b5d7ed79;p=gnulib.git diff --git a/lib/striconveh.c b/lib/striconveh.c index 56c0c0478..99bbe18d2 100644 --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -2,10 +2,10 @@ Copyright (C) 2001-2007 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -13,8 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <config.h> @@ -28,8 +27,6 @@ #if HAVE_ICONV # include <iconv.h> -# include "utf8-ucs4.h" -# include "ucs4-utf8.h" # include "unistr.h" #endif @@ -175,6 +172,79 @@ iconv_carefully_1 (iconv_t cd, return res; } +/* utf8conv_carefully is like iconv, except that + - it converts from UTF-8 to UTF-8, + - it stops as soon as it encounters a conversion error, and it returns + in *INCREMENTED a boolean telling whether it has incremented the input + pointers past the error location, + - if one_character_only is true, it stops after converting one + character.
*/ +static size_t +utf8conv_carefully (bool one_character_only, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr = *inbuf; + size_t insize = *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + size_t res; + + res = 0; + do + { + ucs4_t uc; + int n; + int m; + + n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize); + if (n < 0) + { + errno = (n == -2 ? EINVAL : EILSEQ); + n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize); + inptr += n; + insize -= n; + res = (size_t)(-1); + *incremented = true; + break; + } + if (outsize == 0) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + m = u8_uctomb ((uint8_t *) outptr, uc, outsize); + if (m == -2) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + inptr += n; + insize -= n; + if (m == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + *incremented = true; + break; + } + outptr += m; + outsize -= m; + } + while (!one_character_only && insize > 0); + + *inbuf = inptr; + *inbytesleft = insize; + *outbuf = outptr; + *outbytesleft = outsize; + return res; +} + static int mem_cd_iconveh_internal (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, @@ -212,6 +282,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, } result = initial_result; + /* Test whether a direct conversion is possible at all. */ + if (cd == (iconv_t)(-1)) + goto indirectly; + if (offsets != NULL) { size_t i; @@ -410,8 +484,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, goto done; indirectly: - /* The direct conversion failed, handler != iconveh_error, - and cd2 != (iconv_t)(-1). + /* The direct conversion failed. Use a conversion through UTF-8. 
*/ if (offsets != NULL) { @@ -424,6 +497,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, } length = 0; { + const bool slowly = (offsets != NULL || handler == iconveh_error); # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ char utf8buf[utf8bufsize + 1]; size_t utf8len = 0; @@ -438,7 +512,8 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, /* Set to the initial state. */ if (cd1 != (iconv_t)(-1)) iconv (cd1, NULL, NULL, NULL, NULL); - iconv (cd2, NULL, NULL, NULL, NULL); + if (cd2 != (iconv_t)(-1)) + iconv (cd2, NULL, NULL, NULL, NULL); # endif while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2) @@ -460,7 +535,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, } if (cd1 != (iconv_t)(-1)) { - if (offsets != NULL) + if (slowly) res1 = iconv_carefully_1 (cd1, &in1ptr, &in1size, &out1ptr, &out1size, @@ -474,55 +549,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, else { /* FROM_CODESET is UTF-8. 
*/ - res1 = 0; - do - { - ucs4_t uc; - int n; - int m; - - n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size); - if (uc == 0xfffd - && !(n >= 3 - && (uint8_t)in1ptr[0] == 0xEF - && (uint8_t)in1ptr[1] == 0xBF - && (uint8_t)in1ptr[2] == 0xBD)) - { - in1ptr += n; - in1size -= n; - errno = EILSEQ; - res1 = (size_t)(-1); - incremented1 = true; - break; - } - if (out1size == 0) - { - errno = E2BIG; - res1 = (size_t)(-1); - incremented1 = false; - break; - } - m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size); - if (m == -2) - { - errno = E2BIG; - res1 = (size_t)(-1); - incremented1 = false; - break; - } - in1ptr += n; - in1size -= n; - if (m == -1) - { - errno = EILSEQ; - res1 = (size_t)(-1); - incremented1 = true; - break; - } - out1ptr += m; - out1size -= m; - } - while (offsets == NULL && in1size > 0); + res1 = utf8conv_carefully (slowly, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); } } else if (do_final_flush1) @@ -592,10 +622,19 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, bool grow; if (in2size > 0) - res2 = iconv_carefully (cd2, - &in2ptr, &in2size, - &out2ptr, &out2size, - &incremented2); + { + if (cd2 != (iconv_t)(-1)) + res2 = iconv_carefully (cd2, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + else + /* TO_CODESET is UTF-8. */ + res2 = utf8conv_carefully (false, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + } else /* in1size == 0 && !do_final_flush1 && in2size == 0 && do_final_flush2 */ { @@ -603,10 +642,11 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, state. But avoid glibc-2.1 bug and Solaris 2.7 bug. 
*/ # if defined _LIBICONV_VERSION \ || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) - res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); -# else - res2 = 0; + if (cd2 != (iconv_t)(-1)) + res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); + else # endif + res2 = 0; do_final_flush2 = false; incremented2 = true; } @@ -677,9 +717,28 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, inptr = scratchbuf; insize = scratchlen; - res = iconv (cd2, - (ICONV_CONST char **) &inptr, &insize, - &out2ptr, &out2size); + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. */ + if (out2size >= insize) + { + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } + else + { + errno = E2BIG; + res = (size_t)(-1); + } + } length = out2ptr - result; if (res == (size_t)(-1) && errno == E2BIG) { @@ -706,9 +765,23 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, out2ptr = result + length; out2size = allocated - extra_alloc - length; - res = iconv (cd2, - (ICONV_CONST char **) &inptr, &insize, - &out2ptr, &out2size); + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, + &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. 
*/ + if (!(out2size >= insize)) + abort (); + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } length = out2ptr - result; } # if !defined _LIBICONV_VERSION && !defined __GLIBC__ @@ -926,8 +999,6 @@ mem_iconveh (const char *src, size_t srclen, # endif cd = iconv_open (to_codeset, from_codeset); - if (cd == (iconv_t)(-1)) - return -1; if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) cd1 = (iconv_t)(-1); @@ -937,7 +1008,8 @@ mem_iconveh (const char *src, size_t srclen, if (cd1 == (iconv_t)(-1)) { int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return -1; } @@ -953,7 +1025,8 @@ mem_iconveh (const char *src, size_t srclen, int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return -1; } @@ -972,7 +1045,8 @@ mem_iconveh (const char *src, size_t srclen, iconv_close (cd2); if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; } else @@ -984,7 +1058,8 @@ mem_iconveh (const char *src, size_t srclen, int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); if (result != *resultp && result != NULL) free (result); errno = saved_errno; @@ -995,13 +1070,14 @@ mem_iconveh (const char *src, size_t srclen, /* Return -1, but free the allocated memory, and while doing that, preserve the errno from iconv_close. 
*/ int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); if (result != *resultp && result != NULL) free (result); errno = saved_errno; return -1; } - if (iconv_close (cd) < 0) + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) { /* Return -1, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ @@ -1059,8 +1135,6 @@ str_iconveh (const char *src, # endif cd = iconv_open (to_codeset, from_codeset); - if (cd == (iconv_t)(-1)) - return NULL; if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) cd1 = (iconv_t)(-1); @@ -1070,7 +1144,8 @@ str_iconveh (const char *src, if (cd1 == (iconv_t)(-1)) { int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return NULL; } @@ -1086,7 +1161,8 @@ str_iconveh (const char *src, int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return NULL; } @@ -1102,7 +1178,8 @@ str_iconveh (const char *src, iconv_close (cd2); if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; } else @@ -1114,7 +1191,8 @@ str_iconveh (const char *src, int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); free (result); errno = saved_errno; return NULL; @@ -1124,12 +1202,13 @@ str_iconveh (const char *src, /* Return NULL, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); free (result); errno = saved_errno; return NULL; } - if (iconv_close (cd) < 0) + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) { /* Return NULL, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */