From: Bruno Haible Date: Sun, 20 Apr 2008 18:02:43 +0000 (+0200) Subject: Fix test failure on platforms with non-GNU iconv. X-Git-Tag: v0.1~7524 X-Git-Url: http://erislabs.net/gitweb/?a=commitdiff_plain;h=cab313c6ffa7dc84862960754f4ae67971ca26c2;p=gnulib.git Fix test failure on platforms with non-GNU iconv. --- diff --git a/ChangeLog b/ChangeLog index 188e63c7d..8e3370705 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,14 @@ 2008-04-20 Bruno Haible + Fix test failure on platforms with non-GNU iconv. + * lib/uniconv/u16-conv-to-enc.c (u16_to_u8_lenient): New function. + (U_TO_U8): Use it, rather than u16_to_u8. + * lib/uniconv/u-conv-to-enc.h (FUNC): Allow an incomplete sequence of + units at the end of the input string. + * modules/uniconv/u16-conv-to-enc (Depends-on): Update. + +2008-04-20 Bruno Haible + * tests/uniconv/test-u8-conv-to-enc.c (main): Accept result == NULL when the resulting length is 0. * tests/uniconv/test-u16-conv-to-enc.c (main): Likewise. diff --git a/lib/uniconv/u-conv-to-enc.h b/lib/uniconv/u-conv-to-enc.h index b69a2b7f5..68ff81484 100644 --- a/lib/uniconv/u-conv-to-enc.h +++ b/lib/uniconv/u-conv-to-enc.h @@ -106,7 +106,7 @@ FUNC (const char *tocode, iunit = 0; i8 = 0; - while (iunit < srclen) + while (iunit < srclen && i8 < utf8_srclen) { int countunit; int count8; @@ -120,6 +120,17 @@ FUNC (const char *tocode, iunit += countunit; i8 += count8; } + /* Check that utf8_src has been traversed entirely. */ + if (i8 < utf8_srclen) + abort (); + /* Check that src has been traversed entirely, except possibly for an + incomplete sequence of units at the end. */ + if (iunit < srclen) + { + offsets[iunit] = *lengthp; + if (!(U_MBLEN (src + iunit, srclen - iunit) < 0)) + abort (); + } free (scaled_offsets); } if (utf8_src != tmpbuf) diff --git a/lib/uniconv/u16-conv-to-enc.c b/lib/uniconv/u16-conv-to-enc.c index c7ba2a5a1..ba70736cf 100644 --- a/lib/uniconv/u16-conv-to-enc.c +++ b/lib/uniconv/u16-conv-to-enc.c @@ -1,5 +1,5 @@ /* Conversion from UTF-16 to legacy encodings. - Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -39,9 +39,136 @@ # endif #endif + +#if !defined UTF16_NAME + +/* A variant of u16_to_u8 that treats an incomplete sequence of units at the + end as a harmless no-op, rather than reporting it as an EILSEQ error. */ + +#define FUNC u16_to_u8_lenient +#define SRC_UNIT uint16_t +#define DST_UNIT uint8_t + +static DST_UNIT * +FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp) +{ + const SRC_UNIT *s_end = s + n; + /* Output string accumulator. */ + DST_UNIT *result; + size_t allocated; + size_t length; + + if (resultbuf != NULL) + { + result = resultbuf; + allocated = *lengthp; + } + else + { + result = NULL; + allocated = 0; + } + length = 0; + /* Invariants: + result is either == resultbuf or == NULL or malloc-allocated. + If length > 0, then result != NULL. */ + + while (s < s_end) + { + ucs4_t uc; + int count; + + /* Fetch a Unicode character from the input string. */ + count = u16_mbtoucr (&uc, s, s_end - s); + if (count < 0) + { + if (count == -2) + /* Incomplete sequence of units. */ + break; + if (!(result == resultbuf || result == NULL)) + free (result); + errno = EILSEQ; + return NULL; + } + s += count; + + /* Store it in the output string. */ + count = u8_uctomb (result + length, uc, allocated - length); + if (count == -1) + { + if (!(result == resultbuf || result == NULL)) + free (result); + errno = EILSEQ; + return NULL; + } + if (count == -2) + { + DST_UNIT *memory; + + allocated = (allocated > 0 ? 2 * allocated : 12); + if (length + 6 > allocated) + allocated = length + 6; + if (result == resultbuf || result == NULL) + memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT)); + else + memory = + (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT)); + + if (memory == NULL) + { + if (!(result == resultbuf || result == NULL)) + free (result); + errno = ENOMEM; + return NULL; + } + if (result == resultbuf && length > 0) + memcpy ((char *) memory, (char *) result, + length * sizeof (DST_UNIT)); + result = memory; + count = u8_uctomb (result + length, uc, allocated - length); + if (count < 0) + abort (); + } + length += count; + } + + if (length == 0) + { + if (result == NULL) + { + /* Return a non-NULL value. NULL means error. */ + result = (DST_UNIT *) malloc (1); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + } + } + else if (result != resultbuf && length < allocated) + { + /* Shrink the allocated memory if possible. */ + DST_UNIT *memory; + + memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT)); + if (memory != NULL) + result = memory; + } + + *lengthp = length; + return result; +} + +#undef DST_UNIT +#undef SRC_UNIT +#undef FUNC + +#endif + + #define FUNC u16_conv_to_encoding #define UNIT uint16_t -#define U_TO_U8 u16_to_u8 +#define U_TO_U8 u16_to_u8_lenient #define U_MBLEN u16_mblen #if defined UTF16_NAME # define UTF_NAME UTF16_NAME diff --git a/modules/uniconv/u16-conv-to-enc b/modules/uniconv/u16-conv-to-enc index 04d4cb6d5..0eaf090c4 100644 --- a/modules/uniconv/u16-conv-to-enc +++ b/modules/uniconv/u16-conv-to-enc @@ -9,7 +9,10 @@ Depends-on: uniconv/base striconveha uniconv/u8-conv-to-enc -unistr/u16-to-u8 +unistr/u16-mbtoucr +unistr/u8-uctomb +unistr/u16-mblen +unistr/u8-mblen configure.ac: AC_REQUIRE([AC_C_BIGENDIAN])