From 8ec9a20ec394362a9adc767ca70cbd5f7a0ff4cb Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 28 Mar 2007 22:25:24 +0000 Subject: [PATCH] Some refactoring. --- ChangeLog | 5 +++ lib/striconveh.c | 123 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 82 insertions(+), 46 deletions(-) diff --git a/ChangeLog b/ChangeLog index d9107b957..df5abe753 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2007-03-28 Bruno Haible + * lib/striconveh.c (utf8conv_carefully): New function. + (mem_cd_iconveh_internal): Invoke it. + +2007-03-28 Bruno Haible + * lib/striconveh.c (mem_cd_iconveh_internal): Use u8_mbtoucr instead of u8_mbtouc in order to distinguish invalid and incomplete UTF-8 input. diff --git a/lib/striconveh.c b/lib/striconveh.c index 4da14722e..4765b38b1 100644 --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -173,6 +173,79 @@ iconv_carefully_1 (iconv_t cd, return res; } +/* utf8conv_carefully is like iconv, except that + - it converts from UTF-8 to UTF-8, + - it stops as soon as it encounters a conversion error, and it returns + in *INCREMENTED a boolean telling whether it has incremented the input + pointers past the error location, + - if one_character_only is true, it stops after converting one + character. On success it returns 0 and leaves *INCREMENTED untouched; on failure it returns (size_t)(-1) with errno set to EINVAL or EILSEQ (u8_mbtoucr rejected the input), to E2BIG (*OUTBYTESLEFT is 0 or u8_uctomb returned -2), or to EILSEQ (u8_uctomb returned -1). The in/out pointers and byte counts are stored back through the arguments in either case. */ +static size_t +utf8conv_carefully (bool one_character_only, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr = *inbuf; + size_t insize = *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + size_t res; + + res = 0; + do + { + ucs4_t uc; + int n; + int m; + + n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize); + if (n < 0) + { + errno = (n == -2 ? 
EINVAL : EILSEQ); + n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize); + inptr += n; + insize -= n; + res = (size_t)(-1); + *incremented = true; + break; + } + if (outsize == 0) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + m = u8_uctomb ((uint8_t *) outptr, uc, outsize); + if (m == -2) + { + errno = E2BIG; + res = (size_t)(-1); + *incremented = false; + break; + } + inptr += n; + insize -= n; + if (m == -1) + { + errno = EILSEQ; + res = (size_t)(-1); + *incremented = true; + break; + } + outptr += m; + outsize -= m; + } + while (!one_character_only && insize > 0); + + *inbuf = inptr; + *inbytesleft = insize; + *outbuf = outptr; + *outbytesleft = outsize; + return res; +} + static int mem_cd_iconveh_internal (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, @@ -472,52 +545,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, else { /* FROM_CODESET is UTF-8. */ - res1 = 0; - do - { - ucs4_t uc; - int n; - int m; - - n = u8_mbtoucr (&uc, (const uint8_t *) in1ptr, in1size); - if (n < 0) - { - errno = (n == -2 ? EINVAL : EILSEQ); - n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size); - in1ptr += n; - in1size -= n; - res1 = (size_t)(-1); - incremented1 = true; - break; - } - if (out1size == 0) - { - errno = E2BIG; - res1 = (size_t)(-1); - incremented1 = false; - break; - } - m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size); - if (m == -2) - { - errno = E2BIG; - res1 = (size_t)(-1); - incremented1 = false; - break; - } - in1ptr += n; - in1size -= n; - if (m == -1) - { - errno = EILSEQ; - res1 = (size_t)(-1); - incremented1 = true; - break; - } - out1ptr += m; - out1size -= m; - } - while (offsets == NULL && in1size > 0); + res1 = utf8conv_carefully (offsets != NULL, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); } } else if (do_final_flush1) -- 2.11.0