Distinguish invalid and incomplete UTF-8 input, and set errno accordingly.
author Bruno Haible <bruno@clisp.org>
Wed, 28 Mar 2007 21:50:51 +0000 (21:50 +0000)
committer Bruno Haible <bruno@clisp.org>
Wed, 28 Mar 2007 21:50:51 +0000 (21:50 +0000)
ChangeLog
lib/striconveh.c
modules/striconveh

index f6fc6eb..d9107b9 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2007-03-28  Bruno Haible  <bruno@clisp.org>
 
+       * lib/striconveh.c (mem_cd_iconveh_internal): Use u8_mbtoucr instead
+       of u8_mbtouc in order to distinguish invalid and incomplete UTF-8
+       input.
+       * modules/striconveh (Depends-on): Add unistr/u8-mbtoucr. Replace
+       utf8-ucs4 with unistr/u8-mbtouc. Replace ucs4-utf8 with
+       unistr/u8-uctomb.
+
+2007-03-28  Bruno Haible  <bruno@clisp.org>
+
        * modules/unistr/u8-mbtoucr: New file.
        * lib/unistr/u8-mbtoucr.c: New file.
        * modules/unistr/u16-mbtoucr: New file.
index 8bee60a..4da1472 100644 (file)
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -479,16 +479,13 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                    int n;
                    int m;
 
-                   n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
-                   if (uc == 0xfffd
-                       && !(n >= 3
-                            && (uint8_t)in1ptr[0] == 0xEF
-                            && (uint8_t)in1ptr[1] == 0xBF
-                            && (uint8_t)in1ptr[2] == 0xBD))
+                   n = u8_mbtoucr (&uc, (const uint8_t *) in1ptr, in1size);
+                   if (n < 0)
                      {
+                       errno = (n == -2 ? EINVAL : EILSEQ);
+                       n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
                        in1ptr += n;
                        in1size -= n;
-                       errno = EILSEQ;
                        res1 = (size_t)(-1);
                        incremented1 = true;
                        break;
index 6b93087..172cd85 100644 (file)
--- a/modules/striconveh
+++ b/modules/striconveh
@@ -8,10 +8,11 @@ lib/striconveh.c
 Depends-on:
 stdbool
 iconv
-utf8-ucs4
-ucs4-utf8
 unistr/u8-prev
 unistr/u8-mbtouc-unsafe
+unistr/u8-mbtouc
+unistr/u8-mbtoucr
+unistr/u8-uctomb
 strdup
 c-strcase
 c-strcaseeq