From 6d6f8a5cd56ca54a9e9d49516cd2e849f8a6b1c6 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Tue, 23 Jan 2007 01:09:41 +0000 Subject: [PATCH] Add optional offsets argument to conversion routines. --- ChangeLog | 14 ++ lib/striconveh.c | 149 ++++++++++-- lib/striconveh.h | 10 + lib/striconveha.c | 5 +- lib/striconveha.h | 5 + tests/test-striconveh.c | 625 ++++++++++++++++++++++++++++++++---------------- 6 files changed, 583 insertions(+), 225 deletions(-) diff --git a/ChangeLog b/ChangeLog index e6f93bf21..56d829480 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,19 @@ 2007-01-22 Bruno Haible + * lib/striconveh.h (mem_cd_iconveh, mem_iconveh): Add 'offsets' + argument. + * lib/striconveh.c (iconv_carefully_1): New function. + (mem_cd_iconveh_internal, mem_cd_iconveh, mem_iconveh): Add 'offsets' + argument. + (str_cd_iconveh): Update. + * lib/striconveha.h (mem_iconveha): Add 'offsets' argument. + * lib/striconveha.c (mem_iconveha): Add 'offsets' argument. + * tests/test-striconveh.c (MAGIC): New macro. + (new_offsets): New function. + (main): Test call with and without offsets. + +2007-01-22 Bruno Haible + * modules/sys_stat (Makefile.am): Use @MKDIR_P@ instead of $(MKDIR_P). * modules/sys_select (Makefile.am): Likewise. * modules/sys_socket (Makefile.am): Likewise. diff --git a/lib/striconveh.c b/lib/striconveh.c index b02a182bb..7feb76816 100644 --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -119,11 +119,68 @@ iconv_carefully (iconv_t cd, iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft)) # endif +/* iconv_carefully_1 is like iconv_carefully, except that it stops after + converting one character. */ +static size_t +iconv_carefully_1 (iconv_t cd, + const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, + bool *incremented) +{ + const char *inptr = *inbuf; + const char *inptr_end = inptr + *inbytesleft; + char *outptr = *outbuf; + size_t outsize = *outbytesleft; + const char *inptr_before = inptr; + size_t res = (size_t)(-1); + size_t insize; + + for (insize = 1; inptr + insize <= inptr_end; insize++) + { + res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + if (!(res == (size_t)(-1) && errno == EINVAL)) + break; + /* We expect that no input bytes have been consumed so far. */ + if (inptr != inptr_before) + abort (); + } + + *inbuf = inptr; + *inbytesleft = inptr_end - inptr; +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + if (res != (size_t)(-1) && res > 0) + { + /* iconv() has already incremented INPTR. We cannot go back to a + previous INPTR, otherwise the state inside CD would become invalid, + if FROM_CODESET is a stateful encoding. So, tell the caller that + *INBUF has already been incremented. */ + *incremented = (inptr > inptr_before); + errno = EILSEQ; + return (size_t)(-1); + } +# endif + + if (res != (size_t)(-1)) + { + *outbuf = outptr; + *outbytesleft = outsize; + } + *incremented = false; + return res; +} + static int mem_cd_iconveh_internal (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, enum iconv_ilseq_handler handler, size_t extra_alloc, + size_t *offsets, char **resultp, size_t *lengthp) { /* When a conversion error occurs, we cannot start using CD1 and CD2 at @@ -141,6 +198,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, char *result; size_t allocated; size_t length; + size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */ if (*lengthp >= sizeof (tmpbuf)) { @@ -153,6 +211,16 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, allocated = sizeof (tmpbuf); } result = initial_result; + + if (offsets != NULL) + { + size_t i; + + for (i = 0; i < srclen; i++) + offsets[i] = (size_t)(-1); + + last_length = (size_t)(-1); + } length = 0; /* First, try a direct conversion, and see whether a conversion error @@ -176,16 +244,29 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, size_t res; bool grow; - /* Use iconv_carefully instead of iconv here, because: - - If TO_CODESET is UTF-8, we can do the error handling in this loop, - no need for a second loop, - - With iconv() implementations other than GNU libiconv and GNU libc, - if we use iconv() in a big swoop, checking for an E2BIG return, - we lose the number of irreversible conversions. */ - res = iconv_carefully (cd, - &inptr, &insize, - &outptr, &outsize, - &incremented); + if (offsets != NULL) + { + if (length != last_length) /* ensure that offset[] be increasing */ + { + offsets[inptr - src] = length; + last_length = length; + } + res = iconv_carefully_1 (cd, + &inptr, &insize, + &outptr, &outsize, + &incremented); + } + else + /* Use iconv_carefully instead of iconv here, because: + - If TO_CODESET is UTF-8, we can do the error handling in this + loop, no need for a second loop, + - With iconv() implementations other than GNU libiconv and GNU + libc, if we use iconv() in a big swoop, checking for an E2BIG + return, we lose the number of irreversible conversions. */ + res = iconv_carefully (cd, + &inptr, &insize, + &outptr, &outsize, + &incremented); length = outptr - result; grow = (length + extra_alloc > allocated / 2); @@ -332,6 +413,15 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, /* The direct conversion failed, handler != iconveh_error, and cd2 != (iconv_t)(-1). Use a conversion through UTF-8. */ + if (offsets != NULL) + { + size_t i; + + for (i = 0; i < srclen; i++) + offsets[i] = (size_t)(-1); + + last_length = (size_t)(-1); + } length = 0; { # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ @@ -362,11 +452,25 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, /* Conversion step 1: from FROM_CODESET to UTF-8. */ if (in1size > 0) { + if (offsets != NULL + && length != last_length) /* ensure that offset[] be increasing */ + { + offsets[in1ptr - src] = length; + last_length = length; + } if (cd1 != (iconv_t)(-1)) - res1 = iconv_carefully (cd1, - (ICONV_CONST char **) &in1ptr, &in1size, - &out1ptr, &out1size, - &incremented1); + { + if (offsets != NULL) + res1 = iconv_carefully_1 (cd1, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); + else + res1 = iconv_carefully (cd1, + &in1ptr, &in1size, + &out1ptr, &out1size, + &incremented1); + } else { /* FROM_CODESET is UTF-8. */ @@ -418,7 +522,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, out1ptr += m; out1size -= m; } - while (in1size > 0); + while (offsets == NULL && in1size > 0); } } else if (do_final_flush1) @@ -469,7 +573,8 @@ mem_cd_iconveh_internal (const char *src, size_t srclen, errno1 = errno; utf8len = out1ptr - utf8buf; - if (in1size == 0 + if (offsets != NULL + || in1size == 0 || utf8len > utf8bufsize / 2 || (res1 == (size_t)(-1) && errno1 == E2BIG)) { @@ -726,10 +831,11 @@ int mem_cd_iconveh (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp) { return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0, - resultp, lengthp); + offsets, resultp, lengthp); } char * @@ -744,7 +850,7 @@ str_cd_iconveh (const char *src, char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh_internal (src, strlen (src), - cd, cd1, cd2, handler, 1, + cd, cd1, cd2, handler, 1, NULL, &result, &length); if (retval < 0) @@ -770,6 +876,7 @@ int mem_iconveh (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp) { if (srclen == 0) @@ -778,7 +885,7 @@ mem_iconveh (const char *src, size_t srclen, *lengthp = 0; return 0; } - else if (c_strcasecmp (from_codeset, to_codeset) == 0) + else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0) { char *result; @@ -854,8 +961,8 @@ mem_iconveh (const char *src, size_t srclen, result = *resultp; length = *lengthp; - retval = - mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length); + retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets, + &result, &length); if (retval < 0) { diff --git a/lib/striconveh.h b/lib/striconveh.h index 2ab1e6fed..e97890c0e 100644 --- a/lib/striconveh.h +++ b/lib/striconveh.h @@ -47,6 +47,10 @@ enum iconv_ilseq_handler (iconv_t)(-1) if FROM_CODESET is UTF-8). CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1) if TO_CODESET is UTF-8). + If OFFSET is not NULL, it should point to an array of SRCLEN integers; this + array is filled with offsets into the result, i.e. the character starting + at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], + and other offsets are set to (size_t)(-1). *RESULTP and *LENGTH should initially be a scratch buffer and its size, or *RESULTP can initially be NULL. May erase the contents of the memory at *RESULTP. @@ -58,6 +62,7 @@ extern int mem_cd_iconveh (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp); /* Convert an entire string from one encoding to another, using iconv. @@ -81,6 +86,10 @@ extern char * /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. + If OFFSET is not NULL, it should point to an array of SRCLEN integers; this + array is filled with offsets into the result, i.e. the character starting + at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], + and other offsets are set to (size_t)(-1). *RESULTP and *LENGTH should initially be a scratch buffer and its size, or *RESULTP can initially be NULL. May erase the contents of the memory at *RESULTP. @@ -92,6 +101,7 @@ extern int mem_iconveh (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp); /* Convert an entire string from one encoding to another, using iconv. diff --git a/lib/striconveha.c b/lib/striconveha.c index 9da18c93e..12a7bfc2f 100644 --- a/lib/striconveha.c +++ b/lib/striconveha.c @@ -147,10 +147,11 @@ int mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp) { int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler, - resultp, lengthp); + offsets, resultp, lengthp); if (retval >= 0 || errno != EINVAL) return retval; else @@ -168,7 +169,7 @@ mem_iconveha (const char *src, size_t srclen, { retval = mem_iconveha (src, srclen, from_codeset, to_codeset, handler, - resultp, lengthp); + offsets, resultp, lengthp); if (!(retval < 0 && errno == EILSEQ)) return retval; encodings++; diff --git a/lib/striconveha.h b/lib/striconveha.h index 28fc7e6b1..b4d7ce496 100644 --- a/lib/striconveha.h +++ b/lib/striconveha.h @@ -30,6 +30,10 @@ extern "C" { /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. The "from" encoding can also be a name defined for autodetection. + If OFFSET is not NULL, it should point to an array of SRCLEN integers; this + array is filled with offsets into the result, i.e. the character starting + at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]], + and other offsets are set to (size_t)(-1). *RESULTP and *LENGTH should initially be a scratch buffer and its size, or *RESULTP can initially be NULL. May erase the contents of the memory at *RESULTP. @@ -41,6 +45,7 @@ extern int mem_iconveha (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler, + size_t *offsets, char **resultp, size_t *lengthp); /* Convert an entire string from one encoding to another, using iconv. diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c index 7d4527372..31b7b721d 100644 --- a/tests/test-striconveh.c +++ b/tests/test-striconveh.c @@ -34,12 +34,25 @@ #define SIZEOF(array) (sizeof (array) / sizeof (array[0])) #define ASSERT(expr) if (!(expr)) abort (); +/* Magic number for detecting bounds violations. */ +#define MAGIC 0x1983EFF1 + +static size_t * +new_offsets (size_t n) +{ + size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t)); + offsets[n] = MAGIC; + return offsets; +} + int main () { static enum iconv_ilseq_handler handlers[] = { iconveh_error, iconveh_question_mark, iconveh_escape_sequence }; size_t h; + size_t o; + size_t i; #if HAVE_ICONV /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1, @@ -66,17 +79,29 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_88592_to_88591, + cd_88592_to_utf8, cd_utf8_to_88591, + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */ @@ -84,37 +109,59 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_88592_to_88591, - cd_88592_to_utf8, cd_utf8_to_88591, - handler, - &result, &length); - switch (handler) + for (o = 0; o < 2; o++) { - case iconveh_error: - ASSERT (retval == -1 && errno == EILSEQ); - ASSERT (result == NULL); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_88592_to_88591, + cd_88592_to_utf8, cd_utf8_to_88591, + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) + free (offsets); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i + 5)); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + } } } @@ -124,17 +171,32 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_88591_to_utf8, - cd_88591_to_utf8, (iconv_t)(-1), - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_88591_to_utf8, + cd_88591_to_utf8, (iconv_t)(-1), + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i < 12 ? i + 1 : + i < 18 ? i + 2 : + i + 3)); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ @@ -143,17 +205,36 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_utf8_to_88591, + (iconv_t)(-1), cd_utf8_to_88591, + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 41; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i == 1 ? (size_t)(-1) : + i < 13 ? i - 1 : + i == 13 ? (size_t)(-1) : + i < 20 ? i - 2 : + i == 20 ? (size_t)(-1) : + i < 40 ? i - 3 : + (size_t)(-1))); + ASSERT (offsets[41] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ @@ -161,37 +242,62 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */ - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, - handler, - &result, &length); - switch (handler) + for (o = 0; o < 2; o++) { - case iconveh_error: - ASSERT (retval == -1 && errno == EILSEQ); - ASSERT (result == NULL); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_utf8_to_88591, + (iconv_t)(-1), cd_utf8_to_88591, + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) + free (offsets); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 17; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i == 5 ? (size_t)(-1) : + i - 1)); + ASSERT (offsets[17] == MAGIC); + free (offsets); + } + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 17; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i == 5 ? (size_t)(-1) : + i + 4)); + ASSERT (offsets[17] == MAGIC); + free (offsets); + } + free (result); + } + break; + } } } @@ -200,17 +306,28 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\342"; - char *result = NULL; - size_t length = 0; - int retval = mem_cd_iconveh (input, strlen (input), - cd_utf8_to_88591, - (iconv_t)(-1), cd_utf8_to_88591, - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == 0); - if (result != NULL) - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + cd_utf8_to_88591, + (iconv_t)(-1), cd_utf8_to_88591, + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == 0); + if (o) + { + ASSERT (offsets[0] == 0); + ASSERT (offsets[1] == MAGIC); + free (offsets); + } + if (result != NULL) + free (result); + } } /* ------------------------ Test str_cd_iconveh() ------------------------ */ @@ -355,16 +472,28 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "ISO-8859-2", "ISO-8859-1", - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-2", "ISO-8859-1", + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */ @@ -372,36 +501,58 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */ - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "ISO-8859-2", "ISO-8859-1", - handler, - &result, &length); - switch (handler) + for (o = 0; o < 2; o++) { - case iconveh_error: - ASSERT (retval == -1 && errno == EILSEQ); - ASSERT (result == NULL); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-2", "ISO-8859-1", + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) + free (offsets); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i + 5)); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + } } } @@ -411,16 +562,31 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "ISO-8859-1", "UTF-8", - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-1", "UTF-8", + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i < 12 ? i + 1 : + i < 18 ? i + 2 : + i + 3)); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ @@ -429,16 +595,35 @@ main () enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237"; static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "UTF-8", "ISO-8859-1", - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 41; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i == 1 ? (size_t)(-1) : + i < 13 ? i - 1 : + i == 13 ? (size_t)(-1) : + i < 20 ? i - 2 : + i == 20 ? (size_t)(-1) : + i < 40 ? i - 3 : + (size_t)(-1))); + ASSERT (offsets[41] == MAGIC); + free (offsets); + } + free (result); + } } /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ @@ -446,36 +631,61 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */ - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "UTF-8", "ISO-8859-1", - handler, - &result, &length); - switch (handler) + for (o = 0; o < 2; o++) { - case iconveh_error: - ASSERT (retval == -1 && errno == EILSEQ); - ASSERT (result == NULL); - break; - case iconveh_question_mark: - { - static const char expected[] = "Rafa? Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; - case iconveh_escape_sequence: - { - static const char expected[] = "Rafa\\u0142 Maszkowski"; - ASSERT (retval == 0); - ASSERT (length == strlen (expected)); - ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); - free (result); - } - break; + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) + free (offsets); + break; + case iconveh_question_mark: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 17; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i == 5 ? (size_t)(-1) : + i - 1)); + ASSERT (offsets[17] == MAGIC); + free (offsets); + } + free (result); + } + break; + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa\\u0142 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 17; i++) + ASSERT (offsets[i] == (i < 5 ? i : + i == 5 ? (size_t)(-1) : + i + 4)); + ASSERT (offsets[17] == MAGIC); + free (offsets); + } + free (result); + } + break; + } } } @@ -484,16 +694,27 @@ main () { enum iconv_ilseq_handler handler = handlers[h]; static const char input[] = "\342"; - char *result = NULL; - size_t length = 0; - int retval = mem_iconveh (input, strlen (input), - "UTF-8", "ISO-8859-1", - handler, - &result, &length); - ASSERT (retval == 0); - ASSERT (length == 0); - if (result != NULL) - free (result); + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "UTF-8", "ISO-8859-1", + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == 0); + if (o) + { + ASSERT (offsets[0] == 0); + ASSERT (offsets[1] == MAGIC); + free (offsets); + } + if (result != NULL) + free (result); + } } /* ------------------------- Test str_iconveh() ------------------------- */ -- 2.11.0