From d270926f2e82e654f9cabd4a895c283edb5c2183 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 24 May 2010 23:00:42 +0200 Subject: [PATCH] Don't use conversion with transliteration in u{8,16,32}_strcoll. --- ChangeLog | 22 ++++++++++++++++++++++ lib/unistr/u-strcoll.h | 19 ++++++++++++------- lib/unistr/u16-strcoll.c | 2 +- lib/unistr/u32-strcoll.c | 2 +- lib/unistr/u8-strcoll.c | 2 +- modules/unistr/u16-strcoll | 5 +++-- modules/unistr/u32-strcoll | 5 +++-- modules/unistr/u8-strcoll | 5 +++-- 8 files changed, 46 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index bfbd9249e..45828dcab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,27 @@ 2010-05-24 Bruno Haible + Don't use conversion with transliteration in u{8,16,32}_strcoll. + * lib/unistr/u-strcoll.h (FUNC): Use U_STRCONV_TO_ENCODING with + iconveh_error argument. + * lib/unistr/u8-strcoll.c: Define U_STRCONV_TO_ENCODING instead of + U_STRCONV_TO_LOCALE. + * lib/unistr/u16-strcoll.c: Likewise. + * lib/unistr/u32-strcoll.c: Likewise. + * modules/unistr/u8-strcoll (Depends-on): Add + uniconv/u8-strconv-to-enc, localcharset. Remove + uniconv/u8-strconv-to-locale. + (configure.ac): Bump version number. + * modules/unistr/u16-strcoll (Depends-on): Add + uniconv/u16-strconv-to-enc, localcharset. Remove + uniconv/u16-strconv-to-locale. + (configure.ac): Bump version number. + * modules/unistr/u32-strcoll (Depends-on): Add + uniconv/u32-strconv-to-enc, localcharset. Remove + uniconv/u32-strconv-to-locale. + (configure.ac): Bump version number. + +2010-05-24 Bruno Haible + Avoid a test failure on NetBSD 5.0. * tests/test-striconveh.c (main): On NetBSD, skip a test that triggers an iconv() bug. diff --git a/lib/unistr/u-strcoll.h b/lib/unistr/u-strcoll.h index 9ec5c6017..62addbaf1 100644 --- a/lib/unistr/u-strcoll.h +++ b/lib/unistr/u-strcoll.h @@ -23,14 +23,19 @@ FUNC (const UNIT *s1, const UNIT *s2) When it fails, it sets errno, but also returns a meaningful return value, for the sake of callers which ignore errno. */ int final_errno = errno; + const char *encoding = locale_charset (); char *sl1; char *sl2; int result; - sl1 = U_STRCONV_TO_LOCALE (s1); + /* Pass iconveh_error here, not iconveh_question_mark. Otherwise the + conversion to locale encoding can do transliteration or map some + characters to question marks, leading to results that depend on the + iconv() implementation and are not obvious. */ + sl1 = U_STRCONV_TO_ENCODING (s1, encoding, iconveh_error); if (sl1 != NULL) { - sl2 = U_STRCONV_TO_LOCALE (s2); + sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error); if (sl2 != NULL) { /* Compare sl1 and sl2. */ @@ -41,10 +46,10 @@ FUNC (const UNIT *s1, const UNIT *s2) /* strcoll succeeded. */ free (sl1); free (sl2); - /* The conversion to locale encoding can do transliteration or - map some characters to question marks. Therefore sl1 and sl2 - may be equal when s1 and s2 were in fact different. Return a - nonzero result in this case. */ + /* The conversion to locale encoding can drop Unicode TAG + characters. Therefore sl1 and sl2 may be equal when s1 + and s2 were in fact different. Return a nonzero result + in this case. */ if (result == 0) result = U_STRCMP (s1, s2); } @@ -68,7 +73,7 @@ FUNC (const UNIT *s1, const UNIT *s2) else { final_errno = errno; - sl2 = U_STRCONV_TO_LOCALE (s2); + sl2 = U_STRCONV_TO_ENCODING (s2, encoding, iconveh_error); if (sl2 != NULL) { /* s2 could be converted to locale encoding, s1 not. */ diff --git a/lib/unistr/u16-strcoll.c b/lib/unistr/u16-strcoll.c index 5a504bf4d..b75ba34a3 100644 --- a/lib/unistr/u16-strcoll.c +++ b/lib/unistr/u16-strcoll.c @@ -29,5 +29,5 @@ #define FUNC u16_strcoll #define UNIT uint16_t #define U_STRCMP u16_strcmp -#define U_STRCONV_TO_LOCALE u16_strconv_to_locale +#define U_STRCONV_TO_ENCODING u16_strconv_to_encoding #include "u-strcoll.h" diff --git a/lib/unistr/u32-strcoll.c b/lib/unistr/u32-strcoll.c index 974885534..95bab0757 100644 --- a/lib/unistr/u32-strcoll.c +++ b/lib/unistr/u32-strcoll.c @@ -29,5 +29,5 @@ #define FUNC u32_strcoll #define UNIT uint32_t #define U_STRCMP u32_strcmp -#define U_STRCONV_TO_LOCALE u32_strconv_to_locale +#define U_STRCONV_TO_ENCODING u32_strconv_to_encoding #include "u-strcoll.h" diff --git a/lib/unistr/u8-strcoll.c b/lib/unistr/u8-strcoll.c index 9ffa135e6..b4d01061c 100644 --- a/lib/unistr/u8-strcoll.c +++ b/lib/unistr/u8-strcoll.c @@ -29,5 +29,5 @@ #define FUNC u8_strcoll #define UNIT uint8_t #define U_STRCMP u8_strcmp -#define U_STRCONV_TO_LOCALE u8_strconv_to_locale +#define U_STRCONV_TO_ENCODING u8_strconv_to_encoding #include "u-strcoll.h" diff --git a/modules/unistr/u16-strcoll b/modules/unistr/u16-strcoll index 718f7d9e3..d364454f6 100644 --- a/modules/unistr/u16-strcoll +++ b/modules/unistr/u16-strcoll @@ -8,10 +8,11 @@ lib/unistr/u-strcoll.h Depends-on: unistr/base unistr/u16-strcmp -uniconv/u16-strconv-to-locale +uniconv/u16-strconv-to-enc +localcharset configure.ac: -gl_LIBUNISTRING_LIBSOURCE([0.9.3], [unistr/u16-strcoll.c]) +gl_LIBUNISTRING_LIBSOURCE([0.9.4], [unistr/u16-strcoll.c]) Makefile.am: diff --git a/modules/unistr/u32-strcoll b/modules/unistr/u32-strcoll index e2441758d..1cdd59523 100644 --- a/modules/unistr/u32-strcoll +++ b/modules/unistr/u32-strcoll @@ -8,10 +8,11 @@ lib/unistr/u-strcoll.h Depends-on: unistr/base unistr/u32-strcmp -uniconv/u32-strconv-to-locale +uniconv/u32-strconv-to-enc +localcharset configure.ac: -gl_LIBUNISTRING_LIBSOURCE([0.9.3], [unistr/u32-strcoll.c]) +gl_LIBUNISTRING_LIBSOURCE([0.9.4], [unistr/u32-strcoll.c]) Makefile.am: diff --git a/modules/unistr/u8-strcoll b/modules/unistr/u8-strcoll index e28e77193..45d563f55 100644 --- a/modules/unistr/u8-strcoll +++ b/modules/unistr/u8-strcoll @@ -8,10 +8,11 @@ lib/unistr/u-strcoll.h Depends-on: unistr/base unistr/u8-strcmp -uniconv/u8-strconv-to-locale +uniconv/u8-strconv-to-enc +localcharset configure.ac: -gl_LIBUNISTRING_LIBSOURCE([0.9.3], [unistr/u8-strcoll.c]) +gl_LIBUNISTRING_LIBSOURCE([0.9.4], [unistr/u8-strcoll.c]) Makefile.am: -- 2.11.0