X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Ficonv_open.c;h=f1d61b9212717c0c4205222f261430706d87de72;hb=74dd1966a5db708c66201bf952976ef27bd33e7e;hp=f78cbb48d13794d147b55bc4f7bfd7ffdbdd886d;hpb=8119dc1622611078f2fca227cf94d65ab1765ce5;p=gnulib.git diff --git a/lib/iconv_open.c b/lib/iconv_open.c index f78cbb48d..f1d61b921 100644 --- a/lib/iconv_open.c +++ b/lib/iconv_open.c @@ -23,20 +23,23 @@ #include #include #include "c-ctype.h" +#include "c-strcase.h" #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) /* Namespace cleanliness. */ #define mapping_lookup rpl_iconv_open_mapping_lookup -/* The macro ICONV_FLAVOR is defined to one of these. */ +/* The macro ICONV_FLAVOR is defined to one of these or undefined. */ #define ICONV_FLAVOR_AIX "iconv_open-aix.h" #define ICONV_FLAVOR_HPUX "iconv_open-hpux.h" #define ICONV_FLAVOR_IRIX "iconv_open-irix.h" #define ICONV_FLAVOR_OSF "iconv_open-osf.h" -#include ICONV_FLAVOR +#ifdef ICONV_FLAVOR +# include ICONV_FLAVOR +#endif iconv_t rpl_iconv_open (const char *tocode, const char *fromcode) @@ -47,6 +50,59 @@ rpl_iconv_open (const char *tocode, const char *fromcode) char *fromcode_upper_end; char *tocode_upper_end; +#if REPLACE_ICONV_UTF + /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}. + Do this here, before calling the real iconv_open(), because OSF/1 5.1 + iconv() to these encodings inserts a BOM, which is wrong. + We do not need to handle conversion between arbitrary encodings and + UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step + conversion through UTF-8. + The _ICONV_* constants are chosen to be disjoint from any iconv_t + returned by the system's iconv_open() functions. Recall that iconv_t + is a scalar type. 
*/ + if (c_toupper (fromcode[0]) == 'U' + && c_toupper (fromcode[1]) == 'T' + && c_toupper (fromcode[2]) == 'F' + && fromcode[3] == '-') + { + if (c_toupper (tocode[0]) == 'U' + && c_toupper (tocode[1]) == 'T' + && c_toupper (tocode[2]) == 'F' + && tocode[3] == '-') + { + if (strcmp (fromcode + 4, "8") == 0) + { + if (c_strcasecmp (tocode + 4, "16BE") == 0) + return _ICONV_UTF8_UTF16BE; + if (c_strcasecmp (tocode + 4, "16LE") == 0) + return _ICONV_UTF8_UTF16LE; + if (c_strcasecmp (tocode + 4, "32BE") == 0) + return _ICONV_UTF8_UTF32BE; + if (c_strcasecmp (tocode + 4, "32LE") == 0) + return _ICONV_UTF8_UTF32LE; + } + else if (strcmp (tocode + 4, "8") == 0) + { + if (c_strcasecmp (fromcode + 4, "16BE") == 0) + return _ICONV_UTF16BE_UTF8; + if (c_strcasecmp (fromcode + 4, "16LE") == 0) + return _ICONV_UTF16LE_UTF8; + if (c_strcasecmp (fromcode + 4, "32BE") == 0) + return _ICONV_UTF32BE_UTF8; + if (c_strcasecmp (fromcode + 4, "32LE") == 0) + return _ICONV_UTF32LE_UTF8; + } + } + } +#endif + + /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1 + here. This would lead to programs that work in some locales (such as the + "C" or "en_US" locales) but do not work in East Asian locales. It is + better if programmers make their programs depend on GNU libiconv (except + on glibc systems), e.g. by using the AM_ICONV macro and documenting the + dependency in an INSTALL or DEPENDENCIES file. */ + /* Try with the original names first. This covers the case when fromcode or tocode is a lowercase encoding name that is understood by the system's iconv_open but not listed in our @@ -93,6 +149,7 @@ rpl_iconv_open (const char *tocode, const char *fromcode) tocode_upper_end = q; } +#ifdef ICONV_FLAVOR /* Apply the mappings. */ { const struct mapping *m = @@ -106,6 +163,10 @@ rpl_iconv_open (const char *tocode, const char *fromcode) tocode = (m != NULL ? 
m->vendor_name : tocode_upper); } +#else + fromcode = fromcode_upper; + tocode = tocode_upper; +#endif return iconv_open (tocode, fromcode); }