X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funicodeio.c;h=a146569e47eaf41ded178751a5a85e62a4b813cc;hb=609f9cb77296a39e8776d420091a66f2ea5acb72;hp=b70c5b476dc574cd34fa5b36e4a5ce29b45ef946;hpb=582b534b59d786c40680b786c19cd87f646c4304;p=gnulib.git diff --git a/lib/unicodeio.c b/lib/unicodeio.c index b70c5b476..a146569e4 100644 --- a/lib/unicodeio.c +++ b/lib/unicodeio.c @@ -1,6 +1,6 @@ /* Unicode character output to streams with locale dependent encoding. - Copyright (C) 2000 Free Software Foundation, Inc. + Copyright (C) 2000-2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published @@ -19,13 +19,15 @@ /* Written by Bruno Haible . */ +/* Note: This file requires the locale_charset() function. See in + libiconv-1.8/libcharset/INTEGRATE for how to obtain it. */ + #ifdef HAVE_CONFIG_H # include #endif -#if HAVE_STDDEF_H -# include -#endif +/* Specification. */ +#include "unicodeio.h" #include #if HAVE_STRING_H @@ -39,46 +41,17 @@ extern int errno; #endif -#if HAVE_LIMITS_H -# include -#endif -#ifndef MB_LEN_MAX -# define MB_LEN_MAX 1 -#endif - #if HAVE_ICONV # include #endif #include -#if ENABLE_NLS -# include -# define _(Text) gettext (Text) -#else -# define _(Text) Text -#endif - -#include "unicodeio.h" - -#if __STDC_ISO_10646__ && HAVE_WCTOMB - -/* Values of type wchar_t are Unicode code points. */ - -/* Place into BUF the locale-dependent representation of the character - CODE. Return the size of the result. If there is a conversion - error, return -1, setting errno appropriately. Assumes that the - locale doesn't change between two calls. */ -static size_t -convert_unicode_char (char buf[MB_LEN_MAX], unsigned int code) -{ - wchar_t wc = code; - errno = 0; - /* Test for truncation before invoking wctomb. */ - return wc == code ? wctomb (buf, wc) : -1; -} +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid -#else +#include "localcharset.h" /* When we pass a Unicode character to iconv(), we must pass it in a suitable encoding. The standardized Unicode encodings are @@ -129,101 +102,155 @@ utf8_wctomb (unsigned char *r, unsigned int wc) } /* Luckily, the encoding's name is platform independent. */ -# define UTF8_NAME "UTF-8" - -/* Place into BUF the locale-dependent representation of the character - CODE. Return the size of the result. If there is a conversion - error, return -1, setting errno appropriately. Assumes that the - locale doesn't change between two calls. */ -static size_t -convert_unicode_char (char buf[MB_LEN_MAX], unsigned int code) +#define UTF8_NAME "UTF-8" + +/* Converts the Unicode character CODE to its multibyte representation + in the current locale and calls the SUCCESS callback on the resulting + byte sequence. If an error occurs, invokes the FAILURE callback instead, + passing it CODE and an English error string. + Returns whatever the callback returned. + Assumes that the locale doesn't change between two calls. */ +long +unicode_to_mb (unsigned int code, + long (*success) PARAMS ((const char *buf, size_t buflen, + void *callback_arg)), + long (*failure) PARAMS ((unsigned int code, const char *msg, + void *callback_arg)), + void *callback_arg) { static int initialized; static int is_utf8; -# if HAVE_ICONV +#if HAVE_ICONV static iconv_t utf8_to_local; -# endif +#endif + + char inbuf[6]; + int count; if (!initialized) { - extern const char *locale_charset PARAMS ((void)); const char *charset = locale_charset (); - is_utf8 = (charset != NULL && !strcmp (charset, UTF8_NAME)); -# if HAVE_ICONV + is_utf8 = !strcmp (charset, UTF8_NAME); +#if HAVE_ICONV if (!is_utf8) { - utf8_to_local = (charset != NULL - ? iconv_open (charset, UTF8_NAME) - : (iconv_t) -1); - if (utf8_to_local == (iconv_t) -1) - { - /* For an unknown encoding, assume ASCII. */ - utf8_to_local = iconv_open ("ASCII", UTF8_NAME); - if (utf8_to_local == (iconv_t) -1) - { - errno = ENOTSUP; - return -1; - } - } + utf8_to_local = iconv_open (charset, UTF8_NAME); + if (utf8_to_local == (iconv_t)(-1)) + /* For an unknown encoding, assume ASCII. */ + utf8_to_local = iconv_open ("ASCII", UTF8_NAME); } -# endif +#endif initialized = 1; } + /* Test whether the utf8_to_local converter is available at all. */ + if (!is_utf8) + { +#if HAVE_ICONV + if (utf8_to_local == (iconv_t)(-1)) + return failure (code, N_("iconv function not usable"), callback_arg); +#else + return failure (code, N_("iconv function not available"), callback_arg); +#endif + } + /* Convert the character to UTF-8. */ - if (is_utf8) - return utf8_wctomb ((unsigned char *) buf, code); - else + count = utf8_wctomb ((unsigned char *) inbuf, code); + if (count < 0) + return failure (code, N_("character out of range"), callback_arg); + +#if HAVE_ICONV + if (!is_utf8) { -# if HAVE_ICONV - char inbuf[6]; - const char *inptr = inbuf; - size_t inbytesleft = utf8_wctomb ((unsigned char *) inbuf, code); - char *outptr = buf; - size_t outbytesleft = MB_LEN_MAX; + char outbuf[25]; + const char *inptr; + size_t inbytesleft; + char *outptr; + size_t outbytesleft; size_t res; - if (inbytesleft == (size_t) -1) - return -1; + inptr = inbuf; + inbytesleft = count; + outptr = outbuf; + outbytesleft = sizeof (outbuf); /* Convert the character from UTF-8 to the locale's charset. */ - res = iconv (utf8_to_local, &inptr, &inbytesleft, &outptr, &outbytesleft); - if (inbytesleft > 0 || res == (size_t) -1 + res = iconv (utf8_to_local, + (ICONV_CONST char **)&inptr, &inbytesleft, + &outptr, &outbytesleft); + if (inbytesleft > 0 || res == (size_t)(-1) /* Irix iconv() inserts a NUL byte if it cannot convert. */ -# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') -# endif - ) - return -1; +# endif + ) + return failure (code, NULL, callback_arg); /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ -# if defined _LIBICONV_VERSION \ +# if defined _LIBICONV_VERSION \ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) /* Get back to the initial shift state. */ - return iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); -# endif - - return outptr - buf; -# else - errno = ENOTSUP; - return -1; + res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); + if (res == (size_t)(-1)) + return failure (code, NULL, callback_arg); # endif + + return success (outbuf, outptr - outbuf, callback_arg); } +#endif + + /* At this point, is_utf8 is true, so no conversion is needed. */ + return success (inbuf, count, callback_arg); } -#endif +/* Simple success callback that outputs the converted string. + The STREAM is passed as callback_arg. */ +long +fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; -/* Output the Unicode character CODE to the output stream STREAM. */ -void -print_unicode_char (FILE *stream, unsigned int code) + fwrite (buf, 1, buflen, stream); + return 0; +} + +/* Simple failure callback that displays an error and exits. */ +static long +exit_failure_callback (unsigned int code, const char *msg, void *callback_arg) +{ + if (msg == NULL) + error (1, 0, _("cannot convert U+%04X to local character set"), code); + else + error (1, 0, _("cannot convert U+%04X to local character set: %s"), code, + gettext (msg)); + return -1; +} + +/* Simple failure callback that displays a fallback representation in plain + ASCII, using the same notation as ISO C99 strings. */ +static long +fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg) { - char buf[MB_LEN_MAX]; - size_t s = convert_unicode_char (buf, code); + FILE *stream = (FILE *) callback_arg; - if (s == (size_t) -1) - error (1, errno, _("cannot convert U+%04X to local character set"), code); + if (code < 0x10000) + fprintf (stream, "\\u%04X", code); else - fwrite (buf, 1, s, stream); + fprintf (stream, "\\U%08X", code); + return -1; +} + +/* Outputs the Unicode character CODE to the output stream STREAM. + Upon failure, exit if exit_on_error is true, otherwise output a fallback + notation. */ +void +print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) +{ + unicode_to_mb (code, fwrite_success_callback, + exit_on_error + ? exit_failure_callback + : fallback_failure_callback, + stream); }