X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funicodeio.c;h=9df8b79f84998f7c870f4efc903e86002df7ce3d;hb=1276a2c5f24c0c932426aca9c899fa524d2443f2;hp=13888529258acbdaf60e30cbdec5b3c76c6a85c7;hpb=45fb04778c3cb4f91da291c214461deb45e7c857;p=gnulib.git diff --git a/lib/unicodeio.c b/lib/unicodeio.c index 138885292..9df8b79f8 100644 --- a/lib/unicodeio.c +++ b/lib/unicodeio.c @@ -1,43 +1,30 @@ /* Unicode character output to streams with locale dependent encoding. - Copyright (C) 2000 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2006, 2008-2014 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published - by the Free Software Foundation; either version 2, or (at your option) - any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU Library General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ /* Written by Bruno Haible . */ -#ifdef HAVE_CONFIG_H -# include -#endif +#include -#if HAVE_STDDEF_H -# include -#endif +/* Specification. */ +#include "unicodeio.h" #include -#if HAVE_STRING_H -# include -#else -# include -#endif - +#include #include -#ifndef errno -extern int errno; -#endif #if HAVE_ICONV # include @@ -45,14 +32,12 @@ extern int errno; #include -#if ENABLE_NLS -# include -# define _(Text) gettext (Text) -#else -# define _(Text) Text -#endif +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid -#include "unicodeio.h" +#include "localcharset.h" +#include "unistr.h" /* When we pass a Unicode character to iconv(), we must pass it in a suitable encoding. The standardized Unicode encodings are @@ -66,49 +51,22 @@ extern int errno; So we use UTF-8. It supports characters up to \U7FFFFFFF and is unambiguously defined. */ -/* Stores the UTF-8 representation of the Unicode character wc in r[0..5]. - Returns the number of bytes stored, or -1 if wc is out of range. */ -static int -utf8_wctomb (unsigned char *r, unsigned int wc) -{ - int count; - - if (wc < 0x80) - count = 1; - else if (wc < 0x800) - count = 2; - else if (wc < 0x10000) - count = 3; - else if (wc < 0x200000) - count = 4; - else if (wc < 0x4000000) - count = 5; - else if (wc <= 0x7fffffff) - count = 6; - else - return -1; - - switch (count) - { - /* Note: code falls through cases! */ - case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000; - case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000; - case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000; - case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800; - case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0; - case 1: r[0] = wc; - } - - return count; -} - /* Luckily, the encoding's name is platform independent. */ #define UTF8_NAME "UTF-8" -/* Outputs the Unicode character CODE to the output stream STREAM. +/* Converts the Unicode character CODE to its multibyte representation + in the current locale and calls the SUCCESS callback on the resulting + byte sequence. If an error occurs, invokes the FAILURE callback instead, + passing it CODE and an English error string. + Returns whatever the callback returned. Assumes that the locale doesn't change between two calls. */ -void -print_unicode_char (FILE *stream, unsigned int code) +long +unicode_to_mb (unsigned int code, + long (*success) (const char *buf, size_t buflen, + void *callback_arg), + long (*failure) (unsigned int code, const char *msg, + void *callback_arg), + void *callback_arg) { static int initialized; static int is_utf8; @@ -121,42 +79,40 @@ print_unicode_char (FILE *stream, unsigned int code) if (!initialized) { - extern const char *locale_charset PARAMS ((void)); const char *charset = locale_charset (); - is_utf8 = (charset != NULL && !strcmp (charset, UTF8_NAME)); + is_utf8 = !strcmp (charset, UTF8_NAME); #if HAVE_ICONV if (!is_utf8) - { - utf8_to_local = (charset != NULL - ? iconv_open (charset, UTF8_NAME) - : (iconv_t)(-1)); - if (utf8_to_local == (iconv_t)(-1)) - { - /* For an unknown encoding, assume ASCII. */ - utf8_to_local = iconv_open ("ASCII", UTF8_NAME); - if (utf8_to_local == (iconv_t)(-1)) - error (1, 0, - _("cannot output U+%04X: iconv function not usable"), - code); - } - } + { + utf8_to_local = iconv_open (charset, UTF8_NAME); + if (utf8_to_local == (iconv_t)(-1)) + /* For an unknown encoding, assume ASCII. */ + utf8_to_local = iconv_open ("ASCII", UTF8_NAME); + } #endif initialized = 1; } + /* Test whether the utf8_to_local converter is available at all. */ + if (!is_utf8) + { +#if HAVE_ICONV + if (utf8_to_local == (iconv_t)(-1)) + return failure (code, N_("iconv function not usable"), callback_arg); +#else + return failure (code, N_("iconv function not available"), callback_arg); +#endif + } + /* Convert the character to UTF-8. */ - count = utf8_wctomb ((unsigned char *) inbuf, code); + count = u8_uctomb ((unsigned char *) inbuf, code, sizeof (inbuf)); if (count < 0) - error (1, 0, _("U+%04X: character out of range"), code); + return failure (code, N_("character out of range"), callback_arg); - if (is_utf8) - { - fwrite (inbuf, 1, count, stream); - } - else - { #if HAVE_ICONV + if (!is_utf8) + { char outbuf[25]; const char *inptr; size_t inbytesleft; @@ -170,31 +126,90 @@ print_unicode_char (FILE *stream, unsigned int code) outbytesleft = sizeof (outbuf); /* Convert the character from UTF-8 to the locale's charset. */ - res = iconv (utf8_to_local, &inptr, &inbytesleft, &outptr, &outbytesleft); + res = iconv (utf8_to_local, + (ICONV_CONST char **)&inptr, &inbytesleft, + &outptr, &outbytesleft); if (inbytesleft > 0 || res == (size_t)(-1) - /* Irix iconv() inserts a NUL byte if it cannot convert. */ + /* Irix iconv() inserts a NUL byte if it cannot convert. */ # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) - || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') + || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') # endif ) - error (1, res == (size_t)(-1) ? errno : 0, - _("cannot convert U+%04X to local character set"), code); + return failure (code, NULL, callback_arg); - /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ + /* Avoid glibc-2.1 bug and Solaris 7 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) \ + && !defined __UCLIBC__) \ + || defined __sun) /* Get back to the initial shift state. */ res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); if (res == (size_t)(-1)) - error (1, errno, _("cannot convert U+%04X to local character set"), - code); + return failure (code, NULL, callback_arg); # endif - fwrite (outbuf, 1, outptr - outbuf, stream); -#else - error (1, 0, _("cannot output U+%04X: iconv function not available"), - code); -#endif + return success (outbuf, outptr - outbuf, callback_arg); } +#endif + + /* At this point, is_utf8 is true, so no conversion is needed. */ + return success (inbuf, count, callback_arg); +} + +/* Simple success callback that outputs the converted string. + The STREAM is passed as callback_arg. */ +long +fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; + + /* The return value of fwrite can be ignored here, because under normal + conditions (STREAM is an open stream and not wide-character oriented) + when fwrite() returns a value != buflen it also sets STREAM's error + indicator. */ + fwrite (buf, 1, buflen, stream); + return 0; +} + +/* Simple failure callback that displays an error and exits. */ +static long +exit_failure_callback (unsigned int code, const char *msg, + void *callback_arg _GL_UNUSED) +{ + if (msg == NULL) + error (1, 0, _("cannot convert U+%04X to local character set"), code); + else + error (1, 0, _("cannot convert U+%04X to local character set: %s"), code, + gettext (msg)); + return -1; +} + +/* Simple failure callback that displays a fallback representation in plain + ASCII, using the same notation as ISO C99 strings. */ +static long +fallback_failure_callback (unsigned int code, + const char *msg _GL_UNUSED, + void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; + + if (code < 0x10000) + fprintf (stream, "\\u%04X", code); + else + fprintf (stream, "\\U%08X", code); + return -1; +} + +/* Outputs the Unicode character CODE to the output stream STREAM. + Upon failure, exit if exit_on_error is true, otherwise output a fallback + notation. */ +void +print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) +{ + unicode_to_mb (code, fwrite_success_callback, + exit_on_error + ? exit_failure_callback + : fallback_failure_callback, + stream); }