X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Funicodeio.c;h=62aee248bb26798895d25489ec04404f468be0f0;hb=43593319b31e6b0175b8eec4433bac744959822d;hp=4f0792bb68696d6ab0c7a5d7fbe138abb59d49ca;hpb=57a1c06115f3e74f70b18301b66b15ad34ecafcf;p=gnulib.git diff --git a/lib/unicodeio.c b/lib/unicodeio.c index 4f0792bb6..62aee248b 100644 --- a/lib/unicodeio.c +++ b/lib/unicodeio.c @@ -1,40 +1,30 @@ /* Unicode character output to streams with locale dependent encoding. - Copyright (C) 2000-2003 Free Software Foundation, Inc. + Copyright (C) 2000-2003, 2006, 2008-2013 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Bruno Haible <haible@clisp.cons.org>. */ -/* Note: This file requires the locale_charset() function. See in - libiconv-1.8/libcharset/INTEGRATE for how to obtain it. */ - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +#include <config.h> /* Specification. 
*/ #include "unicodeio.h" #include <stdio.h> #include <string.h> - #include <errno.h> -#ifndef errno -extern int errno; -#endif #if HAVE_ICONV # include <iconv.h> @@ -47,6 +37,7 @@ extern int errno; #define N_(msgid) msgid #include "localcharset.h" +#include "unistr.h" /* When we pass a Unicode character to iconv(), we must pass it in a suitable encoding. The standardized Unicode encodings are @@ -60,42 +51,6 @@ extern int errno; So we use UTF-8. It supports characters up to \U7FFFFFFF and is unambiguously defined. */ -/* Stores the UTF-8 representation of the Unicode character wc in r[0..5]. - Returns the number of bytes stored, or -1 if wc is out of range. */ -static int -utf8_wctomb (unsigned char *r, unsigned int wc) -{ - int count; - - if (wc < 0x80) - count = 1; - else if (wc < 0x800) - count = 2; - else if (wc < 0x10000) - count = 3; - else if (wc < 0x200000) - count = 4; - else if (wc < 0x4000000) - count = 5; - else if (wc <= 0x7fffffff) - count = 6; - else - return -1; - - switch (count) - { - /* Note: code falls through cases! */ - case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000; - case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000; - case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000; - case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800; - case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0; - case 1: r[0] = wc; - } - - return count; -} - /* Luckily, the encoding's name is platform independent. */ #define UTF8_NAME "UTF-8" @@ -107,11 +62,11 @@ utf8_wctomb (unsigned char *r, unsigned int wc) Assumes that the locale doesn't change between two calls. 
*/ long unicode_to_mb (unsigned int code, - long (*success) (const char *buf, size_t buflen, - void *callback_arg), - long (*failure) (unsigned int code, const char *msg, - void *callback_arg), - void *callback_arg) + long (*success) (const char *buf, size_t buflen, + void *callback_arg), + long (*failure) (unsigned int code, const char *msg, + void *callback_arg), + void *callback_arg) { static int initialized; static int is_utf8; @@ -129,12 +84,12 @@ unicode_to_mb (unsigned int code, is_utf8 = !strcmp (charset, UTF8_NAME); #if HAVE_ICONV if (!is_utf8) - { - utf8_to_local = iconv_open (charset, UTF8_NAME); - if (utf8_to_local == (iconv_t)(-1)) - /* For an unknown encoding, assume ASCII. */ - utf8_to_local = iconv_open ("ASCII", UTF8_NAME); - } + { + utf8_to_local = iconv_open (charset, UTF8_NAME); + if (utf8_to_local == (iconv_t)(-1)) + /* For an unknown encoding, assume ASCII. */ + utf8_to_local = iconv_open ("ASCII", UTF8_NAME); + } #endif initialized = 1; } @@ -144,14 +99,14 @@ unicode_to_mb (unsigned int code, { #if HAVE_ICONV if (utf8_to_local == (iconv_t)(-1)) - return failure (code, N_("iconv function not usable"), callback_arg); + return failure (code, N_("iconv function not usable"), callback_arg); #else return failure (code, N_("iconv function not available"), callback_arg); #endif } /* Convert the character to UTF-8. */ - count = utf8_wctomb ((unsigned char *) inbuf, code); + count = u8_uctomb ((unsigned char *) inbuf, code, sizeof (inbuf)); if (count < 0) return failure (code, N_("character out of range"), callback_arg); @@ -172,24 +127,26 @@ unicode_to_mb (unsigned int code, /* Convert the character from UTF-8 to the locale's charset. */ res = iconv (utf8_to_local, - (ICONV_CONST char **)&inptr, &inbytesleft, - &outptr, &outbytesleft); + (ICONV_CONST char **)&inptr, &inbytesleft, + &outptr, &outbytesleft); if (inbytesleft > 0 || res == (size_t)(-1) - /* Irix iconv() inserts a NUL byte if it cannot convert. 
*/ + /* Irix iconv() inserts a NUL byte if it cannot convert. */ # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) - || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') + || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') # endif ) - return failure (code, NULL, callback_arg); + return failure (code, NULL, callback_arg); /* Avoid glibc-2.1 bug and Solaris 7 bug. */ # if defined _LIBICONV_VERSION \ - || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + || !(((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) \ + && !defined __UCLIBC__) \ + || defined __sun) /* Get back to the initial shift state. */ res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); if (res == (size_t)(-1)) - return failure (code, NULL, callback_arg); + return failure (code, NULL, callback_arg); # endif return success (outbuf, outptr - outbuf, callback_arg); @@ -207,26 +164,33 @@ fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg) { FILE *stream = (FILE *) callback_arg; + /* The return value of fwrite can be ignored here, because under normal + conditions (STREAM is an open stream and not wide-character oriented) + when fwrite() returns a value != buflen it also sets STREAM's error + indicator. */ fwrite (buf, 1, buflen, stream); return 0; } /* Simple failure callback that displays an error and exits. */ static long -exit_failure_callback (unsigned int code, const char *msg, void *callback_arg) +exit_failure_callback (unsigned int code, const char *msg, + void *callback_arg _GL_UNUSED) { if (msg == NULL) error (1, 0, _("cannot convert U+%04X to local character set"), code); else error (1, 0, _("cannot convert U+%04X to local character set: %s"), code, - gettext (msg)); + gettext (msg)); return -1; } /* Simple failure callback that displays a fallback representation in plain ASCII, using the same notation as ISO C99 strings. 
*/ static long -fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg) +fallback_failure_callback (unsigned int code, + const char *msg _GL_UNUSED, + void *callback_arg) { FILE *stream = (FILE *) callback_arg; @@ -244,8 +208,8 @@ void print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) { unicode_to_mb (code, fwrite_success_callback, - exit_on_error - ? exit_failure_callback - : fallback_failure_callback, - stream); + exit_on_error + ? exit_failure_callback + : fallback_failure_callback, + stream); }