1 /* Line breaking auxiliary functions.
2 Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "unilbrk/ulc-common.h"
29 is_utf8_encoding (const char *encoding)
31 if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
41 iconv_string_length (iconv_t cd, const char *s, size_t n)
43 # define TMPBUFSIZE 4096
45 char tmpbuf[TMPBUFSIZE];
46 const char *inptr = s;
51 char *outptr = tmpbuf;
52 size_t outsize = TMPBUFSIZE;
53 size_t res = iconv (cd, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize);
54 if (res == (size_t)(-1) && errno != E2BIG
55 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
56 /* Irix iconv() inserts a NUL byte if it cannot convert.
57 NetBSD iconv() inserts a question mark if it cannot convert.
58 Only GNU libiconv and GNU libc are known to prefer to fail rather
59 than doing a lossy conversion. */
64 count += outptr - tmpbuf;
66 /* Avoid glibc-2.1 bug and Solaris 7 through 9 bug. */
67 # if defined _LIBICONV_VERSION \
68 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
70 char *outptr = tmpbuf;
71 size_t outsize = TMPBUFSIZE;
72 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
73 if (res == (size_t)(-1))
75 count += outptr - tmpbuf;
77 /* Return to the initial state. */
78 iconv (cd, NULL, NULL, NULL, NULL);
85 iconv_string_keeping_offsets (iconv_t cd, const char *s, size_t n,
86 size_t *offtable, char *t, size_t m)
93 /* Avoid glibc-2.1 bug. */
94 # if !defined _LIBICONV_VERSION && (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1)
95 const size_t extra = 1;
97 const size_t extra = 0;
100 for (i = 0; i < n; i++)
101 offtable[i] = (size_t)(-1);
107 while (inptr < s_end)
109 const char *saved_inptr;
113 offtable[inptr - s] = outptr - t;
117 for (insize = 1; inptr + insize <= s_end; insize++)
119 res = iconv (cd, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize);
120 if (!(res == (size_t)(-1) && errno == EINVAL))
122 /* We expect that no input bytes have been consumed so far. */
123 if (inptr != saved_inptr)
126 /* After we verified the convertibility and computed the translation's
127 size m, there shouldn't be any conversion error here. */
128 if (res == (size_t)(-1)
129 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
130 /* Irix iconv() inserts a NUL byte if it cannot convert.
131 NetBSD iconv() inserts a question mark if it cannot convert.
132 Only GNU libiconv and GNU libc are known to prefer to fail rather
133 than doing a lossy conversion. */
139 /* Avoid glibc-2.1 bug and Solaris 7 through 9 bug. */
140 # if defined _LIBICONV_VERSION \
141 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
142 if (iconv (cd, NULL, NULL, &outptr, &outsize) == (size_t)(-1))
145 /* We should have produced exactly m output bytes. */
146 if (outsize != extra)
150 #endif /* HAVE_ICONV */
154 /* Tests whether a string is entirely ASCII. Returns 1 if yes.
155 Returns 0 if the string is in an 8-bit encoding or an ISO-2022 encoding. */
157 is_all_ascii (const char *s, size_t n)
159 for (; n > 0; s++, n--)
161 unsigned char c = (unsigned char) *s;
163 if (!(c_isprint (c) || c_isspace (c)))
169 #endif /* C_CTYPE_ASCII */