1 /* Character set conversion with error handling and autodetection.
2 Copyright (C) 2002, 2005, 2007 Free Software Foundation, Inc.
3 Written by Bruno Haible.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include "striconveha.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>
/* Number of elements of the array A.  Only meaningful for true arrays,
   not for pointers.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
31 /* Autodetection list. */
/* One entry of the singly linked list of autodetection aliases.  */
struct autodetect_alias
{
  /* Next entry of the list, or NULL for the last entry.  */
  struct autodetect_alias *next;
  /* Alias name; it is compared against the FROM_CODESET argument.  */
  const char *name;
  /* NULL-terminated, non-empty list of encodings, tried in this order.  */
  const char * const *encodings_to_try;
};
/* Candidate encodings for the "autodetect_utf8" alias.  The list is
   NULL-terminated because its consumers iterate until they see NULL.  */
static const char * const autodetect_utf8_try[] =
{
  /* Try UTF-8 first.  There are very few ISO-8859-1 inputs that would
     be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1.  */
  "UTF-8", "ISO-8859-1",
  NULL
};
/* Candidate encodings for the "autodetect_jp" alias.  The list is
   NULL-terminated because its consumers iterate until they see NULL.  */
static const char * const autodetect_jp_try[] =
{
  /* Try the 7-bit encoding first.  If the input contains bytes >= 0x80,
     it is not valid in a 7-bit encoding, so this attempt fails and the
     next candidate is tried.
     Try EUC-JP next.  Short SHIFT_JIS inputs may come out wrong.  This
     is unavoidable.  People will condemn SHIFT_JIS.
     If we tried SHIFT_JIS first, then some short EUC-JP inputs would
     come out wrong, and people would condemn EUC-JP and Unix, which
     is the less desirable outcome.
     Finally try SHIFT_JIS.  */
  "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS",
  NULL
};
/* Candidate encodings for the "autodetect_kr" alias.  The list is
   NULL-terminated because its consumers iterate until they see NULL.  */
static const char * const autodetect_kr_try[] =
{
  /* Try the 7-bit encoding first.  If the input contains bytes >= 0x80,
     it is not valid in a 7-bit encoding, so this attempt fails and the
     next candidate is tried.
     Finally try EUC-KR.  */
  "ISO-2022-KR", "EUC-KR",
  NULL
};
69 static struct autodetect_alias autodetect_predefined[] =
71 { &autodetect_predefined[1], "autodetect_utf8", autodetect_utf8_try },
72 { &autodetect_predefined[2], "autodetect_jp", autodetect_jp_try },
73 { NULL, "autodetect_kr", autodetect_kr_try }
76 static struct autodetect_alias *autodetect_list = &autodetect_predefined[0];
77 static struct autodetect_alias **autodetect_list_end =
78 &autodetect_predefined[SIZEOF(autodetect_predefined)-1].next;
81 uniconv_register_autodetect (const char *name,
82 const char * const *try_in_order)
89 struct autodetect_alias *new_alias;
91 const char **new_try_in_order;
93 /* The TRY_IN_ORDER list must not be empty. */
94 if (try_in_order[0] == NULL)
100 /* We must deep-copy NAME and TRY_IN_ORDER, because they may be allocated
101 with dynamic extent. */
102 namelen = strlen (name) + 1;
103 memneed = sizeof (struct autodetect_alias) + namelen + sizeof (char *);
104 for (i = 0; try_in_order[i] != NULL; i++)
105 memneed += sizeof (char *) + strlen (try_in_order[i]) + 1;
108 memory = (char *) malloc (memneed);
111 new_alias = (struct autodetect_alias *) memory;
112 memory += sizeof (struct autodetect_alias);
114 new_try_in_order = (const char **) memory;
115 memory += (listlen + 1) * sizeof (char *);
117 new_name = (char *) memory;
118 memcpy (new_name, name, namelen);
121 for (i = 0; i < listlen; i++)
123 size_t len = strlen (try_in_order[i]) + 1;
124 memcpy (memory, try_in_order[i], len);
125 new_try_in_order[i] = (const char *) memory;
128 new_try_in_order[i] = NULL;
130 /* Now insert the new alias. */
131 new_alias->name = new_name;
132 new_alias->encodings_to_try = new_try_in_order;
133 new_alias->next = NULL;
134 /* FIXME: Not multithread-safe. */
135 *autodetect_list_end = new_alias;
136 autodetect_list_end = &new_alias->next;
147 mem_iconveha (const char *src, size_t srclen,
148 const char *from_codeset, const char *to_codeset,
149 enum iconv_ilseq_handler handler,
151 char **resultp, size_t *lengthp)
153 int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
154 offsets, resultp, lengthp);
155 if (retval >= 0 || errno != EINVAL)
159 struct autodetect_alias *alias;
161 /* Unsupported from_codeset or to_codeset. Check whether the caller
162 requested autodetection. */
163 for (alias = autodetect_list; alias != NULL; alias = alias->next)
164 if (strcmp (from_codeset, alias->name) == 0)
166 const char * const *encodings;
168 if (handler != iconveh_error)
170 /* First try all encodings without any forgiving. */
171 encodings = alias->encodings_to_try;
174 retval = mem_iconveha (src, srclen,
175 *encodings, to_codeset,
176 iconveh_error, offsets,
178 if (!(retval < 0 && errno == EILSEQ))
182 while (*encodings != NULL);
185 encodings = alias->encodings_to_try;
188 retval = mem_iconveha (src, srclen,
189 *encodings, to_codeset,
192 if (!(retval < 0 && errno == EILSEQ))
196 while (*encodings != NULL);
198 /* Return the last call's result. */
202 /* It wasn't an autodetection name. */
209 str_iconveha (const char *src,
210 const char *from_codeset, const char *to_codeset,
211 enum iconv_ilseq_handler handler)
213 char *result = str_iconveh (src, from_codeset, to_codeset, handler);
215 if (result != NULL || errno != EINVAL)
219 struct autodetect_alias *alias;
221 /* Unsupported from_codeset or to_codeset. Check whether the caller
222 requested autodetection. */
223 for (alias = autodetect_list; alias != NULL; alias = alias->next)
224 if (strcmp (from_codeset, alias->name) == 0)
226 const char * const *encodings;
228 if (handler != iconveh_error)
230 /* First try all encodings without any forgiving. */
231 encodings = alias->encodings_to_try;
234 result = str_iconveha (src,
235 *encodings, to_codeset,
237 if (!(result == NULL && errno == EILSEQ))
241 while (*encodings != NULL);
244 encodings = alias->encodings_to_try;
247 result = str_iconveha (src,
248 *encodings, to_codeset,
250 if (!(result == NULL && errno == EILSEQ))
254 while (*encodings != NULL);
256 /* Return the last call's result. */
260 /* It wasn't an autodetection name. */