1 /* Localization of proper names.
2 Copyright (C) 2006-2008 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "propername.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "localcharset.h"
#include "c-strcase.h"
#include "xstriconv.h"
/* Tests whether STRING contains trim (SUB), starting and ending at word
   boundaries.
   Here, instead of implementing Unicode Standard Annex #29 for determining
   word boundaries, we assume that trim (SUB) starts and ends with words and
   only test whether the part before it ends with a non-word and the part
   after it starts with a non-word.

   Every occurrence of trim (SUB) in STRING is examined in turn; the result
   is true as soon as one occurrence is neither preceded nor followed by an
   alphanumeric character.  The trimmed copy of SUB is released before
   returning.  */
static bool
mbsstr_trimmed_wordbounded (const char *string, const char *sub)
{
  char *tsub = trim (sub);
  bool found = false;

  for (; *string != '\0';)
    {
      const char *tsub_in_string = mbsstr (string, tsub);
      if (tsub_in_string == NULL)
        break;
      else
        {
#if HAVE_MBRTOWC
          if (MB_CUR_MAX > 1)
            {
              /* Multibyte locale: walk STRING character by character.  */
              mbui_iterator_t string_iter;
              bool word_boundary_before;
              bool word_boundary_after;

              /* Find the character that immediately precedes the match.
                 It can only be located by iterating forward from STRING.  */
              mbui_init (string_iter, string);
              word_boundary_before = true;
              if (mbui_cur_ptr (string_iter) < tsub_in_string)
                {
                  mbchar_t last_char_before_tsub;
                  do
                    {
                      if (!mbui_avail (string_iter))
                        abort ();
                      last_char_before_tsub = mbui_cur (string_iter);
                      mbui_advance (string_iter);
                    }
                  while (mbui_cur_ptr (string_iter) < tsub_in_string);
                  if (mb_isalnum (last_char_before_tsub))
                    word_boundary_before = false;
                }

              /* Step over the match, one character of STRING for each
                 character of TSUB, to reach the character that follows.  */
              mbui_init (string_iter, tsub_in_string);
              {
                mbui_iterator_t tsub_iter;

                for (mbui_init (tsub_iter, tsub);
                     mbui_avail (tsub_iter);
                     mbui_advance (tsub_iter))
                  {
                    if (!mbui_avail (string_iter))
                      abort ();
                    mbui_advance (string_iter);
                  }
              }
              word_boundary_after = true;
              if (mbui_avail (string_iter))
                {
                  mbchar_t first_char_after_tsub = mbui_cur (string_iter);
                  if (mb_isalnum (first_char_after_tsub))
                    word_boundary_after = false;
                }

              if (word_boundary_before && word_boundary_after)
                {
                  found = true;
                  break;
                }

              /* Not word-bounded here: resume the search one character
                 past the start of this occurrence.  */
              mbui_init (string_iter, tsub_in_string);
              if (!mbui_avail (string_iter))
                break;
              string = tsub_in_string + mb_len (mbui_cur (string_iter));
            }
          else
#endif /* HAVE_MBRTOWC */
            {
              /* Unibyte case: one byte is one character.  */
              bool word_boundary_before;
              const char *p;
              bool word_boundary_after;

              word_boundary_before = true;
              if (string < tsub_in_string)
                if (isalnum ((unsigned char) tsub_in_string[-1]))
                  word_boundary_before = false;

              p = tsub_in_string + strlen (tsub);
              word_boundary_after = true;
              if (*p != '\0')
                if (isalnum ((unsigned char) *p))
                  word_boundary_after = false;

              if (word_boundary_before && word_boundary_after)
                {
                  found = true;
                  break;
                }

              /* Not word-bounded here: resume the search one byte past
                 the start of this occurrence.  */
              if (*tsub_in_string == '\0')
                break;
              string = tsub_in_string + 1;
            }
        }
    }
  free (tsub);
  return found;
}
153 /* Return the localization of NAME. NAME is written in ASCII. */
156 proper_name (const char *name)
158 /* See whether there is a translation. */
159 const char *translation = gettext (name);
161 if (translation != name)
163 /* See whether the translation contains the original name. */
164 if (mbsstr_trimmed_wordbounded (translation, name))
168 /* Return "TRANSLATION (NAME)". */
170 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
172 sprintf (result, "%s (%s)", translation, name);
180 /* Return the localization of a name whose original writing is not ASCII.
181 NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
182 escape sequences. NAME_ASCII is a fallback written only with ASCII
186 proper_name_utf8 (const char *name_ascii, const char *name_utf8)
188 /* See whether there is a translation. */
189 const char *translation = gettext (name_ascii);
191 /* Try to convert NAME_UTF8 to the locale encoding. */
192 const char *locale_code = locale_charset ();
193 char *alloc_name_converted = NULL;
194 char *alloc_name_converted_translit = NULL;
195 const char *name_converted = NULL;
196 const char *name_converted_translit = NULL;
199 if (c_strcasecmp (locale_code, "UTF-8") != 0)
202 name_converted = alloc_name_converted =
203 xstr_iconv (name_utf8, "UTF-8", locale_code);
205 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
206 || _LIBICONV_VERSION >= 0x0105
208 size_t len = strlen (locale_code);
209 char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
210 memcpy (locale_code_translit, locale_code, len);
211 memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
213 name_converted_translit = alloc_name_converted_translit =
214 xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
216 free (locale_code_translit);
223 name_converted = name_utf8;
224 name_converted_translit = name_utf8;
227 /* The name in locale encoding. */
228 name = (name_converted != NULL ? name_converted :
229 name_converted_translit != NULL ? name_converted_translit :
232 if (translation != name_ascii)
234 /* See whether the translation contains the original name. */
235 if (mbsstr_trimmed_wordbounded (translation, name_ascii)
236 || (name_converted != NULL
237 && mbsstr_trimmed_wordbounded (translation, name_converted))
238 || (name_converted_translit != NULL
239 && mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
241 if (alloc_name_converted != NULL)
242 free (alloc_name_converted);
243 if (alloc_name_converted_translit != NULL)
244 free (alloc_name_converted_translit);
249 /* Return "TRANSLATION (NAME)". */
251 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
253 sprintf (result, "%s (%s)", translation, name);
255 if (alloc_name_converted != NULL)
256 free (alloc_name_converted);
257 if (alloc_name_converted_translit != NULL)
258 free (alloc_name_converted_translit);
264 if (alloc_name_converted != NULL && alloc_name_converted != name)
265 free (alloc_name_converted);
266 if (alloc_name_converted_translit != NULL
267 && alloc_name_converted_translit != name)
268 free (alloc_name_converted_translit);
276 main (int argc, char *argv[])
278 setlocale (LC_ALL, "");
279 if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))