1 /* Localization of proper names.
2 Copyright (C) 2006-2011 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2006.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "propername.h"
35 #include "localcharset.h"
36 #include "c-strcase.h"
37 #include "xstriconv.h"
41 /* The attribute __const__ was added in gcc 2.95.
   _GL_ATTRIBUTE_CONST expands to that attribute when the compiler reports
   itself as GCC 2.95 or newer, and to nothing otherwise, so it can be
   written unconditionally on a declaration. Any definition that is already
   in effect is discarded first. */
42 #undef _GL_ATTRIBUTE_CONST
43 #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
44 # define _GL_ATTRIBUTE_CONST __attribute__ ((__const__))
46 # define _GL_ATTRIBUTE_CONST /* empty */
50 /* Tests whether STRING contains trim (SUB), starting and ending at word boundaries.
52 Here, instead of implementing Unicode Standard Annex #29 for determining
53 word boundaries, we assume that trim (SUB) starts and ends with words and
54 only test whether the part before it ends with a non-word and the part
55 after it starts with a non-word.
   A word character is one accepted by mb_isalnum in the character-aware
   variant, or by isalnum in the byte-oriented variant. The start of STRING
   and the end of STRING both count as boundaries. Occurrences of trim (SUB)
   are examined one after another: after each candidate the search position
   moves one character (or one byte) past the start of that candidate. */
57 mbsstr_trimmed_wordbounded (const char *string, const char *sub)
/* The needle that is actually searched for is the trimmed form of SUB. */
59 char *tsub = trim (sub);
/* Each pass looks at the next occurrence of TSUB at or after STRING. */
62 for (; *string != '\0';)
64 const char *tsub_in_string = mbsstr (string, tsub);
/* No further occurrence of TSUB. */
65 if (tsub_in_string == NULL)
/* Character-aware variant: positions are walked with an mbui iterator and
   the neighbours of the match are classified with mb_isalnum. */
71 mbui_iterator_t string_iter;
72 bool word_boundary_before;
73 bool word_boundary_after;
/* Walk forward from STRING up to the match, remembering the most recent
   character visited; once the walk stops, that is the character directly
   in front of the match. When the match sits at STRING itself there is no
   such character and the boundary holds as initialised. */
75 mbui_init (string_iter, string);
76 word_boundary_before = true;
77 if (mbui_cur_ptr (string_iter) < tsub_in_string)
79 mbchar_t last_char_before_tsub;
/* Guard for the iterator running out before the match is reached. */
82 if (!mbui_avail (string_iter))
84 last_char_before_tsub = mbui_cur (string_iter);
85 mbui_advance (string_iter);
87 while (mbui_cur_ptr (string_iter) < tsub_in_string);
88 if (mb_isalnum (last_char_before_tsub))
89 word_boundary_before = false;
/* Re-seat the iterator on the match and step it forward while iterating
   over the characters of TSUB, so that it can be used to inspect what
   follows the match. */
92 mbui_init (string_iter, tsub_in_string);
94 mbui_iterator_t tsub_iter;
96 for (mbui_init (tsub_iter, tsub);
97 mbui_avail (tsub_iter);
98 mbui_advance (tsub_iter))
100 if (!mbui_avail (string_iter))
102 mbui_advance (string_iter);
/* Nothing left after the match counts as a boundary; otherwise the next
   character decides. */
105 word_boundary_after = true;
106 if (mbui_avail (string_iter))
108 mbchar_t first_char_after_tsub = mbui_cur (string_iter);
109 if (mb_isalnum (first_char_after_tsub))
110 word_boundary_after = false;
/* Both neighbours are non-word: this occurrence qualifies. */
113 if (word_boundary_before && word_boundary_after)
/* Next search position: the start of this occurrence plus the length
   reported by mb_len for the character found there. */
119 mbui_init (string_iter, tsub_in_string);
120 if (!mbui_avail (string_iter))
122 string = tsub_in_string + mb_len (mbui_cur (string_iter));
/* Byte-oriented variant of the same test: the bytes adjacent to the match
   are classified with isalnum. The casts to unsigned char keep the
   argument within the range that isalnum accepts. */
126 bool word_boundary_before;
128 bool word_boundary_after;
130 word_boundary_before = true;
/* Look at the preceding byte only when the match is not at STRING itself,
   so that tsub_in_string[-1] stays at or after STRING. */
131 if (string < tsub_in_string)
132 if (isalnum ((unsigned char) tsub_in_string[-1]))
133 word_boundary_before = false;
/* P is the position just past the matched text. */
135 p = tsub_in_string + strlen (tsub);
136 word_boundary_after = true;
138 if (isalnum ((unsigned char) *p))
139 word_boundary_after = false;
141 if (word_boundary_before && word_boundary_after)
/* Special case: the occurrence is located on the terminating NUL. In the
   ordinary case the search position becomes one byte past the start of
   this occurrence. */
147 if (*tsub_in_string == '\0')
149 string = tsub_in_string + 1;
157 /* Return the localization of NAME. NAME is written in ASCII.
   The translation is looked up with gettext. When gettext yields a pointer
   other than NAME, the translation is checked for a word-bounded occurrence
   of NAME; for the case where a combined form is wanted, a buffer is
   obtained with XNMALLOC and filled with the translation followed by NAME
   in parentheses.
   NOTE(review): the function carries _GL_ATTRIBUTE_CONST although it calls
   gettext and allocates; confirm that the attribute is intended here. */
159 const char * _GL_ATTRIBUTE_CONST
160 proper_name (const char *name)
162 /* See whether there is a translation. */
163 const char *translation = gettext (name);
/* Pointer comparison rather than strcmp: gettext hands back its argument
   itself when it has no translation for it. */
165 if (translation != name)
167 /* See whether the translation contains the original name. */
168 if (mbsstr_trimmed_wordbounded (translation, name))
172 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, 2 bytes for the space and opening parenthesis,
   1 for the closing parenthesis, 1 for the terminating NUL. */
174 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
176 sprintf (result, "%s (%s)", translation, name);
184 /* Return the localization of a name whose original writing is not ASCII.
185 NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
186 escape sequences. NAME_ASCII is a fallback written only with ASCII characters. */
190 proper_name_utf8 (const char *name_ascii, const char *name_utf8)
192 /* See whether there is a translation. */
193 const char *translation = gettext (name_ascii);
195 /* Try to convert NAME_UTF8 to the locale encoding. */
196 const char *locale_code = locale_charset ();
/* Two kinds of pointers are kept per conversion result. The alloc_*
   pointers receive what xstr_iconv returned and are the only ones handed
   to free. The name_* pointers are the read-only views used for matching
   and output; they may instead be set to NAME_UTF8, which is not owned by
   this function. */
197 char *alloc_name_converted = NULL;
198 char *alloc_name_converted_translit = NULL;
199 const char *name_converted = NULL;
200 const char *name_converted_translit = NULL;
/* The locale encoding does not compare equal to UTF-8 under c_strcasecmp:
   try a plain conversion of NAME_UTF8 first. */
203 if (c_strcasecmp (locale_code, "UTF-8") != 0)
206 name_converted = alloc_name_converted =
207 xstr_iconv (name_utf8, "UTF-8", locale_code);
/* A second, transliterating conversion is compiled in only for glibc 2.2
   or newer (uClibc excluded) or for a libiconv whose _LIBICONV_VERSION is
   at least 0x0105. */
209 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
210 && !defined __UCLIBC__) \
211 || _LIBICONV_VERSION >= 0x0105
213 char *converted_translit;
/* Build the target encoding name: LOCALE_CODE with the 10-character
   suffix //TRANSLIT appended. The second memcpy copies 10 + 1 bytes so
   that the terminating NUL of the suffix comes along. The temporary name
   is freed as soon as the conversion call has been made. */
215 size_t len = strlen (locale_code);
216 char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
217 memcpy (locale_code_translit, locale_code, len);
218 memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
221 xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
223 free (locale_code_translit);
/* A NULL result means there is no transliteration to consider. */
225 if (converted_translit != NULL)
227 # if !_LIBICONV_VERSION
228 /* Don't use the transliteration if it added question marks.
229 glibc's transliteration falls back to question marks; libiconv's
230 transliteration does not.
231 mbschr is equivalent to strchr in this case. */
232 if (strchr (converted_translit, '?') != NULL)
233 free (converted_translit);
/* Accepted transliteration: recorded both as owned buffer and as view. */
236 name_converted_translit = alloc_name_converted_translit =
/* No conversion: NAME_UTF8 itself serves as both the converted and the
   transliterated form (reviewer note: presumably the branch taken when the
   locale encoding is UTF-8; confirm against the full source). Only the
   views are set here, not the alloc_* pointers. */
245 name_converted = name_utf8;
246 name_converted_translit = name_utf8;
249 /* The name in locale encoding.
   The plain conversion is preferred, then the transliteration. The last
   alternative of this expression is not visible here (reviewer note:
   presumably NAME_ASCII; confirm against the full source). */
250 name = (name_converted != NULL ? name_converted :
251 name_converted_translit != NULL ? name_converted_translit :
254 /* See whether we have a translation. Some translators have not understood
255 that they should use the UTF-8 form of the name, if possible. So if the
256 translator provided a no-op translation, we ignore it. */
257 if (strcmp (translation, name_ascii) != 0)
259 /* See whether the translation contains the original name. */
260 if (mbsstr_trimmed_wordbounded (translation, name_ascii)
261 || (name_converted != NULL
262 && mbsstr_trimmed_wordbounded (translation, name_converted))
263 || (name_converted_translit != NULL
264 && mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
/* The translation already mentions the name in one of its forms; the
   conversion buffers are released on this path. */
266 if (alloc_name_converted != NULL)
267 free (alloc_name_converted);
268 if (alloc_name_converted_translit != NULL)
269 free (alloc_name_converted_translit);
274 /* Return "TRANSLATION (NAME)". */
/* Size: both strings, 2 bytes for the space and opening parenthesis,
   1 for the closing parenthesis, 1 for the terminating NUL. */
276 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
278 sprintf (result, "%s (%s)", translation, name);
/* RESULT now holds its own copy of NAME, so both conversion buffers can be
   released. */
280 if (alloc_name_converted != NULL)
281 free (alloc_name_converted);
282 if (alloc_name_converted_translit != NULL)
283 free (alloc_name_converted_translit);
/* Release the conversion buffers, except the one that NAME points to
   (reviewer note: presumably because NAME is what this path hands back to
   the caller; the return statement is not visible here). */
289 if (alloc_name_converted != NULL && alloc_name_converted != name)
290 free (alloc_name_converted);
291 if (alloc_name_converted_translit != NULL
292 && alloc_name_converted_translit != name)
293 free (alloc_name_converted_translit);
/* Test driver for mbsstr_trimmed_wordbounded: adopts the locale from the
   environment, then tests argv[1] as the string and argv[2] as the
   substring.
   NOTE(review): argv[1] and argv[2] are used as given; no check of argc is
   visible here. */
301 main (int argc, char *argv[])
303 setlocale (LC_ALL, "");
304 if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
/* Test driver for proper_name_utf8: adopts the locale from the environment
   and prints, followed by a newline, the result for a fixed sample name
   given as an ASCII fallback plus a UTF-8 form written with octal
   escapes. */
314 main (int argc, char *argv[])
316 setlocale (LC_ALL, "");
317 printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));