Improve name: "count-one-bits" is better than "popcount".
[gnulib.git] / lib / striconveha.c
index 9da18c9..c25249a 100644 (file)
@@ -25,6 +25,9 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "malloca.h"
+#include "c-strcase.h"
+
 #define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
 
 
@@ -143,14 +146,16 @@ uniconv_register_autodetect (const char *name,
     }
 }
 
-int
-mem_iconveha (const char *src, size_t srclen,
-             const char *from_codeset, const char *to_codeset,
-             enum iconv_ilseq_handler handler,
-             char **resultp, size_t *lengthp)
+/* Like mem_iconveha, except no handling of transliteration.  */
+static int
+mem_iconveha_notranslit (const char *src, size_t srclen,
+                        const char *from_codeset, const char *to_codeset,
+                        enum iconv_ilseq_handler handler,
+                        size_t *offsets,
+                        char **resultp, size_t *lengthp)
 {
   int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
-                           resultp, lengthp);
+                           offsets, resultp, lengthp);
   if (retval >= 0 || errno != EINVAL)
     return retval;
   else
@@ -162,13 +167,32 @@ mem_iconveha (const char *src, size_t srclen,
       for (alias = autodetect_list; alias != NULL; alias = alias->next)
        if (strcmp (from_codeset, alias->name) == 0)
          {
-           const char * const *encodings = alias->encodings_to_try;
+           const char * const *encodings;
 
+           if (handler != iconveh_error)
+             {
+               /* First try all encodings without any forgiving.  */
+               encodings = alias->encodings_to_try;
+               do
+                 {
+                   retval = mem_iconveha_notranslit (src, srclen,
+                                                     *encodings, to_codeset,
+                                                     iconveh_error, offsets,
+                                                     resultp, lengthp);
+                   if (!(retval < 0 && errno == EILSEQ))
+                     return retval;
+                   encodings++;
+                 }
+               while (*encodings != NULL);
+             }
+
+           encodings = alias->encodings_to_try;
            do
              {
-               retval = mem_iconveha (src, srclen,
-                                      from_codeset, to_codeset, handler,
-                                      resultp, lengthp);
+               retval = mem_iconveha_notranslit (src, srclen,
+                                                 *encodings, to_codeset,
+                                                 handler, offsets,
+                                                 resultp, lengthp);
                if (!(retval < 0 && errno == EILSEQ))
                  return retval;
                encodings++;
@@ -185,10 +209,69 @@ mem_iconveha (const char *src, size_t srclen,
     }
 }
 
-char *
-str_iconveha (const char *src,
+/* Convert the SRCLEN bytes at SRC from FROM_CODESET to TO_CODESET via
+   mem_iconveha_notranslit, forwarding HANDLER, OFFSETS, RESULTP and LENGTHP
+   unchanged.  If TRANSLITERATE is true and the iconv implementation is
+   GNU libc >= 2.2 or GNU libiconv >= 1.5, "//TRANSLIT" is appended to
+   TO_CODESET first.
+   An empty input (SRCLEN == 0) stores 0 in *LENGTHP, leaves *RESULTP
+   untouched and returns 0.
+   Otherwise the return value is that of mem_iconveha_notranslit, or -1 with
+   errno set to ENOMEM if the suffixed codeset name cannot be allocated.  */
+int
+mem_iconveha (const char *src, size_t srclen,
              const char *from_codeset, const char *to_codeset,
-             enum iconv_ilseq_handler handler)
+             bool transliterate,
+             enum iconv_ilseq_handler handler,
+             size_t *offsets,
+             char **resultp, size_t *lengthp)
+{
+  if (srclen == 0)
+    {
+      /* Nothing to convert.  */
+      *lengthp = 0;
+      return 0;
+    }
+
+  /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+     we want to use transliteration.  */
+#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+  if (transliterate)
+    {
+      int retval;
+      size_t len = strlen (to_codeset);
+      char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
+
+      /* malloca returns NULL if the allocation fails.  */
+      if (to_codeset_suffixed == NULL)
+       {
+         errno = ENOMEM;
+         return -1;
+       }
+      /* Build TO_CODESET "//TRANSLIT"; the second copy includes the NUL.  */
+      memcpy (to_codeset_suffixed, to_codeset, len);
+      memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+
+      retval = mem_iconveha_notranslit (src, srclen,
+                                       from_codeset, to_codeset_suffixed,
+                                       handler, offsets, resultp, lengthp);
+
+      freea (to_codeset_suffixed);
+
+      return retval;
+    }
+  else
+#endif
+    return mem_iconveha_notranslit (src, srclen,
+                                   from_codeset, to_codeset,
+                                   handler, offsets, resultp, lengthp);
+}
+
+/* Like str_iconveha, except no handling of transliteration.  */
+static char *
+str_iconveha_notranslit (const char *src,
+                        const char *from_codeset, const char *to_codeset,
+                        enum iconv_ilseq_handler handler)
 {
   char *result = str_iconveh (src, from_codeset, to_codeset, handler);
 
@@ -203,11 +269,30 @@ str_iconveha (const char *src,
       for (alias = autodetect_list; alias != NULL; alias = alias->next)
        if (strcmp (from_codeset, alias->name) == 0)
          {
-           const char * const *encodings = alias->encodings_to_try;
+           const char * const *encodings;
+
+           if (handler != iconveh_error)
+             {
+               /* First try all encodings without any forgiving.  */
+               encodings = alias->encodings_to_try;
+               do
+                 {
+                   result = str_iconveha_notranslit (src,
+                                                     *encodings, to_codeset,
+                                                     iconveh_error);
+                   if (!(result == NULL && errno == EILSEQ))
+                     return result;
+                   encodings++;
+                 }
+               while (*encodings != NULL);
+             }
 
+           encodings = alias->encodings_to_try;
            do
              {
-               result = str_iconveha (src, *encodings, to_codeset, handler);
+               result = str_iconveha_notranslit (src,
+                                                 *encodings, to_codeset,
+                                                 handler);
                if (!(result == NULL && errno == EILSEQ))
                  return result;
                encodings++;
@@ -223,3 +308,57 @@ str_iconveha (const char *src,
       return NULL;
     }
 }
+
+/* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET via
+   str_iconveha_notranslit, forwarding HANDLER.  If TRANSLITERATE is true and
+   the iconv implementation is GNU libc >= 2.2 or GNU libiconv >= 1.5,
+   "//TRANSLIT" is appended to TO_CODESET first.
+   If SRC is empty or the two codeset names compare equal under
+   c_strcasecmp, no conversion is done and a strdup'ed copy of SRC is returned.
+   Returns the result of str_iconveha_notranslit, or NULL with errno set to
+   ENOMEM if a memory allocation made here fails.  */
+char *
+str_iconveha (const char *src,
+             const char *from_codeset, const char *to_codeset,
+             bool transliterate,
+             enum iconv_ilseq_handler handler)
+{
+  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
+    {
+      char *result = strdup (src);
+
+      if (result == NULL)
+       errno = ENOMEM;
+      return result;
+    }
+
+  /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+     we want to use transliteration.  */
+#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+  if (transliterate)
+    {
+      char *result;
+      size_t len = strlen (to_codeset);
+      char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
+
+      /* malloca returns NULL if the allocation fails.  */
+      if (to_codeset_suffixed == NULL)
+       {
+         errno = ENOMEM;
+         return NULL;
+       }
+      /* Build TO_CODESET "//TRANSLIT"; the second copy includes the NUL.  */
+      memcpy (to_codeset_suffixed, to_codeset, len);
+      memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+
+      result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
+                                       handler);
+
+      freea (to_codeset_suffixed);
+
+      return result;
+    }
+  else
+#endif
+    return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);
+}