Test indirect conversion as well.
[gnulib.git] / tests / test-striconveh.c
index 2557baa..22b4875 100644 (file)
@@ -1,5 +1,5 @@
 /* Test of character set conversion with error handling.
-   Copyright (C) 2007-2008 Free Software Foundation, Inc.
+   Copyright (C) 2007-2009 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -58,6 +58,7 @@ main ()
 {
   static enum iconv_ilseq_handler handlers[] =
     { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
+  size_t indirect;
   size_t h;
   size_t o;
   size_t i;
@@ -71,102 +72,136 @@ main ()
   iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
   iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
   iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
+  iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
+  iconveh_t cdeh_88592_to_88591;
+  iconveh_t cdeh_88592_to_88591_indirectly;
+  iconveh_t cdeh_88591_to_utf8;
+  iconveh_t cdeh_utf8_to_88591;
+  iconveh_t cdeh_utf7_to_utf8;
 
   ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
   ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
   ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
   ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
 
+  cdeh_88592_to_88591.cd = cd_88592_to_88591;
+  cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
+  cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
+
+  cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1);
+  cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
+  cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
+
+  cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
+  cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
+  cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
+
+  cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
+  cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
+  cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
+
+  cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
+  cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
+  cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
+
   /* ------------------------ Test mem_cd_iconveh() ------------------------ */
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */
-  for (h = 0; h < SIZEOF (handlers); h++)
+  for (indirect = 0; indirect <= 1; indirect++)
     {
-      enum iconv_ilseq_handler handler = handlers[h];
-      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      for (o = 0; o < 2; o++)
+      for (h = 0; h < SIZEOF (handlers); h++)
        {
-         size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
-         char *result = NULL;
-         size_t length = 0;
-         int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_88592_to_88591,
-                                      cd_88592_to_utf8, cd_utf8_to_88591,
-                                      handler,
-                                      offsets,
-                                      &result, &length);
-         ASSERT (retval == 0);
-         ASSERT (length == strlen (expected));
-         ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-         if (o)
+         enum iconv_ilseq_handler handler = handlers[h];
+         static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+         static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+         for (o = 0; o < 2; o++)
            {
-             for (i = 0; i < 37; i++)
-               ASSERT (offsets[i] == i);
-             ASSERT (offsets[37] == MAGIC);
-             free (offsets);
+             size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+             char *result = NULL;
+             size_t length = 0;
+             int retval = mem_cd_iconveh (input, strlen (input),
+                                          (indirect
+                                           ? &cdeh_88592_to_88591_indirectly
+                                           : &cdeh_88592_to_88591),
+                                          handler,
+                                          offsets,
+                                          &result, &length);
+             ASSERT (retval == 0);
+             ASSERT (length == strlen (expected));
+             ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+             if (o)
+               {
+                 for (i = 0; i < 37; i++)
+                   ASSERT (offsets[i] == i);
+                 ASSERT (offsets[37] == MAGIC);
+                 free (offsets);
+               }
+             free (result);
            }
-         free (result);
        }
     }
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
-  for (h = 0; h < SIZEOF (handlers); h++)
+  for (indirect = 0; indirect <= 1; indirect++)
     {
-      enum iconv_ilseq_handler handler = handlers[h];
-      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
-      for (o = 0; o < 2; o++)
+      for (h = 0; h < SIZEOF (handlers); h++)
        {
-         size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
-         char *result = NULL;
-         size_t length = 0;
-         int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_88592_to_88591,
-                                      cd_88592_to_utf8, cd_utf8_to_88591,
-                                      handler,
-                                      offsets,
-                                      &result, &length);
-         switch (handler)
+         enum iconv_ilseq_handler handler = handlers[h];
+         static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
+         for (o = 0; o < 2; o++)
            {
-           case iconveh_error:
-             ASSERT (retval == -1 && errno == EILSEQ);
-             ASSERT (result == NULL);
-             if (o)
-               free (offsets);
-             break;
-           case iconveh_question_mark:
-             {
-               static const char expected[] = "Rafa? Maszkowski";
-               ASSERT (retval == 0);
-               ASSERT (length == strlen (expected));
-               ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-               if (o)
-                 {
-                   for (i = 0; i < 16; i++)
-                     ASSERT (offsets[i] == i);
-                   ASSERT (offsets[16] == MAGIC);
+             size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+             char *result = NULL;
+             size_t length = 0;
+             int retval = mem_cd_iconveh (input, strlen (input),
+                                          (indirect
+                                           ? &cdeh_88592_to_88591_indirectly
+                                           : &cdeh_88592_to_88591),
+                                          handler,
+                                          offsets,
+                                          &result, &length);
+             switch (handler)
+               {
+               case iconveh_error:
+                 ASSERT (retval == -1 && errno == EILSEQ);
+                 ASSERT (result == NULL);
+                 if (o)
                    free (offsets);
+                 break;
+               case iconveh_question_mark:
+                 {
+                   static const char expected[] = "Rafa? Maszkowski";
+                   ASSERT (retval == 0);
+                   ASSERT (length == strlen (expected));
+                   ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+                   if (o)
+                     {
+                       for (i = 0; i < 16; i++)
+                         ASSERT (offsets[i] == i);
+                       ASSERT (offsets[16] == MAGIC);
+                       free (offsets);
+                     }
+                   free (result);
                  }
-               free (result);
-             }
-             break;
-           case iconveh_escape_sequence:
-             {
-               static const char expected[] = "Rafa\\u0142 Maszkowski";
-               ASSERT (retval == 0);
-               ASSERT (length == strlen (expected));
-               ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-               if (o)
+                 break;
+               case iconveh_escape_sequence:
                  {
-                   for (i = 0; i < 16; i++)
-                     ASSERT (offsets[i] == (i < 5 ? i :
-                                            i + 5));
-                   ASSERT (offsets[16] == MAGIC);
-                   free (offsets);
+                   static const char expected[] = "Rafa\\u0142 Maszkowski";
+                   ASSERT (retval == 0);
+                   ASSERT (length == strlen (expected));
+                   ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+                   if (o)
+                     {
+                       for (i = 0; i < 16; i++)
+                         ASSERT (offsets[i] == (i < 5 ? i :
+                                                i + 5));
+                       ASSERT (offsets[16] == MAGIC);
+                       free (offsets);
+                     }
+                   free (result);
                  }
-               free (result);
-             }
-             break;
+                 break;
+               }
            }
        }
     }
@@ -183,8 +218,7 @@ main ()
          char *result = NULL;
          size_t length = 0;
          int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_88591_to_utf8,
-                                      cd_88591_to_utf8, (iconv_t)(-1),
+                                      &cdeh_88591_to_utf8,
                                       handler,
                                       offsets,
                                       &result, &length);
@@ -217,8 +251,7 @@ main ()
          char *result = NULL;
          size_t length = 0;
          int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_utf8_to_88591,
-                                      (iconv_t)(-1), cd_utf8_to_88591,
+                                      &cdeh_utf8_to_88591,
                                       handler,
                                       offsets,
                                       &result, &length);
@@ -254,8 +287,7 @@ main ()
          char *result = NULL;
          size_t length = 0;
          int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_utf8_to_88591,
-                                      (iconv_t)(-1), cd_utf8_to_88591,
+                                      &cdeh_utf8_to_88591,
                                       handler,
                                       offsets,
                                       &result, &length);
@@ -318,8 +350,7 @@ main ()
          char *result = NULL;
          size_t length = 0;
          int retval = mem_cd_iconveh (input, strlen (input),
-                                      cd_utf8_to_88591,
-                                      (iconv_t)(-1), cd_utf8_to_88591,
+                                      &cdeh_utf8_to_88591,
                                       handler,
                                       offsets,
                                       &result, &length);
@@ -335,53 +366,142 @@ main ()
        }
     }
 
+  if (cd_utf7_to_utf8 != (iconv_t)(-1))
+    {
+      /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
+        -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA".  */
+# if !(defined __sun && !defined _LIBICONV_VERSION)
+      /* Test conversion from UTF-7 to UTF-8 with EINVAL.  */
+      for (h = 0; h < SIZEOF (handlers); h++)
+       {
+         enum iconv_ilseq_handler handler = handlers[h];
+         /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40.  It would
+            convert to U+5432 U+D83F U+D840 but the last two are surrogates.  */
+         static const char input[] = "+VDLYP9hA";
+         static const char expected1[] = "\345\220\262"; /* 吲 glibc */
+         static const char expected2[] = ""; /* libiconv */
+         char *result = NULL;
+         size_t length = 0;
+         int retval = mem_cd_iconveh (input, 7,
+                                      &cdeh_utf7_to_utf8,
+                                      handler,
+                                      NULL,
+                                      &result, &length);
+         ASSERT (retval == 0);
+         ASSERT (length == strlen (expected1) || length == strlen (expected2));
+         ASSERT (result != NULL);
+         if (length == strlen (expected1))
+           ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+         else
+           ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
+         free (result);
+       }
+
+      /* Test conversion from UTF-7 to UTF-8 with EILSEQ.  */
+      for (h = 0; h < SIZEOF (handlers); h++)
+       {
+         enum iconv_ilseq_handler handler = handlers[h];
+         /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41.  It would
+            convert to U+D83F U+D840 U+D841 but these are Unicode surrogates.  */
+         static const char input[] = "+2D/YQNhB";
+         char *result = NULL;
+         size_t length = 0;
+         int retval = mem_cd_iconveh (input, strlen (input),
+                                      &cdeh_utf7_to_utf8,
+                                      handler,
+                                      NULL,
+                                      &result, &length);
+         switch (handler)
+           {
+           case iconveh_error:
+             ASSERT (retval == -1 && errno == EILSEQ);
+             ASSERT (result == NULL);
+             break;
+           case iconveh_question_mark:
+           case iconveh_escape_sequence:
+             {
+               /* glibc result */
+               static const char expected1[] = "?????";
+               /* libiconv <= 1.12 result */
+               static const char expected2[] = "?2D/YQNhB";
+               /* libiconv behaviour changed in version 1.13: the result is
+                  '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
+                  by 6 bits.  */
+               static const char expected3[] = "?\340\277\266\341\200\266";
+               ASSERT (retval == 0);
+               ASSERT (length == strlen (expected1)
+                       || length == strlen (expected2)
+                       || length == strlen (expected3));
+               ASSERT (result != NULL);
+               if (length == strlen (expected1))
+                 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
+               else if (length == strlen (expected2))
+                 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
+               else
+                 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
+               free (result);
+             }
+             break;
+           }
+       }
+# endif
+    }
+
   /* ------------------------ Test str_cd_iconveh() ------------------------ */
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors.  */
-  for (h = 0; h < SIZEOF (handlers); h++)
+  for (indirect = 0; indirect <= 1; indirect++)
     {
-      enum iconv_ilseq_handler handler = handlers[h];
-      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      char *result = str_cd_iconveh (input,
-                                    cd_88592_to_88591,
-                                    cd_88592_to_utf8, cd_utf8_to_88591,
-                                    handler);
-      ASSERT (result != NULL);
-      ASSERT (strcmp (result, expected) == 0);
-      free (result);
+      for (h = 0; h < SIZEOF (handlers); h++)
+       {
+         enum iconv_ilseq_handler handler = handlers[h];
+         static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+         static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
+         char *result = str_cd_iconveh (input,
+                                        (indirect
+                                         ? &cdeh_88592_to_88591_indirectly
+                                         : &cdeh_88592_to_88591),
+                                        handler);
+         ASSERT (result != NULL);
+         ASSERT (strcmp (result, expected) == 0);
+         free (result);
+       }
     }
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
-  for (h = 0; h < SIZEOF (handlers); h++)
+  for (indirect = 0; indirect <= 1; indirect++)
     {
-      enum iconv_ilseq_handler handler = handlers[h];
-      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
-      char *result = str_cd_iconveh (input,
-                                    cd_88592_to_88591,
-                                    cd_88592_to_utf8, cd_utf8_to_88591,
-                                    handler);
-      switch (handler)
+      for (h = 0; h < SIZEOF (handlers); h++)
        {
-       case iconveh_error:
-         ASSERT (result == NULL && errno == EILSEQ);
-         break;
-       case iconveh_question_mark:
-         {
-           static const char expected[] = "Rafa? Maszkowski";
-           ASSERT (result != NULL);
-           ASSERT (strcmp (result, expected) == 0);
-           free (result);
-         }
-         break;
-       case iconveh_escape_sequence:
-         {
-           static const char expected[] = "Rafa\\u0142 Maszkowski";
-           ASSERT (result != NULL);
-           ASSERT (strcmp (result, expected) == 0);
-           free (result);
-         }
-         break;
+         enum iconv_ilseq_handler handler = handlers[h];
+         static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
+         char *result = str_cd_iconveh (input,
+                                        (indirect
+                                         ? &cdeh_88592_to_88591_indirectly
+                                         : &cdeh_88592_to_88591),
+                                        handler);
+         switch (handler)
+           {
+           case iconveh_error:
+             ASSERT (result == NULL && errno == EILSEQ);
+             break;
+           case iconveh_question_mark:
+             {
+               static const char expected[] = "Rafa? Maszkowski";
+               ASSERT (result != NULL);
+               ASSERT (strcmp (result, expected) == 0);
+               free (result);
+             }
+             break;
+           case iconveh_escape_sequence:
+             {
+               static const char expected[] = "Rafa\\u0142 Maszkowski";
+               ASSERT (result != NULL);
+               ASSERT (strcmp (result, expected) == 0);
+               free (result);
+             }
+             break;
+           }
        }
     }
 
@@ -392,8 +512,7 @@ main ()
       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
       char *result = str_cd_iconveh (input,
-                                    cd_88591_to_utf8,
-                                    cd_88591_to_utf8, (iconv_t)(-1),
+                                    &cdeh_88591_to_utf8,
                                     handler);
       ASSERT (result != NULL);
       ASSERT (strcmp (result, expected) == 0);
@@ -407,8 +526,7 @@ main ()
       static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       char *result = str_cd_iconveh (input,
-                                    cd_utf8_to_88591,
-                                    (iconv_t)(-1), cd_utf8_to_88591,
+                                    &cdeh_utf8_to_88591,
                                     handler);
       ASSERT (result != NULL);
       ASSERT (strcmp (result, expected) == 0);
@@ -421,8 +539,7 @@ main ()
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
       char *result = str_cd_iconveh (input,
-                                    cd_utf8_to_88591,
-                                    (iconv_t)(-1), cd_utf8_to_88591,
+                                    &cdeh_utf8_to_88591,
                                     handler);
       switch (handler)
        {
@@ -454,8 +571,7 @@ main ()
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\342";
       char *result = str_cd_iconveh (input,
-                                    cd_utf8_to_88591,
-                                    (iconv_t)(-1), cd_utf8_to_88591,
+                                    &cdeh_utf8_to_88591,
                                     handler);
       ASSERT (result != NULL);
       ASSERT (strcmp (result, "") == 0);