lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "utf8-ucs4-safe.h"
  32 # include "ucs4-utf8.h"
  33 # include "unistr.h"
  34 #endif
  35
  36 #include "strdup.h"
  37 #include "c-strcase.h"
  38
  39 #ifndef SIZE_MAX
  40 # define SIZE_MAX ((size_t) -1)
  41 #endif
  42
  43
  44 #if HAVE_ICONV
  45
  46 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  47    error occurs, we may have to determine the Unicode representation of the
  48    inconvertible character.  */
  49
  50 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  51    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  52    it has incremented the input pointers past the error location.  */
  53 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  54 /* Irix iconv() inserts a NUL byte if it cannot convert.
  55    NetBSD iconv() inserts a question mark if it cannot convert.
  56    Only GNU libiconv and GNU libc are known to prefer to fail rather
  57    than doing a lossy conversion.  */
     /* Variant used when iconv() comes from neither GNU libiconv nor GNU libc.
        It hands the input to iconv() in pieces: each piece starts as a 1-byte
        prefix of the remaining input and is lengthened by one byte as long as
        iconv() fails with EINVAL, so that the result of every iconv() call can
        be examined separately.
        *INBUF and *INBYTESLEFT are updated to the input position reached.
        *OUTBUF and *OUTBYTESLEFT are advanced only past the output of iconv()
        calls that returned 0.
        Return value: the result of the last iconv() call if it was 0 or
        (size_t)(-1), with errno as left by iconv(); if iconv() returned a
        positive count, the result is (size_t)(-1) with errno = EILSEQ.
        *INCREMENTED is false, except in the latter case, where it tells
        whether the input pointer moved during the last piece.  */
  58 static size_t
  59 iconv_carefully (iconv_t cd,
  60                  const char **inbuf, size_t *inbytesleft,
  61                  char **outbuf, size_t *outbytesleft,
  62                  bool *incremented)
  63 {
  64   const char *inptr = *inbuf;
  65   const char *inptr_end = inptr + *inbytesleft;
  66   char *outptr = *outbuf;
  67   size_t outsize = *outbytesleft;
  68   const char *inptr_before;
  69   size_t res;
  70
  71   do
  72     {
  73       size_t insize;
  74
  75       inptr_before = inptr;
           /* Value reported if the loop below never gets to call iconv().  */
  76       res = (size_t)(-1);
  77
           /* Try ever longer prefixes of the remaining input until iconv()
              reports something other than "incomplete input" (EINVAL).  */
  78       for (insize = 1; inptr + insize <= inptr_end; insize++)
  79         {
  80           res = iconv (cd,
  81                        (ICONV_CONST char **) &inptr, &insize,
  82                        &outptr, &outsize);
  83           if (!(res == (size_t)(-1) && errno == EINVAL))
  84             break;
  85           /* We expect that no input bytes have been consumed so far.  */
  86           if (inptr != inptr_before)
  87             abort ();
  88         }
  89
           /* Publish the new output position to the caller only when this
              piece was converted with a result of 0.  */
  90       if (res == 0)
  91         {
  92           *outbuf = outptr;
  93           *outbytesleft = outsize;
  94         }
  95     }
  96   while (res == 0 && inptr < inptr_end);
  97
  98   *inbuf = inptr;
  99   *inbytesleft = inptr_end - inptr;
       /* A positive result is a count reported by iconv() instead of a failure;
          it is turned into an EILSEQ failure here.  */
 100   if (res != (size_t)(-1) && res > 0)
 101     {
 102       /* iconv() has already incremented INPTR.  We cannot go back to a
 103          previous INPTR, otherwise the state inside CD would become invalid,
 104          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 105          *INBUF has already been incremented.  */
 106       *incremented = (inptr > inptr_before);
 107       errno = EILSEQ;
 108       return (size_t)(-1);
 109     }
 110   else
 111     {
 112       *incremented = false;
 113       return res;
 114     }
 115 }
 116 # else
     /* With GNU libiconv or GNU libc, iconv_carefully is a plain iconv() call
        that additionally stores false in *INCREMENTED.  */
 117 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 118      (*(incremented) = false, \
 119       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 120 # endif
 121
 122 static int
 123 mem_cd_iconveh_internal (const char *src, size_t srclen,
 124                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 125                          enum iconv_ilseq_handler handler,
 126                          size_t extra_alloc,
 127                          char **resultp, size_t *lengthp)
 128 {
 129   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 130      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 131      Instead, we have to start afresh from the beginning of SRC.  */
 132   /* Use a temporary buffer, so that for small strings, a single malloc()
 133      call will be sufficient.  */
 134 # define tmpbufsize 4096
 135   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 136      libiconv's UCS-4-INTERNAL encoding.  */
 137   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 138 # define tmpbuf tmp.buf
 139
 140   char *initial_result;
 141   char *result;
 142   size_t allocated;
 143   size_t length;
 144
 145   if (*lengthp >= sizeof (tmpbuf))
 146     {
 147       initial_result = *resultp;
 148       allocated = *lengthp;
 149     }
 150   else
 151     {
 152       initial_result = tmpbuf;
 153       allocated = sizeof (tmpbuf);
 154     }
 155   result = initial_result;
 156   length = 0;
 157
 158   /* First, try a direct conversion, and see whether a conversion error
 159      occurs at all.  */
 160   {
 161     const char *inptr = src;
 162     size_t insize = srclen;
 163
 164     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 165 # if defined _LIBICONV_VERSION \
 166      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 167     /* Set to the initial state.  */
 168     iconv (cd, NULL, NULL, NULL, NULL);
 169 # endif
 170
 171     while (insize > 0)
 172       {
 173         char *outptr = result + length;
 174         size_t outsize = allocated - extra_alloc - length;
 175         bool incremented;
 176         size_t res;
 177         bool grow;
 178
 179         /* Use iconv_carefully instead of iconv here, because:
 180            - If TO_CODESET is UTF-8, we can do the error handling in this loop,
 181              no need for a second loop,
 182            - With iconv() implementations other than GNU libiconv and GNU libc,
 183              if we use iconv() in a big swoop, checking for an E2BIG return,
 184              we lose the number of irreversible conversions.  */
 185         res = iconv_carefully (cd,
 186                                &inptr, &insize,
 187                                &outptr, &outsize,
 188                                &incremented);
 189
 190         length = outptr - result;
 191         grow = (length + extra_alloc > allocated / 2);
 192         if (res == (size_t)(-1))
 193           {
 194             if (errno == E2BIG)
 195               grow = true;
 196             else if (errno == EINVAL)
 197               break;
 198             else if (errno == EILSEQ && handler != iconveh_error)
 199               {
 200                 if (cd2 == (iconv_t)(-1))
 201                   {
 202                     /* TO_CODESET is UTF-8.  */
 203                     /* Error handling can produce up to 1 byte of output.  */
 204                     if (length + 1 + extra_alloc > allocated)
 205                       {
 206                         char *memory;
 207
 208                         allocated = 2 * allocated;
 209                         if (length + 1 + extra_alloc > allocated)
 210                           abort ();
 211                         if (result == initial_result)
 212                           memory = (char *) malloc (allocated);
 213                         else
 214                           memory = (char *) realloc (result, allocated);
 215                         if (memory == NULL)
 216                           {
 217                             if (result != initial_result)
 218                               free (result);
 219                             errno = ENOMEM;
 220                             return -1;
 221                           }
 222                         if (result == initial_result)
 223                           memcpy (memory, initial_result, length);
 224                         result = memory;
 225                         grow = false;
 226                       }
 227                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 228                        and emit a question mark.  */
 229                     if (!incremented)
 230                       {
 231                         if (insize == 0)
 232                           abort ();
 233                         inptr++;
 234                         insize--;
 235                       }
 236                     result[length] = '?';
 237                     length++;
 238                   }
 239                 else
 240                   goto indirectly;
 241               }
 242             else
 243               {
 244                 if (result != initial_result)
 245                   {
 246                     int saved_errno = errno;
 247                     free (result);
 248                     errno = saved_errno;
 249                   }
 250                 return -1;
 251               }
 252           }
 253         if (insize == 0)
 254           break;
 255         if (grow)
 256           {
 257             char *memory;
 258
 259             allocated = 2 * allocated;
 260             if (result == initial_result)
 261               memory = (char *) malloc (allocated);
 262             else
 263               memory = (char *) realloc (result, allocated);
 264             if (memory == NULL)
 265               {
 266                 if (result != initial_result)
 267                   free (result);
 268                 errno = ENOMEM;
 269                 return -1;
 270               }
 271             if (result == initial_result)
 272               memcpy (memory, initial_result, length);
 273             result = memory;
 274           }
 275       }
 276   }
 277
 278   /* Now get the conversion state back to the initial state.
 279      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 280 #if defined _LIBICONV_VERSION \
 281     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 282   for (;;)
 283     {
 284       char *outptr = result + length;
 285       size_t outsize = allocated - extra_alloc - length;
 286       size_t res;
 287
 288       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 289       length = outptr - result;
 290       if (res == (size_t)(-1))
 291         {
 292           if (errno == E2BIG)
 293             {
 294               char *memory;
 295
 296               allocated = 2 * allocated;
 297               if (result == initial_result)
 298                 memory = (char *) malloc (allocated);
 299               else
 300                 memory = (char *) realloc (result, allocated);
 301               if (memory == NULL)
 302                 {
 303                   if (result != initial_result)
 304                     free (result);
 305                   errno = ENOMEM;
 306                   return -1;
 307                 }
 308               if (result == initial_result)
 309                 memcpy (memory, initial_result, length);
 310               result = memory;
 311             }
 312           else
 313             {
 314               if (result != initial_result)
 315                 {
 316                   int saved_errno = errno;
 317                   free (result);
 318                   errno = saved_errno;
 319                 }
 320               return -1;
 321             }
 322         }
 323       else
 324         break;
 325     }
 326 #endif
 327
 328   /* The direct conversion succeeded.  */
 329   goto done;
 330
 331  indirectly:
 332   /* The direct conversion failed, handler != iconveh_error,
 333      and cd2 != (iconv_t)(-1).
 334      Use a conversion through UTF-8.  */
 335   length = 0;
 336   {
 337 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 338     char utf8buf[utf8bufsize + 1];
 339     size_t utf8len = 0;
 340     const char *in1ptr = src;
 341     size_t in1size = srclen;
 342     bool do_final_flush1 = true;
 343     bool do_final_flush2 = true;
 344
 345     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 346 # if defined _LIBICONV_VERSION \
 347      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 348     /* Set to the initial state.  */
 349     if (cd1 != (iconv_t)(-1))
 350       iconv (cd1, NULL, NULL, NULL, NULL);
 351     iconv (cd2, NULL, NULL, NULL, NULL);
 352 # endif
 353
 354     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 355       {
 356         char *out1ptr = utf8buf + utf8len;
 357         size_t out1size = utf8bufsize - utf8len;
 358         bool incremented1;
 359         size_t res1;
 360         int errno1;
 361
 362         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 363         if (in1size > 0)
 364           {
 365             if (cd1 != (iconv_t)(-1))
 366               res1 = iconv_carefully (cd1,
 367                                       (ICONV_CONST char **) &in1ptr, &in1size,
 368                                       &out1ptr, &out1size,
 369                                       &incremented1);
 370             else
 371               {
 372                 /* FROM_CODESET is UTF-8.  */
 373                 res1 = 0;
 374                 do
 375                   {
 376                     ucs4_t uc;
 377                     int n;
 378                     int m;
 379
 380                     n = u8_mbtouc_safe (&uc, (const uint8_t *) in1ptr, in1size);
 381                     if (uc == 0xfffd
 382                         && !(n >= 3
 383                              && (uint8_t)in1ptr[0] == 0xEF
 384                              && (uint8_t)in1ptr[1] == 0xBF
 385                              && (uint8_t)in1ptr[2] == 0xBD))
 386                       {
 387                         in1ptr += n;
 388                         in1size -= n;
 389                         errno = EILSEQ;
 390                         res1 = (size_t)(-1);
 391                         incremented1 = true;
 392                         break;
 393                       }
 394                     if (out1size == 0)
 395                       {
 396                         errno = E2BIG;
 397                         res1 = (size_t)(-1);
 398                         incremented1 = false;
 399                         break;
 400                       }
 401                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 402                     if (m == -2)
 403                       {
 404                         errno = E2BIG;
 405                         res1 = (size_t)(-1);
 406                         incremented1 = false;
 407                         break;
 408                       }
 409                     in1ptr += n;
 410                     in1size -= n;
 411                     if (m == -1)
 412                       {
 413                         errno = EILSEQ;
 414                         res1 = (size_t)(-1);
 415                         incremented1 = true;
 416                         break;
 417                       }
 418                     out1ptr += m;
 419                     out1size -= m;
 420                   }
 421                 while (in1size > 0);
 422               }
 423           }
 424         else if (do_final_flush1)
 425           {
 426             /* Now get the conversion state of CD1 back to the initial state.
 427                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 428 # if defined _LIBICONV_VERSION \
 429      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 430             if (cd1 != (iconv_t)(-1))
 431               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 432             else
 433 # endif
 434               res1 = 0;
 435             do_final_flush1 = false;
 436             incremented1 = true;
 437           }
 438         else
 439           {
 440             res1 = 0;
 441             incremented1 = true;
 442           }
 443         if (res1 == (size_t)(-1)
 444             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 445           {
 446             if (result != initial_result)
 447               {
 448                 int saved_errno = errno;
 449                 free (result);
 450                 errno = saved_errno;
 451               }
 452             return -1;
 453           }
 454         if (res1 == (size_t)(-1)
 455             && errno == EILSEQ && handler != iconveh_error)
 456           {
 457             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 458                emit a question mark.  Room for the question mark was allocated
 459                at the end of utf8buf.  */
 460             if (!incremented1)
 461               {
 462                 if (in1size == 0)
 463                   abort ();
 464                 in1ptr++;
 465                 in1size--;
 466               }
 467             utf8buf[utf8len++] = '?';
 468           }
 469         errno1 = errno;
 470         utf8len = out1ptr - utf8buf;
 471
 472         if (in1size == 0
 473             || utf8len > utf8bufsize / 2
 474             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 475           {
 476             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 477             const char *in2ptr = utf8buf;
 478             size_t in2size = utf8len;
 479
 480             while (in2size > 0
 481                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 482               {
 483                 char *out2ptr = result + length;
 484                 size_t out2size = allocated - extra_alloc - length;
 485                 bool incremented2;
 486                 size_t res2;
 487                 bool grow;
 488
 489                 if (in2size > 0)
 490                   res2 = iconv_carefully (cd2,
 491                                           &in2ptr, &in2size,
 492                                           &out2ptr, &out2size,
 493                                           &incremented2);
 494                 else /* in1size == 0 && !do_final_flush1
 495                         && in2size == 0 && do_final_flush2 */
 496                   {
 497                     /* Now get the conversion state of CD1 back to the initial
 498                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 499 # if defined _LIBICONV_VERSION \
 500      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 501                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 502 # else
 503                     res2 = 0;
 504 # endif
 505                     do_final_flush2 = false;
 506                     incremented2 = true;
 507                   }
 508
 509                 length = out2ptr - result;
 510                 grow = (length + extra_alloc > allocated / 2);
 511                 if (res2 == (size_t)(-1))
 512                   {
 513                     if (errno == E2BIG)
 514                       grow = true;
 515                     else if (errno == EINVAL)
 516                       break;
 517                     else if (errno == EILSEQ && handler != iconveh_error)
 518                       {
 519                         /* Error handling can produce up to 10 bytes of ASCII
 520                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 521                            UCS-4, so use CD2 here as well.  */
 522                         char scratchbuf[10];
 523                         size_t scratchlen;
 524                         ucs4_t uc;
 525                         const char *inptr;
 526                         size_t insize;
 527                         size_t res;
 528
 529                         if (incremented2)
 530                           {
 531                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 532                                          (const uint8_t *) utf8buf)
 533                                 == NULL)
 534                               abort ();
 535                           }
 536                         else
 537                           {
 538                             int n;
 539                             if (in2size == 0)
 540                               abort ();
 541                             n = u8_mbtouc (&uc, (const uint8_t *) in2ptr,
 542                                            in2size);
 543                             in2ptr += n;
 544                             in2size -= n;
 545                           }
 546
 547                         if (handler == iconveh_escape_sequence)
 548                           {
 549                             static char hex[16] = "0123456789ABCDEF";
 550                             scratchlen = 0;
 551                             scratchbuf[scratchlen++] = '\\';
 552                             if (uc < 0x10000)
 553                               scratchbuf[scratchlen++] = 'u';
 554                             else
 555                               {
 556                                 scratchbuf[scratchlen++] = 'U';
 557                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 558                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 559                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 560                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 561                               }
 562                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 563                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 564                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 565                             scratchbuf[scratchlen++] = hex[uc & 15];
 566                           }
 567                         else
 568                           {
 569                             scratchbuf[0] = '?';
 570                             scratchlen = 1;
 571                           }
 572
 573                         inptr = scratchbuf;
 574                         insize = scratchlen;
 575                         res = iconv (cd2,
 576                                      (ICONV_CONST char **) &inptr, &insize,
 577                                      &out2ptr, &out2size);
 578                         length = out2ptr - result;
 579                         if (res == (size_t)(-1) && errno == E2BIG)
 580                           {
 581                             char *memory;
 582
 583                             allocated = 2 * allocated;
 584                             if (length + 1 + extra_alloc > allocated)
 585                               abort ();
 586                             if (result == initial_result)
 587                               memory = (char *) malloc (allocated);
 588                             else
 589                               memory = (char *) realloc (result, allocated);
 590                             if (memory == NULL)
 591                               {
 592                                 if (result != initial_result)
 593                                   free (result);
 594                                 errno = ENOMEM;
 595                                 return -1;
 596                               }
 597                             if (result == initial_result)
 598                               memcpy (memory, initial_result, length);
 599                             result = memory;
 600                             grow = false;
 601
 602                             out2ptr = result + length;
 603                             out2size = allocated - extra_alloc - length;
 604                             res = iconv (cd2,
 605                                          (ICONV_CONST char **) &inptr, &insize,
 606                                          &out2ptr, &out2size);
 607                             length = out2ptr - result;
 608                           }
 609 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 610                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 611                            NetBSD iconv() inserts a question mark if it cannot
 612                            convert.
 613                            Only GNU libiconv and GNU libc are known to prefer
 614                            to fail rather than doing a lossy conversion.  */
 615                         if (res != (size_t)(-1) && res > 0)
 616                           {
 617                             errno = EILSEQ;
 618                             res = (size_t)(-1);
 619                           }
 620 # endif
 621                         if (res == (size_t)(-1))
 622                           {
 623                             /* Failure converting the ASCII replacement.  */
 624                             if (result != initial_result)
 625                               {
 626                                 int saved_errno = errno;
 627                                 free (result);
 628                                 errno = saved_errno;
 629                               }
 630                             return -1;
 631                           }
 632                       }
 633                     else
 634                       {
 635                         if (result != initial_result)
 636                           {
 637                             int saved_errno = errno;
 638                             free (result);
 639                             errno = saved_errno;
 640                           }
 641                         return -1;
 642                       }
 643                   }
 644                 if (!(in2size > 0
 645                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 646                   break;
 647                 if (grow)
 648                   {
 649                     char *memory;
 650
 651                     allocated = 2 * allocated;
 652                     if (result == initial_result)
 653                       memory = (char *) malloc (allocated);
 654                     else
 655                       memory = (char *) realloc (result, allocated);
 656                     if (memory == NULL)
 657                       {
 658                         if (result != initial_result)
 659                           free (result);
 660                         errno = ENOMEM;
 661                         return -1;
 662                       }
 663                     if (result == initial_result)
 664                       memcpy (memory, initial_result, length);
 665                     result = memory;
 666                   }
 667               }
 668
 669             /* Move the remaining bytes to the beginning of utf8buf.  */
 670             if (in2size > 0)
 671               memmove (utf8buf, in2ptr, in2size);
 672             utf8len = in2size;
 673           }
 674
 675         if (res1 == (size_t)(-1))
 676           {
 677             if (errno1 == EINVAL)
 678               in1size = 0;
 679             else if (errno1 == EILSEQ)
 680               {
 681                 if (result != initial_result)
 682                   free (result);
 683                 errno = errno1;
 684                 return -1;
 685               }
 686           }
 687       }
 688 # undef utf8bufsize
 689   }
 690
 691  done:
 692   /* Now the final memory allocation.  */
 693   if (result == tmpbuf)
 694     {
 695       char *memory;
 696
 697       memory = (char *) malloc (length + extra_alloc);
 698       if (memory != NULL)
 699         {
 700           memcpy (memory, tmpbuf, length);
 701           result = memory;
 702         }
 703       else
 704         {
 705           errno = ENOMEM;
 706           return -1;
 707         }
 708     }
 709   else if (result != *resultp && length + extra_alloc < allocated)
 710     {
 711       /* Shrink the allocated memory if possible.  */
 712       char *memory;
 713
 714       memory = (char *) realloc (result, length + extra_alloc);
 715       if (memory != NULL)
 716         result = memory;
 717     }
 718   *resultp = result;
 719   *lengthp = length;
 720   return 0;
 721 # undef tmpbuf
 722 # undef tmpbufsize
 723 }
 724
     /* Converts the SRCLEN bytes at SRC by forwarding all arguments to
        mem_cd_iconveh_internal, with an extra_alloc of 0, i.e. without
        reserving bytes beyond the converted result.
        Returns the value returned by mem_cd_iconveh_internal.  */
 725 int
 726 mem_cd_iconveh (const char *src, size_t srclen,
 727                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 728                 enum iconv_ilseq_handler handler,
 729                 char **resultp, size_t *lengthp)
 730 {
 731   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 732                                   resultp, lengthp);
 733 }
 734
     /* Converts the NUL-terminated string SRC using the conversion descriptors
        CD, CD1, CD2 and the error handler HANDLER, which are passed on to
        mem_cd_iconveh_internal.
        Returns the converted string, NUL-terminated, or NULL with errno set
        if the conversion failed.  */
 735 char *
 736 str_cd_iconveh (const char *src,
 737                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 738                 enum iconv_ilseq_handler handler)
 739 {
 740   /* For most encodings, a trailing NUL byte in the input will be converted
 741      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 742      function is usable for UTF-7, we have to exclude the NUL byte from the
 743      conversion and add it by hand afterwards.  */
 744   char *result = NULL;
 745   size_t length = 0;
       /* Convert strlen (src) bytes and ask for 1 extra byte, which will hold
          the terminating NUL.  */
 746   int retval = mem_cd_iconveh_internal (src, strlen (src),
 747                                         cd, cd1, cd2, handler, 1,
 748                                         &result, &length);
 749
 750   if (retval < 0)
 751     {
           /* Release whatever may have been stored in RESULT, without
              disturbing the errno value of the failed conversion.  */
 752       if (result != NULL)
 753         {
 754           int saved_errno = errno;
 755           free (result);
 756           errno = saved_errno;
 757         }
 758       return NULL;
 759     }
 760
 761   /* Add the terminating NUL byte.  */
 762   result[length] = '\0';
 763
 764   return result;
 765 }
 766
 767 #endif
 768
     /* Converts the NUL-terminated string SRC from FROM_CODESET to TO_CODESET,
        treating unconvertible input as specified by HANDLER.
        If the two codeset names compare equal under c_strcasecmp, the result
        is simply strdup (SRC).  Otherwise, if HAVE_ICONV, up to three
        conversion descriptors are opened (direct, FROM_CODESET to UTF-8, and
        UTF-8 to TO_CODESET; the latter two are skipped when the respective
        codeset is "UTF-8"), the conversion is done by str_cd_iconveh, and the
        descriptors are closed again.
        Returns the converted string, or NULL with errno set upon failure.
        Without HAVE_ICONV, a conversion between different codesets fails with
        errno = ENOSYS.  */
 769 char *
 770 str_iconveh (const char *src,
 771              const char *from_codeset, const char *to_codeset,
 772              enum iconv_ilseq_handler handler)
 773 {
 774   if (c_strcasecmp (from_codeset, to_codeset) == 0)
 775     return strdup (src);
 776   else
 777     {
 778 #if HAVE_ICONV
 779       iconv_t cd;
 780       iconv_t cd1;
 781       iconv_t cd2;
 782       char *result;
 783
 784       /* Avoid glibc-2.1 bug with EUC-KR.  */
 785 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 786       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 787           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 788         {
 789           errno = EINVAL;
 790           return NULL;
 791         }
 792 # endif
 793
           /* CD: direct conversion from FROM_CODESET to TO_CODESET.  */
 794       cd = iconv_open (to_codeset, from_codeset);
 795       if (cd == (iconv_t)(-1))
 796         return NULL;
 797
           /* CD1: from FROM_CODESET to UTF-8; (iconv_t)(-1) stands for "not
              needed" when FROM_CODESET already is UTF-8.  */
 798       if (c_strcasecmp (from_codeset, "UTF-8") == 0)
 799         cd1 = (iconv_t)(-1);
 800       else
 801         {
 802           cd1 = iconv_open ("UTF-8", from_codeset);
 803           if (cd1 == (iconv_t)(-1))
 804             {
 805               int saved_errno = errno;
 806               iconv_close (cd);
 807               errno = saved_errno;
 808               return NULL;
 809             }
 810         }
 811
           /* CD2: from UTF-8 to TO_CODESET; (iconv_t)(-1) stands for "not
              needed" when TO_CODESET already is UTF-8.  */
 812       if (c_strcasecmp (to_codeset, "UTF-8") == 0)
 813         cd2 = (iconv_t)(-1);
 814       else
 815         {
 816           cd2 = iconv_open (to_codeset, "UTF-8");
 817           if (cd2 == (iconv_t)(-1))
 818             {
 819               int saved_errno = errno;
 820               if (cd1 != (iconv_t)(-1))
 821                 iconv_close (cd1);
 822               iconv_close (cd);
 823               errno = saved_errno;
 824               return NULL;
 825             }
 826         }
 827
 828       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
 829
 830       if (result == NULL)
 831         {
 832           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconveh.  */
 833           int saved_errno = errno;
 834           if (cd2 != (iconv_t)(-1))
 835             iconv_close (cd2);
 836           if (cd1 != (iconv_t)(-1))
 837             iconv_close (cd1);
 838           iconv_close (cd);
 839           errno = saved_errno;
 840         }
 841       else
 842         {
               /* The conversion succeeded.  Close the descriptors in the order
                  cd2, cd1, cd.  The first iconv_close failure turns the overall
                  result into a failure; the remaining descriptors are still
                  closed and RESULT is freed.  */
 843           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 844             {
 845               /* Return NULL, but free the allocated memory, and while doing
 846                  that, preserve the errno from iconv_close.  */
 847               int saved_errno = errno;
 848               if (cd1 != (iconv_t)(-1))
 849                 iconv_close (cd1);
 850               iconv_close (cd);
 851               free (result);
 852               errno = saved_errno;
 853               return NULL;
 854             }
 855           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 856             {
 857               /* Return NULL, but free the allocated memory, and while doing
 858                  that, preserve the errno from iconv_close.  */
 859               int saved_errno = errno;
 860               iconv_close (cd);
 861               free (result);
 862               errno = saved_errno;
 863               return NULL;
 864             }
 865           if (iconv_close (cd) < 0)
 866             {
 867               /* Return NULL, but free the allocated memory, and while doing
 868                  that, preserve the errno from iconv_close.  */
 869               int saved_errno = errno;
 870               free (result);
 871               errno = saved_errno;
 872               return NULL;
 873             }
 874         }
 875       return result;
 876 #else
 877       /* This is a different error code than if iconv_open existed but didn't
 878          support from_codeset and to_codeset, so that the caller can emit
 879          an error message such as
 880            "iconv() is not supported. Installing GNU libiconv and
 881             then reinstalling this package would fix this."  */
 882       errno = ENOSYS;
 883       return NULL;
 884 #endif
 885     }
 886 }
 885     }
 886 }