lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "utf8-ucs4-safe.h"
  32 # include "ucs4-utf8.h"
  33 # include "unistr.h"
  34 #endif
  35
  36 #include "strdup.h"
  37 #include "c-strcase.h"
  38
  39 #ifndef SIZE_MAX
  40 # define SIZE_MAX ((size_t) -1)
  41 #endif
  42
  43
  44 #if HAVE_ICONV
  45
  46 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  47    error occurs, we may have to determine the Unicode representation of the
  48    inconvertible character.  */
  49
  50 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  51    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  52    it has incremented the input pointers past the error location.  */
  53 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  54 /* Irix iconv() inserts a NUL byte if it cannot convert.
  55    NetBSD iconv() inserts a question mark if it cannot convert.
  56    Only GNU libiconv and GNU libc are known to prefer to fail rather
  57    than doing a lossy conversion.  */
  58 static size_t
  59 iconv_carefully (iconv_t cd,
  60                  const char **inbuf, size_t *inbytesleft,
  61                  char **outbuf, size_t *outbytesleft,
  62                  bool *incremented)
  63 {
  64   const char *inptr = *inbuf;
  65   const char *inptr_end = inptr + *inbytesleft;
  66   char *outptr = *outbuf;
  67   size_t outsize = *outbytesleft;
  68   const char *inptr_before;
  69   size_t res;
  70
  71   do
  72     {
  73       size_t insize;
  74
  75       inptr_before = inptr;
  76       res = (size_t)(-1);
  77
  78       for (insize = 1; inptr + insize <= inptr_end; insize++)
  79         {
  80           res = iconv (cd,
  81                        (ICONV_CONST char **) &inptr, &insize,
  82                        &outptr, &outsize);
  83           if (!(res == (size_t)(-1) && errno == EINVAL))
  84             break;
  85           /* We expect that no input bytes have been consumed so far.  */
  86           if (inptr != inptr_before)
  87             abort ();
  88         }
  89
  90       if (res == 0)
  91         {
  92           *outbuf = outptr;
  93           *outbytesleft = outsize;
  94         }
  95     }
  96   while (res == 0 && inptr < inptr_end);
  97
  98   *inbuf = inptr;
  99   *inbytesleft = inptr_end - inptr;
 100   if (res != (size_t)(-1) && res > 0)
 101     {
 102       /* iconv() has already incremented INPTR.  We cannot go back to a
 103          previous INPTR, otherwise the state inside CD would become invalid,
 104          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 105          *INBUF has already been incremented.  */
 106       *incremented = (inptr > inptr_before);
 107       errno = EILSEQ;
 108       return (size_t)(-1);
 109     }
 110   else
 111     {
 112       *incremented = false;
 113       return res;
 114     }
 115 }
 116 # else
 117 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 118      (*(incremented) = false, \
 119       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 120 # endif
 121
 122 static int
 123 mem_cd_iconveh_internal (const char *src, size_t srclen,
 124                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 125                          enum iconv_ilseq_handler handler,
 126                          size_t extra_alloc,
 127                          char **resultp, size_t *lengthp)
 128 {
 129   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 130      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 131      Instead, we have to start afresh from the beginning of SRC.  */
 132   /* Use a temporary buffer, so that for small strings, a single malloc()
 133      call will be sufficient.  */
 134 # define tmpbufsize 4096
 135   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 136      libiconv's UCS-4-INTERNAL encoding.  */
 137   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 138 # define tmpbuf tmp.buf
 139
 140   char *result = tmpbuf;
 141   size_t allocated = sizeof (tmpbuf);
 142   size_t length = 0;
 143
 144   /* First, try a direct conversion, and see whether a conversion error
 145      occurs at all.  */
 146   {
 147     const char *inptr = src;
 148     size_t insize = srclen;
 149
 150     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 151 # if defined _LIBICONV_VERSION \
 152      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 153     /* Set to the initial state.  */
 154     iconv (cd, NULL, NULL, NULL, NULL);
 155 # endif
 156
 157     while (insize > 0)
 158       {
 159         char *outptr = result + length;
 160         size_t outsize = allocated - extra_alloc - length;
 161         bool incremented;
 162         size_t res;
 163         bool grow;
 164
 165         /* Use iconv_carefully instead of iconv here, because:
 166            - If TO_CODESET is UTF-8, we can do the error handling in this loop,
 167              no need for a second loop,
 168            - With iconv() implementations other than GNU libiconv and GNU libc,
 169              if we use iconv() in a big swoop, checking for an E2BIG return,
 170              we lose the number of irreversible conversions.  */
 171         res = iconv_carefully (cd,
 172                                &inptr, &insize,
 173                                &outptr, &outsize,
 174                                &incremented);
 175
 176         length = outptr - result;
 177         grow = (length + extra_alloc > allocated / 2);
 178         if (res == (size_t)(-1))
 179           {
 180             if (errno == E2BIG)
 181               grow = true;
 182             else if (errno == EINVAL)
 183               break;
 184             else if (errno == EILSEQ && handler != iconveh_error)
 185               {
 186                 if (cd2 == (iconv_t)(-1))
 187                   {
 188                     /* TO_CODESET is UTF-8.  */
 189                     /* Error handling can produce up to 1 byte of output.  */
 190                     if (length + 1 + extra_alloc > allocated)
 191                       {
 192                         char *memory;
 193
 194                         allocated = 2 * allocated;
 195                         if (length + 1 + extra_alloc > allocated)
 196                           abort ();
 197                         if (result == tmpbuf)
 198                           memory = (char *) malloc (allocated);
 199                         else
 200                           memory = (char *) realloc (result, allocated);
 201                         if (memory == NULL)
 202                           {
 203                             if (result != tmpbuf)
 204                               free (result);
 205                             errno = ENOMEM;
 206                             return -1;
 207                           }
 208                         if (result == tmpbuf)
 209                           memcpy (memory, tmpbuf, length);
 210                         result = memory;
 211                         grow = false;
 212                       }
 213                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 214                        and emit a question mark.  */
 215                     if (!incremented)
 216                       {
 217                         if (insize == 0)
 218                           abort ();
 219                         inptr++;
 220                         insize--;
 221                       }
 222                     result[length] = '?';
 223                     length++;
 224                   }
 225                 else
 226                   goto indirectly;
 227               }
 228             else
 229               {
 230                 if (result != tmpbuf)
 231                   {
 232                     int saved_errno = errno;
 233                     free (result);
 234                     errno = saved_errno;
 235                   }
 236                 return -1;
 237               }
 238           }
 239         if (insize == 0)
 240           break;
 241         if (grow)
 242           {
 243             char *memory;
 244
 245             allocated = 2 * allocated;
 246             if (result == tmpbuf)
 247               memory = (char *) malloc (allocated);
 248             else
 249               memory = (char *) realloc (result, allocated);
 250             if (memory == NULL)
 251               {
 252                 if (result != tmpbuf)
 253                   free (result);
 254                 errno = ENOMEM;
 255                 return -1;
 256               }
 257             if (result == tmpbuf)
 258               memcpy (memory, tmpbuf, length);
 259             result = memory;
 260           }
 261       }
 262   }
 263
 264   /* Now get the conversion state back to the initial state.
 265      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 266 #if defined _LIBICONV_VERSION \
 267     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 268   for (;;)
 269     {
 270       char *outptr = result + length;
 271       size_t outsize = allocated - extra_alloc - length;
 272       size_t res;
 273
 274       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 275       length = outptr - result;
 276       if (res == (size_t)(-1))
 277         {
 278           if (errno == E2BIG)
 279             {
 280               char *memory;
 281
 282               allocated = 2 * allocated;
 283               if (result == tmpbuf)
 284                 memory = (char *) malloc (allocated);
 285               else
 286                 memory = (char *) realloc (result, allocated);
 287               if (memory == NULL)
 288                 {
 289                   if (result != tmpbuf)
 290                     free (result);
 291                   errno = ENOMEM;
 292                   return -1;
 293                 }
 294               if (result == tmpbuf)
 295                 memcpy (memory, tmpbuf, length);
 296               result = memory;
 297             }
 298           else
 299             {
 300               if (result != tmpbuf)
 301                 {
 302                   int saved_errno = errno;
 303                   free (result);
 304                   errno = saved_errno;
 305                 }
 306               return -1;
 307             }
 308         }
 309       else
 310         break;
 311     }
 312 #endif
 313
 314   /* The direct conversion succeeded.  */
 315   goto done;
 316
 317  indirectly:
 318   /* The direct conversion failed, handler != iconveh_error,
 319      and cd2 != (iconv_t)(-1).
 320      Use a conversion through UTF-8.  */
 321   length = 0;
 322   {
 323 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 324     char utf8buf[utf8bufsize + 1];
 325     size_t utf8len = 0;
 326     const char *in1ptr = src;
 327     size_t in1size = srclen;
 328     bool do_final_flush1 = true;
 329     bool do_final_flush2 = true;
 330
 331     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 332 # if defined _LIBICONV_VERSION \
 333      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 334     /* Set to the initial state.  */
 335     if (cd1 != (iconv_t)(-1))
 336       iconv (cd1, NULL, NULL, NULL, NULL);
 337     iconv (cd2, NULL, NULL, NULL, NULL);
 338 # endif
 339
 340     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 341       {
 342         char *out1ptr = utf8buf + utf8len;
 343         size_t out1size = utf8bufsize - utf8len;
 344         bool incremented1;
 345         size_t res1;
 346         int errno1;
 347
 348         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 349         if (in1size > 0)
 350           {
 351             if (cd1 != (iconv_t)(-1))
 352               res1 = iconv_carefully (cd1,
 353                                       (ICONV_CONST char **) &in1ptr, &in1size,
 354                                       &out1ptr, &out1size,
 355                                       &incremented1);
 356             else
 357               {
 358                 /* FROM_CODESET is UTF-8.  */
 359                 res1 = 0;
 360                 do
 361                   {
 362                     ucs4_t uc;
 363                     int n;
 364                     int m;
 365
 366                     n = u8_mbtouc_safe (&uc, (const uint8_t *) in1ptr, in1size);
 367                     if (uc == 0xfffd
 368                         && !(n >= 3
 369                              && (uint8_t)in1ptr[0] == 0xEF
 370                              && (uint8_t)in1ptr[1] == 0xBF
 371                              && (uint8_t)in1ptr[2] == 0xBD))
 372                       {
 373                         in1ptr += n;
 374                         in1size -= n;
 375                         errno = EILSEQ;
 376                         res1 = (size_t)(-1);
 377                         incremented1 = true;
 378                         break;
 379                       }
 380                     if (out1size == 0)
 381                       {
 382                         errno = E2BIG;
 383                         res1 = (size_t)(-1);
 384                         incremented1 = false;
 385                         break;
 386                       }
 387                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 388                     if (m == -2)
 389                       {
 390                         errno = E2BIG;
 391                         res1 = (size_t)(-1);
 392                         incremented1 = false;
 393                         break;
 394                       }
 395                     in1ptr += n;
 396                     in1size -= n;
 397                     if (m == -1)
 398                       {
 399                         errno = EILSEQ;
 400                         res1 = (size_t)(-1);
 401                         incremented1 = true;
 402                         break;
 403                       }
 404                     out1ptr += m;
 405                     out1size -= m;
 406                   }
 407                 while (in1size > 0);
 408               }
 409           }
 410         else if (do_final_flush1)
 411           {
 412             /* Now get the conversion state of CD1 back to the initial state.
 413                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 414 # if defined _LIBICONV_VERSION \
 415      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 416             if (cd1 != (iconv_t)(-1))
 417               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 418             else
 419 # endif
 420               res1 = 0;
 421             do_final_flush1 = false;
 422             incremented1 = true;
 423           }
 424         else
 425           {
 426             res1 = 0;
 427             incremented1 = true;
 428           }
 429         if (res1 == (size_t)(-1)
 430             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 431           {
 432             if (result != tmpbuf)
 433               {
 434                 int saved_errno = errno;
 435                 free (result);
 436                 errno = saved_errno;
 437               }
 438             return -1;
 439           }
 440         if (res1 == (size_t)(-1)
 441             && errno == EILSEQ && handler != iconveh_error)
 442           {
 443             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 444                emit a question mark.  Room for the question mark was allocated
 445                at the end of utf8buf.  */
 446             if (!incremented1)
 447               {
 448                 if (in1size == 0)
 449                   abort ();
 450                 in1ptr++;
 451                 in1size--;
 452               }
 453             utf8buf[utf8len++] = '?';
 454           }
 455         errno1 = errno;
 456         utf8len = out1ptr - utf8buf;
 457
 458         if (in1size == 0
 459             || utf8len > utf8bufsize / 2
 460             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 461           {
 462             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 463             const char *in2ptr = utf8buf;
 464             size_t in2size = utf8len;
 465
 466             while (in2size > 0
 467                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 468               {
 469                 char *out2ptr = result + length;
 470                 size_t out2size = allocated - extra_alloc - length;
 471                 bool incremented2;
 472                 size_t res2;
 473                 bool grow;
 474
 475                 if (in2size > 0)
 476                   res2 = iconv_carefully (cd2,
 477                                           &in2ptr, &in2size,
 478                                           &out2ptr, &out2size,
 479                                           &incremented2);
 480                 else /* in1size == 0 && !do_final_flush1
 481                         && in2size == 0 && do_final_flush2 */
 482                   {
 483                     /* Now get the conversion state of CD1 back to the initial
 484                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 485 # if defined _LIBICONV_VERSION \
 486      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 487                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 488 # else
 489                     res2 = 0;
 490 # endif
 491                     do_final_flush2 = false;
 492                     incremented2 = true;
 493                   }
 494
 495                 length = out2ptr - result;
 496                 grow = (length + extra_alloc > allocated / 2);
 497                 if (res2 == (size_t)(-1))
 498                   {
 499                     if (errno == E2BIG)
 500                       grow = true;
 501                     else if (errno == EINVAL)
 502                       break;
 503                     else if (errno == EILSEQ && handler != iconveh_error)
 504                       {
 505                         /* Error handling can produce up to 10 bytes of ASCII
 506                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 507                            UCS-4, so use CD2 here as well.  */
 508                         char scratchbuf[10];
 509                         size_t scratchlen;
 510                         ucs4_t uc;
 511                         const char *inptr;
 512                         size_t insize;
 513                         size_t res;
 514
 515                         if (incremented2)
 516                           {
 517                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 518                                          (const uint8_t *) utf8buf)
 519                                 == NULL)
 520                               abort ();
 521                           }
 522                         else
 523                           {
 524                             int n;
 525                             if (in2size == 0)
 526                               abort ();
 527                             n = u8_mbtouc (&uc, (const uint8_t *) in2ptr,
 528                                            in2size);
 529                             in2ptr += n;
 530                             in2size -= n;
 531                           }
 532
 533                         if (handler == iconveh_escape_sequence)
 534                           {
 535                             static char hex[16] = "0123456789ABCDEF";
 536                             scratchlen = 0;
 537                             scratchbuf[scratchlen++] = '\\';
 538                             if (uc < 0x10000)
 539                               scratchbuf[scratchlen++] = 'u';
 540                             else
 541                               {
 542                                 scratchbuf[scratchlen++] = 'U';
 543                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 544                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 545                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 546                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 547                               }
 548                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 549                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 550                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 551                             scratchbuf[scratchlen++] = hex[uc & 15];
 552                           }
 553                         else
 554                           {
 555                             scratchbuf[0] = '?';
 556                             scratchlen = 1;
 557                           }
 558
 559                         inptr = scratchbuf;
 560                         insize = scratchlen;
 561                         res = iconv (cd2,
 562                                      (ICONV_CONST char **) &inptr, &insize,
 563                                      &out2ptr, &out2size);
 564                         length = out2ptr - result;
 565                         if (res == (size_t)(-1) && errno == E2BIG)
 566                           {
 567                             char *memory;
 568
 569                             allocated = 2 * allocated;
 570                             if (length + 1 + extra_alloc > allocated)
 571                               abort ();
 572                             if (result == tmpbuf)
 573                               memory = (char *) malloc (allocated);
 574                             else
 575                               memory = (char *) realloc (result, allocated);
 576                             if (memory == NULL)
 577                               {
 578                                 if (result != tmpbuf)
 579                                   free (result);
 580                                 errno = ENOMEM;
 581                                 return -1;
 582                               }
 583                             if (result == tmpbuf)
 584                               memcpy (memory, tmpbuf, length);
 585                             result = memory;
 586                             grow = false;
 587
 588                             out2ptr = result + length;
 589                             out2size = allocated - extra_alloc - length;
 590                             res = iconv (cd2,
 591                                          (ICONV_CONST char **) &inptr, &insize,
 592                                          &out2ptr, &out2size);
 593                             length = out2ptr - result;
 594                           }
 595 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 596                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 597                            NetBSD iconv() inserts a question mark if it cannot
 598                            convert.
 599                            Only GNU libiconv and GNU libc are known to prefer
 600                            to fail rather than doing a lossy conversion.  */
 601                         if (res != (size_t)(-1) && res > 0)
 602                           {
 603                             errno = EILSEQ;
 604                             res = (size_t)(-1);
 605                           }
 606 # endif
 607                         if (res == (size_t)(-1))
 608                           {
 609                             /* Failure converting the ASCII replacement.  */
 610                             if (result != tmpbuf)
 611                               {
 612                                 int saved_errno = errno;
 613                                 free (result);
 614                                 errno = saved_errno;
 615                               }
 616                             return -1;
 617                           }
 618                       }
 619                     else
 620                       {
 621                         if (result != tmpbuf)
 622                           {
 623                             int saved_errno = errno;
 624                             free (result);
 625                             errno = saved_errno;
 626                           }
 627                         return -1;
 628                       }
 629                   }
 630                 if (!(in2size > 0
 631                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 632                   break;
 633                 if (grow)
 634                   {
 635                     char *memory;
 636
 637                     allocated = 2 * allocated;
 638                     if (result == tmpbuf)
 639                       memory = (char *) malloc (allocated);
 640                     else
 641                       memory = (char *) realloc (result, allocated);
 642                     if (memory == NULL)
 643                       {
 644                         if (result != tmpbuf)
 645                           free (result);
 646                         errno = ENOMEM;
 647                         return -1;
 648                       }
 649                     if (result == tmpbuf)
 650                       memcpy (memory, tmpbuf, length);
 651                     result = memory;
 652                   }
 653               }
 654
 655             /* Move the remaining bytes to the beginning of utf8buf.  */
 656             if (in2size > 0)
 657               memmove (utf8buf, in2ptr, in2size);
 658             utf8len = in2size;
 659           }
 660
 661         if (res1 == (size_t)(-1))
 662           {
 663             if (errno1 == EINVAL)
 664               in1size = 0;
 665             else if (errno1 == EILSEQ)
 666               {
 667                 if (result != tmpbuf)
 668                   free (result);
 669                 errno = errno1;
 670                 return -1;
 671               }
 672           }
 673       }
 674 # undef utf8bufsize
 675   }
 676
 677  done:
 678   /* Now the final memory allocation.  */
 679   if (resultp != NULL)
 680     {
 681       if (result == tmpbuf)
 682         {
 683           char *memory;
 684
 685           memory = (char *) malloc (length + extra_alloc);
 686           if (memory != NULL)
 687             {
 688               memcpy (memory, tmpbuf, length);
 689               result = memory;
 690             }
 691           else
 692             {
 693               errno = ENOMEM;
 694               return -1;
 695             }
 696         }
 697       else if (length + extra_alloc < allocated)
 698         {
 699           /* Shrink the allocated memory if possible.  */
 700           char *memory;
 701
 702           memory = (char *) realloc (result, length + extra_alloc);
 703           if (memory != NULL)
 704             result = memory;
 705         }
 706       *resultp = result;
 707     }
 708   else
 709     {
 710       if (result != tmpbuf)
 711         free (result);
 712     }
 713   if (lengthp != NULL)
 714     *lengthp = length;
 715   return 0;
 716 # undef tmpbuf
 717 # undef tmpbufsize
 718 }
 719
 720 int
 721 mem_cd_iconveh (const char *src, size_t srclen,
 722                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 723                 enum iconv_ilseq_handler handler,
 724                 char **resultp, size_t *lengthp)
 725 {
 726   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 727                                   resultp, lengthp);
 728 }
 729
 730 char *
 731 str_cd_iconveh (const char *src,
 732                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 733                 enum iconv_ilseq_handler handler)
 734 {
 735   /* For most encodings, a trailing NUL byte in the input will be converted
 736      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 737      function is usable for UTF-7, we have to exclude the NUL byte from the
 738      conversion and add it by hand afterwards.  */
 739   char *result = NULL;
 740   size_t length;
 741   int retval = mem_cd_iconveh_internal (src, strlen (src),
 742                                         cd, cd1, cd2, handler, 1,
 743                                         &result, &length);
 744
 745   if (retval < 0)
 746     {
 747       if (result != NULL)
 748         {
 749           int saved_errno = errno;
 750           free (result);
 751           errno = saved_errno;
 752         }
 753       return NULL;
 754     }
 755
 756   /* Add the terminating NUL byte.  */
 757   result[length] = '\0';
 758
 759   return result;
 760 }
 761
 762 #endif
 763
 764 char *
 765 str_iconveh (const char *src,
 766              const char *from_codeset, const char *to_codeset,
 767              enum iconv_ilseq_handler handler)
 768 {
 769   if (c_strcasecmp (from_codeset, to_codeset) == 0)
 770     return strdup (src);
 771   else
 772     {
 773 #if HAVE_ICONV
 774       iconv_t cd;
 775       iconv_t cd1;
 776       iconv_t cd2;
 777       char *result;
 778
 779       /* Avoid glibc-2.1 bug with EUC-KR.  */
 780 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 781       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 782           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 783         {
 784           errno = EINVAL;
 785           return NULL;
 786         }
 787 # endif
 788
 789       cd = iconv_open (to_codeset, from_codeset);
 790       if (cd == (iconv_t)(-1))
 791         return NULL;
 792
 793       if (c_strcasecmp (from_codeset, "UTF-8") == 0)
 794         cd1 = (iconv_t)(-1);
 795       else
 796         {
 797           cd1 = iconv_open ("UTF-8", from_codeset);
 798           if (cd1 == (iconv_t)(-1))
 799             {
 800               int saved_errno = errno;
 801               iconv_close (cd);
 802               errno = saved_errno;
 803               return NULL;
 804             }
 805         }
 806
 807       if (c_strcasecmp (to_codeset, "UTF-8") == 0)
 808         cd2 = (iconv_t)(-1);
 809       else
 810         {
 811           cd2 = iconv_open (to_codeset, "UTF-8");
 812           if (cd2 == (iconv_t)(-1))
 813             {
 814               int saved_errno = errno;
 815               if (cd1 != (iconv_t)(-1))
 816                 iconv_close (cd1);
 817               iconv_close (cd);
 818               errno = saved_errno;
 819               return NULL;
 820             }
 821         }
 822
 823       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
 824
 825       if (result == NULL)
 826         {
 827           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
 828           int saved_errno = errno;
 829           if (cd2 != (iconv_t)(-1))
 830             iconv_close (cd2);
 831           if (cd1 != (iconv_t)(-1))
 832             iconv_close (cd1);
 833           iconv_close (cd);
 834           errno = saved_errno;
 835         }
 836       else
 837         {
 838           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 839             {
 840               /* Return NULL, but free the allocated memory, and while doing
 841                  that, preserve the errno from iconv_close.  */
 842               int saved_errno = errno;
 843               if (cd1 != (iconv_t)(-1))
 844                 iconv_close (cd1);
 845               iconv_close (cd);
 846               free (result);
 847               errno = saved_errno;
 848               return NULL;
 849             }
 850           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 851             {
 852               /* Return NULL, but free the allocated memory, and while doing
 853                  that, preserve the errno from iconv_close.  */
 854               int saved_errno = errno;
 855               iconv_close (cd);
 856               free (result);
 857               errno = saved_errno;
 858               return NULL;
 859             }
 860           if (iconv_close (cd) < 0)
 861             {
 862               /* Return NULL, but free the allocated memory, and while doing
 863                  that, preserve the errno from iconv_close.  */
 864               int saved_errno = errno;
 865               free (result);
 866               errno = saved_errno;
 867               return NULL;
 868             }
 869         }
 870       return result;
 871 #else
 872       /* This is a different error code than if iconv_open existed but didn't
 873          support from_codeset and to_codeset, so that the caller can emit
 874          an error message such as
 875            "iconv() is not supported. Installing GNU libiconv and
 876             then reinstalling this package would fix this."  */
 877       errno = ENOSYS;
 878       return NULL;
 879 #endif
 880     }
 881 }