lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "unistr.h"
  32 #endif
  33
  34 #include "c-strcase.h"
  35 #include "c-strcaseeq.h"
  36
  37 #ifndef SIZE_MAX
  38 # define SIZE_MAX ((size_t) -1)
  39 #endif
  40
  41
  42 #if HAVE_ICONV
  43
  44 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  45    error occurs, we may have to determine the Unicode representation of the
  46    inconvertible character.  */
  47
  48 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  49    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  50    it has incremented the input pointers past the error location.  */
  51 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  52 /* Irix iconv() inserts a NUL byte if it cannot convert.
  53    NetBSD iconv() inserts a question mark if it cannot convert.
  54    Only GNU libiconv and GNU libc are known to prefer to fail rather
  55    than doing a lossy conversion.  */
  56 static size_t
  57 iconv_carefully (iconv_t cd,
  58                  const char **inbuf, size_t *inbytesleft,
  59                  char **outbuf, size_t *outbytesleft,
  60                  bool *incremented)
  61 {
  62   const char *inptr = *inbuf;
  63   const char *inptr_end = inptr + *inbytesleft;
  64   char *outptr = *outbuf;
  65   size_t outsize = *outbytesleft;
  66   const char *inptr_before;
  67   size_t res;
  68
  69   do
  70     {
  71       size_t insize;
  72
  73       inptr_before = inptr;
  74       res = (size_t)(-1);
  75
  76       for (insize = 1; inptr + insize <= inptr_end; insize++)
  77         {
  78           res = iconv (cd,
  79                        (ICONV_CONST char **) &inptr, &insize,
  80                        &outptr, &outsize);
  81           if (!(res == (size_t)(-1) && errno == EINVAL))
  82             break;
  83           /* We expect that no input bytes have been consumed so far.  */
  84           if (inptr != inptr_before)
  85             abort ();
  86         }
  87
  88       if (res == 0)
  89         {
  90           *outbuf = outptr;
  91           *outbytesleft = outsize;
  92         }
  93     }
  94   while (res == 0 && inptr < inptr_end);
  95
  96   *inbuf = inptr;
  97   *inbytesleft = inptr_end - inptr;
  98   if (res != (size_t)(-1) && res > 0)
  99     {
 100       /* iconv() has already incremented INPTR.  We cannot go back to a
 101          previous INPTR, otherwise the state inside CD would become invalid,
 102          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 103          *INBUF has already been incremented.  */
 104       *incremented = (inptr > inptr_before);
 105       errno = EILSEQ;
 106       return (size_t)(-1);
 107     }
 108   else
 109     {
 110       *incremented = false;
 111       return res;
 112     }
 113 }
 114 # else
 115 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 116      (*(incremented) = false, \
 117       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 118 # endif
 119
 120 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
 121    converting one character.  */
 122 static size_t
 123 iconv_carefully_1 (iconv_t cd,
 124                    const char **inbuf, size_t *inbytesleft,
 125                    char **outbuf, size_t *outbytesleft,
 126                    bool *incremented)
 127 {
 128   const char *inptr = *inbuf;
 129   const char *inptr_end = inptr + *inbytesleft;
 130   char *outptr = *outbuf;
 131   size_t outsize = *outbytesleft;
 132   const char *inptr_before = inptr;
 133   size_t res = (size_t)(-1);
 134   size_t insize;
 135
 136   for (insize = 1; inptr + insize <= inptr_end; insize++)
 137     {
 138       res = iconv (cd,
 139                    (ICONV_CONST char **) &inptr, &insize,
 140                    &outptr, &outsize);
 141       if (!(res == (size_t)(-1) && errno == EINVAL))
 142         break;
 143       /* We expect that no input bytes have been consumed so far.  */
 144       if (inptr != inptr_before)
 145         abort ();
 146     }
 147
 148   *inbuf = inptr;
 149   *inbytesleft = inptr_end - inptr;
 150 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 151   /* Irix iconv() inserts a NUL byte if it cannot convert.
 152      NetBSD iconv() inserts a question mark if it cannot convert.
 153      Only GNU libiconv and GNU libc are known to prefer to fail rather
 154      than doing a lossy conversion.  */
 155   if (res != (size_t)(-1) && res > 0)
 156     {
 157       /* iconv() has already incremented INPTR.  We cannot go back to a
 158          previous INPTR, otherwise the state inside CD would become invalid,
 159          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 160          *INBUF has already been incremented.  */
 161       *incremented = (inptr > inptr_before);
 162       errno = EILSEQ;
 163       return (size_t)(-1);
 164     }
 165 # endif
 166
 167   if (res != (size_t)(-1))
 168     {
 169       *outbuf = outptr;
 170       *outbytesleft = outsize;
 171     }
 172   *incremented = false;
 173   return res;
 174 }
 175
 176 static int
 177 mem_cd_iconveh_internal (const char *src, size_t srclen,
 178                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 179                          enum iconv_ilseq_handler handler,
 180                          size_t extra_alloc,
 181                          size_t *offsets,
 182                          char **resultp, size_t *lengthp)
 183 {
 184   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 185      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 186      Instead, we have to start afresh from the beginning of SRC.  */
 187   /* Use a temporary buffer, so that for small strings, a single malloc()
 188      call will be sufficient.  */
 189 # define tmpbufsize 4096
 190   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 191      libiconv's UCS-4-INTERNAL encoding.  */
 192   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 193 # define tmpbuf tmp.buf
 194
 195   char *initial_result;
 196   char *result;
 197   size_t allocated;
 198   size_t length;
 199   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 200
 201   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
 202     {
 203       initial_result = *resultp;
 204       allocated = *lengthp;
 205     }
 206   else
 207     {
 208       initial_result = tmpbuf;
 209       allocated = sizeof (tmpbuf);
 210     }
 211   result = initial_result;
 212
 213   if (offsets != NULL)
 214     {
 215       size_t i;
 216
 217       for (i = 0; i < srclen; i++)
 218         offsets[i] = (size_t)(-1);
 219
 220       last_length = (size_t)(-1);
 221     }
 222   length = 0;
 223
 224   /* First, try a direct conversion, and see whether a conversion error
 225      occurs at all.  */
 226   {
 227     const char *inptr = src;
 228     size_t insize = srclen;
 229
 230     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 231 # if defined _LIBICONV_VERSION \
 232      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 233     /* Set to the initial state.  */
 234     iconv (cd, NULL, NULL, NULL, NULL);
 235 # endif
 236
 237     while (insize > 0)
 238       {
 239         char *outptr = result + length;
 240         size_t outsize = allocated - extra_alloc - length;
 241         bool incremented;
 242         size_t res;
 243         bool grow;
 244
 245         if (offsets != NULL)
 246           {
 247             if (length != last_length) /* ensure that offset[] be increasing */
 248               {
 249                 offsets[inptr - src] = length;
 250                 last_length = length;
 251               }
 252             res = iconv_carefully_1 (cd,
 253                                      &inptr, &insize,
 254                                      &outptr, &outsize,
 255                                      &incremented);
 256           }
 257         else
 258           /* Use iconv_carefully instead of iconv here, because:
 259              - If TO_CODESET is UTF-8, we can do the error handling in this
 260                loop, no need for a second loop,
 261              - With iconv() implementations other than GNU libiconv and GNU
 262                libc, if we use iconv() in a big swoop, checking for an E2BIG
 263                return, we lose the number of irreversible conversions.  */
 264           res = iconv_carefully (cd,
 265                                  &inptr, &insize,
 266                                  &outptr, &outsize,
 267                                  &incremented);
 268
 269         length = outptr - result;
 270         grow = (length + extra_alloc > allocated / 2);
 271         if (res == (size_t)(-1))
 272           {
 273             if (errno == E2BIG)
 274               grow = true;
 275             else if (errno == EINVAL)
 276               break;
 277             else if (errno == EILSEQ && handler != iconveh_error)
 278               {
 279                 if (cd2 == (iconv_t)(-1))
 280                   {
 281                     /* TO_CODESET is UTF-8.  */
 282                     /* Error handling can produce up to 1 byte of output.  */
 283                     if (length + 1 + extra_alloc > allocated)
 284                       {
 285                         char *memory;
 286
 287                         allocated = 2 * allocated;
 288                         if (length + 1 + extra_alloc > allocated)
 289                           abort ();
 290                         if (result == initial_result)
 291                           memory = (char *) malloc (allocated);
 292                         else
 293                           memory = (char *) realloc (result, allocated);
 294                         if (memory == NULL)
 295                           {
 296                             if (result != initial_result)
 297                               free (result);
 298                             errno = ENOMEM;
 299                             return -1;
 300                           }
 301                         if (result == initial_result)
 302                           memcpy (memory, initial_result, length);
 303                         result = memory;
 304                         grow = false;
 305                       }
 306                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 307                        and emit a question mark.  */
 308                     if (!incremented)
 309                       {
 310                         if (insize == 0)
 311                           abort ();
 312                         inptr++;
 313                         insize--;
 314                       }
 315                     result[length] = '?';
 316                     length++;
 317                   }
 318                 else
 319                   goto indirectly;
 320               }
 321             else
 322               {
 323                 if (result != initial_result)
 324                   {
 325                     int saved_errno = errno;
 326                     free (result);
 327                     errno = saved_errno;
 328                   }
 329                 return -1;
 330               }
 331           }
 332         if (insize == 0)
 333           break;
 334         if (grow)
 335           {
 336             char *memory;
 337
 338             allocated = 2 * allocated;
 339             if (result == initial_result)
 340               memory = (char *) malloc (allocated);
 341             else
 342               memory = (char *) realloc (result, allocated);
 343             if (memory == NULL)
 344               {
 345                 if (result != initial_result)
 346                   free (result);
 347                 errno = ENOMEM;
 348                 return -1;
 349               }
 350             if (result == initial_result)
 351               memcpy (memory, initial_result, length);
 352             result = memory;
 353           }
 354       }
 355   }
 356
 357   /* Now get the conversion state back to the initial state.
 358      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 359 #if defined _LIBICONV_VERSION \
 360     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 361   for (;;)
 362     {
 363       char *outptr = result + length;
 364       size_t outsize = allocated - extra_alloc - length;
 365       size_t res;
 366
 367       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 368       length = outptr - result;
 369       if (res == (size_t)(-1))
 370         {
 371           if (errno == E2BIG)
 372             {
 373               char *memory;
 374
 375               allocated = 2 * allocated;
 376               if (result == initial_result)
 377                 memory = (char *) malloc (allocated);
 378               else
 379                 memory = (char *) realloc (result, allocated);
 380               if (memory == NULL)
 381                 {
 382                   if (result != initial_result)
 383                     free (result);
 384                   errno = ENOMEM;
 385                   return -1;
 386                 }
 387               if (result == initial_result)
 388                 memcpy (memory, initial_result, length);
 389               result = memory;
 390             }
 391           else
 392             {
 393               if (result != initial_result)
 394                 {
 395                   int saved_errno = errno;
 396                   free (result);
 397                   errno = saved_errno;
 398                 }
 399               return -1;
 400             }
 401         }
 402       else
 403         break;
 404     }
 405 #endif
 406
 407   /* The direct conversion succeeded.  */
 408   goto done;
 409
 410  indirectly:
 411   /* The direct conversion failed, handler != iconveh_error,
 412      and cd2 != (iconv_t)(-1).
 413      Use a conversion through UTF-8.  */
 414   if (offsets != NULL)
 415     {
 416       size_t i;
 417
 418       for (i = 0; i < srclen; i++)
 419         offsets[i] = (size_t)(-1);
 420
 421       last_length = (size_t)(-1);
 422     }
 423   length = 0;
 424   {
 425 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 426     char utf8buf[utf8bufsize + 1];
 427     size_t utf8len = 0;
 428     const char *in1ptr = src;
 429     size_t in1size = srclen;
 430     bool do_final_flush1 = true;
 431     bool do_final_flush2 = true;
 432
 433     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 434 # if defined _LIBICONV_VERSION \
 435      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 436     /* Set to the initial state.  */
 437     if (cd1 != (iconv_t)(-1))
 438       iconv (cd1, NULL, NULL, NULL, NULL);
 439     iconv (cd2, NULL, NULL, NULL, NULL);
 440 # endif
 441
 442     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 443       {
 444         char *out1ptr = utf8buf + utf8len;
 445         size_t out1size = utf8bufsize - utf8len;
 446         bool incremented1;
 447         size_t res1;
 448         int errno1;
 449
 450         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 451         if (in1size > 0)
 452           {
 453             if (offsets != NULL
 454                 && length != last_length) /* ensure that offset[] be increasing */
 455               {
 456                 offsets[in1ptr - src] = length;
 457                 last_length = length;
 458               }
 459             if (cd1 != (iconv_t)(-1))
 460               {
 461                 if (offsets != NULL)
 462                   res1 = iconv_carefully_1 (cd1,
 463                                             &in1ptr, &in1size,
 464                                             &out1ptr, &out1size,
 465                                             &incremented1);
 466                 else
 467                   res1 = iconv_carefully (cd1,
 468                                           &in1ptr, &in1size,
 469                                           &out1ptr, &out1size,
 470                                           &incremented1);
 471               }
 472             else
 473               {
 474                 /* FROM_CODESET is UTF-8.  */
 475                 res1 = 0;
 476                 do
 477                   {
 478                     ucs4_t uc;
 479                     int n;
 480                     int m;
 481
 482                     n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
 483                     if (uc == 0xfffd
 484                         && !(n >= 3
 485                              && (uint8_t)in1ptr[0] == 0xEF
 486                              && (uint8_t)in1ptr[1] == 0xBF
 487                              && (uint8_t)in1ptr[2] == 0xBD))
 488                       {
 489                         in1ptr += n;
 490                         in1size -= n;
 491                         errno = EILSEQ;
 492                         res1 = (size_t)(-1);
 493                         incremented1 = true;
 494                         break;
 495                       }
 496                     if (out1size == 0)
 497                       {
 498                         errno = E2BIG;
 499                         res1 = (size_t)(-1);
 500                         incremented1 = false;
 501                         break;
 502                       }
 503                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 504                     if (m == -2)
 505                       {
 506                         errno = E2BIG;
 507                         res1 = (size_t)(-1);
 508                         incremented1 = false;
 509                         break;
 510                       }
 511                     in1ptr += n;
 512                     in1size -= n;
 513                     if (m == -1)
 514                       {
 515                         errno = EILSEQ;
 516                         res1 = (size_t)(-1);
 517                         incremented1 = true;
 518                         break;
 519                       }
 520                     out1ptr += m;
 521                     out1size -= m;
 522                   }
 523                 while (offsets == NULL && in1size > 0);
 524               }
 525           }
 526         else if (do_final_flush1)
 527           {
 528             /* Now get the conversion state of CD1 back to the initial state.
 529                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 530 # if defined _LIBICONV_VERSION \
 531      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 532             if (cd1 != (iconv_t)(-1))
 533               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 534             else
 535 # endif
 536               res1 = 0;
 537             do_final_flush1 = false;
 538             incremented1 = true;
 539           }
 540         else
 541           {
 542             res1 = 0;
 543             incremented1 = true;
 544           }
 545         if (res1 == (size_t)(-1)
 546             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 547           {
 548             if (result != initial_result)
 549               {
 550                 int saved_errno = errno;
 551                 free (result);
 552                 errno = saved_errno;
 553               }
 554             return -1;
 555           }
 556         if (res1 == (size_t)(-1)
 557             && errno == EILSEQ && handler != iconveh_error)
 558           {
 559             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 560                emit a question mark.  Room for the question mark was allocated
 561                at the end of utf8buf.  */
 562             if (!incremented1)
 563               {
 564                 if (in1size == 0)
 565                   abort ();
 566                 in1ptr++;
 567                 in1size--;
 568               }
 569             utf8buf[utf8len++] = '?';
 570           }
 571         errno1 = errno;
 572         utf8len = out1ptr - utf8buf;
 573
 574         if (offsets != NULL
 575             || in1size == 0
 576             || utf8len > utf8bufsize / 2
 577             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 578           {
 579             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 580             const char *in2ptr = utf8buf;
 581             size_t in2size = utf8len;
 582
 583             while (in2size > 0
 584                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 585               {
 586                 char *out2ptr = result + length;
 587                 size_t out2size = allocated - extra_alloc - length;
 588                 bool incremented2;
 589                 size_t res2;
 590                 bool grow;
 591
 592                 if (in2size > 0)
 593                   res2 = iconv_carefully (cd2,
 594                                           &in2ptr, &in2size,
 595                                           &out2ptr, &out2size,
 596                                           &incremented2);
 597                 else /* in1size == 0 && !do_final_flush1
 598                         && in2size == 0 && do_final_flush2 */
 599                   {
 600                     /* Now get the conversion state of CD1 back to the initial
 601                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 602 # if defined _LIBICONV_VERSION \
 603      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 604                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 605 # else
 606                     res2 = 0;
 607 # endif
 608                     do_final_flush2 = false;
 609                     incremented2 = true;
 610                   }
 611
 612                 length = out2ptr - result;
 613                 grow = (length + extra_alloc > allocated / 2);
 614                 if (res2 == (size_t)(-1))
 615                   {
 616                     if (errno == E2BIG)
 617                       grow = true;
 618                     else if (errno == EINVAL)
 619                       break;
 620                     else if (errno == EILSEQ && handler != iconveh_error)
 621                       {
 622                         /* Error handling can produce up to 10 bytes of ASCII
 623                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 624                            UCS-4, so use CD2 here as well.  */
 625                         char scratchbuf[10];
 626                         size_t scratchlen;
 627                         ucs4_t uc;
 628                         const char *inptr;
 629                         size_t insize;
 630                         size_t res;
 631
 632                         if (incremented2)
 633                           {
 634                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 635                                          (const uint8_t *) utf8buf)
 636                                 == NULL)
 637                               abort ();
 638                           }
 639                         else
 640                           {
 641                             int n;
 642                             if (in2size == 0)
 643                               abort ();
 644                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
 645                                                   in2size);
 646                             in2ptr += n;
 647                             in2size -= n;
 648                           }
 649
 650                         if (handler == iconveh_escape_sequence)
 651                           {
 652                             static char hex[16] = "0123456789ABCDEF";
 653                             scratchlen = 0;
 654                             scratchbuf[scratchlen++] = '\\';
 655                             if (uc < 0x10000)
 656                               scratchbuf[scratchlen++] = 'u';
 657                             else
 658                               {
 659                                 scratchbuf[scratchlen++] = 'U';
 660                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 661                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 662                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 663                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 664                               }
 665                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 666                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 667                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 668                             scratchbuf[scratchlen++] = hex[uc & 15];
 669                           }
 670                         else
 671                           {
 672                             scratchbuf[0] = '?';
 673                             scratchlen = 1;
 674                           }
 675
 676                         inptr = scratchbuf;
 677                         insize = scratchlen;
 678                         res = iconv (cd2,
 679                                      (ICONV_CONST char **) &inptr, &insize,
 680                                      &out2ptr, &out2size);
 681                         length = out2ptr - result;
 682                         if (res == (size_t)(-1) && errno == E2BIG)
 683                           {
 684                             char *memory;
 685
 686                             allocated = 2 * allocated;
 687                             if (length + 1 + extra_alloc > allocated)
 688                               abort ();
 689                             if (result == initial_result)
 690                               memory = (char *) malloc (allocated);
 691                             else
 692                               memory = (char *) realloc (result, allocated);
 693                             if (memory == NULL)
 694                               {
 695                                 if (result != initial_result)
 696                                   free (result);
 697                                 errno = ENOMEM;
 698                                 return -1;
 699                               }
 700                             if (result == initial_result)
 701                               memcpy (memory, initial_result, length);
 702                             result = memory;
 703                             grow = false;
 704
 705                             out2ptr = result + length;
 706                             out2size = allocated - extra_alloc - length;
 707                             res = iconv (cd2,
 708                                          (ICONV_CONST char **) &inptr, &insize,
 709                                          &out2ptr, &out2size);
 710                             length = out2ptr - result;
 711                           }
 712 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 713                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 714                            NetBSD iconv() inserts a question mark if it cannot
 715                            convert.
 716                            Only GNU libiconv and GNU libc are known to prefer
 717                            to fail rather than doing a lossy conversion.  */
 718                         if (res != (size_t)(-1) && res > 0)
 719                           {
 720                             errno = EILSEQ;
 721                             res = (size_t)(-1);
 722                           }
 723 # endif
 724                         if (res == (size_t)(-1))
 725                           {
 726                             /* Failure converting the ASCII replacement.  */
 727                             if (result != initial_result)
 728                               {
 729                                 int saved_errno = errno;
 730                                 free (result);
 731                                 errno = saved_errno;
 732                               }
 733                             return -1;
 734                           }
 735                       }
 736                     else
 737                       {
 738                         if (result != initial_result)
 739                           {
 740                             int saved_errno = errno;
 741                             free (result);
 742                             errno = saved_errno;
 743                           }
 744                         return -1;
 745                       }
 746                   }
 747                 if (!(in2size > 0
 748                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 749                   break;
 750                 if (grow)
 751                   {
 752                     char *memory;
 753
 754                     allocated = 2 * allocated;
 755                     if (result == initial_result)
 756                       memory = (char *) malloc (allocated);
 757                     else
 758                       memory = (char *) realloc (result, allocated);
 759                     if (memory == NULL)
 760                       {
 761                         if (result != initial_result)
 762                           free (result);
 763                         errno = ENOMEM;
 764                         return -1;
 765                       }
 766                     if (result == initial_result)
 767                       memcpy (memory, initial_result, length);
 768                     result = memory;
 769                   }
 770               }
 771
 772             /* Move the remaining bytes to the beginning of utf8buf.  */
 773             if (in2size > 0)
 774               memmove (utf8buf, in2ptr, in2size);
 775             utf8len = in2size;
 776           }
 777
 778         if (res1 == (size_t)(-1))
 779           {
 780             if (errno1 == EINVAL)
 781               in1size = 0;
 782             else if (errno1 == EILSEQ)
 783               {
 784                 if (result != initial_result)
 785                   free (result);
 786                 errno = errno1;
 787                 return -1;
 788               }
 789           }
 790       }
 791 # undef utf8bufsize
 792   }
 793
 794  done:
 795   /* Now the final memory allocation.  */
 796   if (result == tmpbuf)
 797     {
 798       char *memory;
 799
 800       memory = (char *) malloc (length + extra_alloc);
 801       if (memory != NULL)
 802         {
 803           memcpy (memory, tmpbuf, length);
 804           result = memory;
 805         }
 806       else
 807         {
 808           errno = ENOMEM;
 809           return -1;
 810         }
 811     }
 812   else if (result != *resultp && length + extra_alloc < allocated)
 813     {
 814       /* Shrink the allocated memory if possible.  */
 815       char *memory;
 816
 817       memory = (char *) realloc (result, length + extra_alloc);
 818       if (memory != NULL)
 819         result = memory;
 820     }
 821   *resultp = result;
 822   *lengthp = length;
 823   return 0;
 824 # undef tmpbuf
 825 # undef tmpbufsize
 826 }
 827
 828 int
 829 mem_cd_iconveh (const char *src, size_t srclen,
 830                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 831                 enum iconv_ilseq_handler handler,
 832                 size_t *offsets,
 833                 char **resultp, size_t *lengthp)
 834 {
 835   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 836                                   offsets, resultp, lengthp);
 837 }
 838
 839 char *
 840 str_cd_iconveh (const char *src,
 841                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 842                 enum iconv_ilseq_handler handler)
 843 {
 844   /* For most encodings, a trailing NUL byte in the input will be converted
 845      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 846      function is usable for UTF-7, we have to exclude the NUL byte from the
 847      conversion and add it by hand afterwards.  */
 848   char *result = NULL;
 849   size_t length = 0;
 850   int retval = mem_cd_iconveh_internal (src, strlen (src),
 851                                         cd, cd1, cd2, handler, 1, NULL,
 852                                         &result, &length);
 853
 854   if (retval < 0)
 855     {
 856       if (result != NULL)
 857         {
 858           int saved_errno = errno;
 859           free (result);
 860           errno = saved_errno;
 861         }
 862       return NULL;
 863     }
 864
 865   /* Add the terminating NUL byte.  */
 866   result[length] = '\0';
 867
 868   return result;
 869 }
 870
 871 #endif
 872
 873 int
 874 mem_iconveh (const char *src, size_t srclen,
 875              const char *from_codeset, const char *to_codeset,
 876              enum iconv_ilseq_handler handler,
 877              size_t *offsets,
 878              char **resultp, size_t *lengthp)
 879 {
 880   if (srclen == 0)
 881     {
 882       /* Nothing to convert.  */
 883       *lengthp = 0;
 884       return 0;
 885     }
 886   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
 887     {
 888       char *result;
 889
 890       if (*resultp != NULL && *lengthp >= srclen)
 891         result = *resultp;
 892       else
 893         {
 894           result = (char *) malloc (srclen);
 895           if (result == NULL)
 896             {
 897               errno = ENOMEM;
 898               return -1;
 899             }
 900         }
 901       memcpy (result, src, srclen);
 902       *resultp = result;
 903       *lengthp = srclen;
 904       return 0;
 905     }
 906   else
 907     {
 908 #if HAVE_ICONV
 909       iconv_t cd;
 910       iconv_t cd1;
 911       iconv_t cd2;
 912       char *result;
 913       size_t length;
 914       int retval;
 915
 916       /* Avoid glibc-2.1 bug with EUC-KR.  */
 917 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 918       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 919           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 920         {
 921           errno = EINVAL;
 922           return -1;
 923         }
 924 # endif
 925
 926       cd = iconv_open (to_codeset, from_codeset);
 927       if (cd == (iconv_t)(-1))
 928         return -1;
 929
 930       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 931         cd1 = (iconv_t)(-1);
 932       else
 933         {
 934           cd1 = iconv_open ("UTF-8", from_codeset);
 935           if (cd1 == (iconv_t)(-1))
 936             {
 937               int saved_errno = errno;
 938               iconv_close (cd);
 939               errno = saved_errno;
 940               return -1;
 941             }
 942         }
 943
 944       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 945         cd2 = (iconv_t)(-1);
 946       else
 947         {
 948           cd2 = iconv_open (to_codeset, "UTF-8");
 949           if (cd2 == (iconv_t)(-1))
 950             {
 951               int saved_errno = errno;
 952               if (cd1 != (iconv_t)(-1))
 953                 iconv_close (cd1);
 954               iconv_close (cd);
 955               errno = saved_errno;
 956               return -1;
 957             }
 958         }
 959
 960       result = *resultp;
 961       length = *lengthp;
 962       retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
 963                                &result, &length);
 964
 965       if (retval < 0)
 966         {
 967           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
 968           int saved_errno = errno;
 969           if (cd2 != (iconv_t)(-1))
 970             iconv_close (cd2);
 971           if (cd1 != (iconv_t)(-1))
 972             iconv_close (cd1);
 973           iconv_close (cd);
 974           errno = saved_errno;
 975         }
 976       else
 977         {
 978           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 979             {
 980               /* Return -1, but free the allocated memory, and while doing
 981                  that, preserve the errno from iconv_close.  */
 982               int saved_errno = errno;
 983               if (cd1 != (iconv_t)(-1))
 984                 iconv_close (cd1);
 985               iconv_close (cd);
 986               if (result != *resultp && result != NULL)
 987                 free (result);
 988               errno = saved_errno;
 989               return -1;
 990             }
 991           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 992             {
 993               /* Return -1, but free the allocated memory, and while doing
 994                  that, preserve the errno from iconv_close.  */
 995               int saved_errno = errno;
 996               iconv_close (cd);
 997               if (result != *resultp && result != NULL)
 998                 free (result);
 999               errno = saved_errno;
1000               return -1;
1001             }
1002           if (iconv_close (cd) < 0)
1003             {
1004               /* Return -1, but free the allocated memory, and while doing
1005                  that, preserve the errno from iconv_close.  */
1006               int saved_errno = errno;
1007               if (result != *resultp && result != NULL)
1008                 free (result);
1009               errno = saved_errno;
1010               return -1;
1011             }
1012           *resultp = result;
1013           *lengthp = length;
1014         }
1015       return retval;
1016 #else
1017       /* This is a different error code than if iconv_open existed but didn't
1018          support from_codeset and to_codeset, so that the caller can emit
1019          an error message such as
1020            "iconv() is not supported. Installing GNU libiconv and
1021             then reinstalling this package would fix this."  */
1022       errno = ENOSYS;
1023       return -1;
1024 #endif
1025     }
1026 }
1027
1028 char *
1029 str_iconveh (const char *src,
1030              const char *from_codeset, const char *to_codeset,
1031              enum iconv_ilseq_handler handler)
1032 {
1033   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
1034     {
1035       char *result = strdup (src);
1036
1037       if (result == NULL)
1038         errno = ENOMEM;
1039       return result;
1040     }
1041   else
1042     {
1043 #if HAVE_ICONV
1044       iconv_t cd;
1045       iconv_t cd1;
1046       iconv_t cd2;
1047       char *result;
1048
1049       /* Avoid glibc-2.1 bug with EUC-KR.  */
1050 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
1051       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
1052           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
1053         {
1054           errno = EINVAL;
1055           return NULL;
1056         }
1057 # endif
1058
1059       cd = iconv_open (to_codeset, from_codeset);
1060       if (cd == (iconv_t)(-1))
1061         return NULL;
1062
1063       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1064         cd1 = (iconv_t)(-1);
1065       else
1066         {
1067           cd1 = iconv_open ("UTF-8", from_codeset);
1068           if (cd1 == (iconv_t)(-1))
1069             {
1070               int saved_errno = errno;
1071               iconv_close (cd);
1072               errno = saved_errno;
1073               return NULL;
1074             }
1075         }
1076
1077       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1078         cd2 = (iconv_t)(-1);
1079       else
1080         {
1081           cd2 = iconv_open (to_codeset, "UTF-8");
1082           if (cd2 == (iconv_t)(-1))
1083             {
1084               int saved_errno = errno;
1085               if (cd1 != (iconv_t)(-1))
1086                 iconv_close (cd1);
1087               iconv_close (cd);
1088               errno = saved_errno;
1089               return NULL;
1090             }
1091         }
1092
1093       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
1094
1095       if (result == NULL)
1096         {
1097           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
1098           int saved_errno = errno;
1099           if (cd2 != (iconv_t)(-1))
1100             iconv_close (cd2);
1101           if (cd1 != (iconv_t)(-1))
1102             iconv_close (cd1);
1103           iconv_close (cd);
1104           errno = saved_errno;
1105         }
1106       else
1107         {
1108           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
1109             {
1110               /* Return NULL, but free the allocated memory, and while doing
1111                  that, preserve the errno from iconv_close.  */
1112               int saved_errno = errno;
1113               if (cd1 != (iconv_t)(-1))
1114                 iconv_close (cd1);
1115               iconv_close (cd);
1116               free (result);
1117               errno = saved_errno;
1118               return NULL;
1119             }
1120           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
1121             {
1122               /* Return NULL, but free the allocated memory, and while doing
1123                  that, preserve the errno from iconv_close.  */
1124               int saved_errno = errno;
1125               iconv_close (cd);
1126               free (result);
1127               errno = saved_errno;
1128               return NULL;
1129             }
1130           if (iconv_close (cd) < 0)
1131             {
1132               /* Return NULL, but free the allocated memory, and while doing
1133                  that, preserve the errno from iconv_close.  */
1134               int saved_errno = errno;
1135               free (result);
1136               errno = saved_errno;
1137               return NULL;
1138             }
1139         }
1140       return result;
1141 #else
1142       /* This is a different error code than if iconv_open existed but didn't
1143          support from_codeset and to_codeset, so that the caller can emit
1144          an error message such as
1145            "iconv() is not supported. Installing GNU libiconv and
1146             then reinstalling this package would fix this."  */
1147       errno = ENOSYS;
1148       return NULL;
1149 #endif
1150     }
1151 }