lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "unistr.h"
  32 #endif
  33
  34 #include "c-strcase.h"
  35 #include "c-strcaseeq.h"
  36
  37 #ifndef SIZE_MAX
  38 # define SIZE_MAX ((size_t) -1)
  39 #endif
  40
  41
  42 #if HAVE_ICONV
  43
  44 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  45    error occurs, we may have to determine the Unicode representation of the
  46    inconvertible character.  */
  47
  48 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  49    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  50    it has incremented the input pointers past the error location.  */
  51 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  52 /* Irix iconv() inserts a NUL byte if it cannot convert.
  53    NetBSD iconv() inserts a question mark if it cannot convert.
  54    Only GNU libiconv and GNU libc are known to prefer to fail rather
  55    than doing a lossy conversion.  */
  56 static size_t
  57 iconv_carefully (iconv_t cd,
  58                  const char **inbuf, size_t *inbytesleft,
  59                  char **outbuf, size_t *outbytesleft,
  60                  bool *incremented)
  61 {
  62   const char *inptr = *inbuf;
  63   const char *inptr_end = inptr + *inbytesleft;
  64   char *outptr = *outbuf;
  65   size_t outsize = *outbytesleft;
  66   const char *inptr_before;
  67   size_t res;
  68
  69   do
  70     {
  71       size_t insize;
  72
  73       inptr_before = inptr;
  74       res = (size_t)(-1);
  75
  76       for (insize = 1; inptr + insize <= inptr_end; insize++)
  77         {
  78           res = iconv (cd,
  79                        (ICONV_CONST char **) &inptr, &insize,
  80                        &outptr, &outsize);
  81           if (!(res == (size_t)(-1) && errno == EINVAL))
  82             break;
  83           /* We expect that no input bytes have been consumed so far.  */
  84           if (inptr != inptr_before)
  85             abort ();
  86         }
  87
  88       if (res == 0)
  89         {
  90           *outbuf = outptr;
  91           *outbytesleft = outsize;
  92         }
  93     }
  94   while (res == 0 && inptr < inptr_end);
  95
  96   *inbuf = inptr;
  97   *inbytesleft = inptr_end - inptr;
  98   if (res != (size_t)(-1) && res > 0)
  99     {
 100       /* iconv() has already incremented INPTR.  We cannot go back to a
 101          previous INPTR, otherwise the state inside CD would become invalid,
 102          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 103          *INBUF has already been incremented.  */
 104       *incremented = (inptr > inptr_before);
 105       errno = EILSEQ;
 106       return (size_t)(-1);
 107     }
 108   else
 109     {
 110       *incremented = false;
 111       return res;
 112     }
 113 }
 114 # else
 115 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 116      (*(incremented) = false, \
 117       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 118 # endif
 119
 120 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
 121    converting one character.  */
 122 static size_t
 123 iconv_carefully_1 (iconv_t cd,
 124                    const char **inbuf, size_t *inbytesleft,
 125                    char **outbuf, size_t *outbytesleft,
 126                    bool *incremented)
 127 {
 128   const char *inptr = *inbuf;
 129   const char *inptr_end = inptr + *inbytesleft;
 130   char *outptr = *outbuf;
 131   size_t outsize = *outbytesleft;
 132   const char *inptr_before = inptr;
 133   size_t res = (size_t)(-1);
 134   size_t insize;
 135
 136   for (insize = 1; inptr + insize <= inptr_end; insize++)
 137     {
 138       res = iconv (cd,
 139                    (ICONV_CONST char **) &inptr, &insize,
 140                    &outptr, &outsize);
 141       if (!(res == (size_t)(-1) && errno == EINVAL))
 142         break;
 143       /* We expect that no input bytes have been consumed so far.  */
 144       if (inptr != inptr_before)
 145         abort ();
 146     }
 147
 148   *inbuf = inptr;
 149   *inbytesleft = inptr_end - inptr;
 150 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 151   /* Irix iconv() inserts a NUL byte if it cannot convert.
 152      NetBSD iconv() inserts a question mark if it cannot convert.
 153      Only GNU libiconv and GNU libc are known to prefer to fail rather
 154      than doing a lossy conversion.  */
 155   if (res != (size_t)(-1) && res > 0)
 156     {
 157       /* iconv() has already incremented INPTR.  We cannot go back to a
 158          previous INPTR, otherwise the state inside CD would become invalid,
 159          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 160          *INBUF has already been incremented.  */
 161       *incremented = (inptr > inptr_before);
 162       errno = EILSEQ;
 163       return (size_t)(-1);
 164     }
 165 # endif
 166
 167   if (res != (size_t)(-1))
 168     {
 169       *outbuf = outptr;
 170       *outbytesleft = outsize;
 171     }
 172   *incremented = false;
 173   return res;
 174 }
 175
 176 static int
 177 mem_cd_iconveh_internal (const char *src, size_t srclen,
 178                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 179                          enum iconv_ilseq_handler handler,
 180                          size_t extra_alloc,
 181                          size_t *offsets,
 182                          char **resultp, size_t *lengthp)
 183 {
 184   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 185      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 186      Instead, we have to start afresh from the beginning of SRC.  */
 187   /* Use a temporary buffer, so that for small strings, a single malloc()
 188      call will be sufficient.  */
 189 # define tmpbufsize 4096
 190   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 191      libiconv's UCS-4-INTERNAL encoding.  */
 192   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 193 # define tmpbuf tmp.buf
 194
 195   char *initial_result;
 196   char *result;
 197   size_t allocated;
 198   size_t length;
 199   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 200
 201   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
 202     {
 203       initial_result = *resultp;
 204       allocated = *lengthp;
 205     }
 206   else
 207     {
 208       initial_result = tmpbuf;
 209       allocated = sizeof (tmpbuf);
 210     }
 211   result = initial_result;
 212
 213   if (offsets != NULL)
 214     {
 215       size_t i;
 216
 217       for (i = 0; i < srclen; i++)
 218         offsets[i] = (size_t)(-1);
 219
 220       last_length = (size_t)(-1);
 221     }
 222   length = 0;
 223
 224   /* First, try a direct conversion, and see whether a conversion error
 225      occurs at all.  */
 226   {
 227     const char *inptr = src;
 228     size_t insize = srclen;
 229
 230     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 231 # if defined _LIBICONV_VERSION \
 232      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 233     /* Set to the initial state.  */
 234     iconv (cd, NULL, NULL, NULL, NULL);
 235 # endif
 236
 237     while (insize > 0)
 238       {
 239         char *outptr = result + length;
 240         size_t outsize = allocated - extra_alloc - length;
 241         bool incremented;
 242         size_t res;
 243         bool grow;
 244
 245         if (offsets != NULL)
 246           {
 247             if (length != last_length) /* ensure that offset[] be increasing */
 248               {
 249                 offsets[inptr - src] = length;
 250                 last_length = length;
 251               }
 252             res = iconv_carefully_1 (cd,
 253                                      &inptr, &insize,
 254                                      &outptr, &outsize,
 255                                      &incremented);
 256           }
 257         else
 258           /* Use iconv_carefully instead of iconv here, because:
 259              - If TO_CODESET is UTF-8, we can do the error handling in this
 260                loop, no need for a second loop,
 261              - With iconv() implementations other than GNU libiconv and GNU
 262                libc, if we use iconv() in a big swoop, checking for an E2BIG
 263                return, we lose the number of irreversible conversions.  */
 264           res = iconv_carefully (cd,
 265                                  &inptr, &insize,
 266                                  &outptr, &outsize,
 267                                  &incremented);
 268
 269         length = outptr - result;
 270         grow = (length + extra_alloc > allocated / 2);
 271         if (res == (size_t)(-1))
 272           {
 273             if (errno == E2BIG)
 274               grow = true;
 275             else if (errno == EINVAL)
 276               break;
 277             else if (errno == EILSEQ && handler != iconveh_error)
 278               {
 279                 if (cd2 == (iconv_t)(-1))
 280                   {
 281                     /* TO_CODESET is UTF-8.  */
 282                     /* Error handling can produce up to 1 byte of output.  */
 283                     if (length + 1 + extra_alloc > allocated)
 284                       {
 285                         char *memory;
 286
 287                         allocated = 2 * allocated;
 288                         if (length + 1 + extra_alloc > allocated)
 289                           abort ();
 290                         if (result == initial_result)
 291                           memory = (char *) malloc (allocated);
 292                         else
 293                           memory = (char *) realloc (result, allocated);
 294                         if (memory == NULL)
 295                           {
 296                             if (result != initial_result)
 297                               free (result);
 298                             errno = ENOMEM;
 299                             return -1;
 300                           }
 301                         if (result == initial_result)
 302                           memcpy (memory, initial_result, length);
 303                         result = memory;
 304                         grow = false;
 305                       }
 306                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 307                        and emit a question mark.  */
 308                     if (!incremented)
 309                       {
 310                         if (insize == 0)
 311                           abort ();
 312                         inptr++;
 313                         insize--;
 314                       }
 315                     result[length] = '?';
 316                     length++;
 317                   }
 318                 else
 319                   goto indirectly;
 320               }
 321             else
 322               {
 323                 if (result != initial_result)
 324                   {
 325                     int saved_errno = errno;
 326                     free (result);
 327                     errno = saved_errno;
 328                   }
 329                 return -1;
 330               }
 331           }
 332         if (insize == 0)
 333           break;
 334         if (grow)
 335           {
 336             char *memory;
 337
 338             allocated = 2 * allocated;
 339             if (result == initial_result)
 340               memory = (char *) malloc (allocated);
 341             else
 342               memory = (char *) realloc (result, allocated);
 343             if (memory == NULL)
 344               {
 345                 if (result != initial_result)
 346                   free (result);
 347                 errno = ENOMEM;
 348                 return -1;
 349               }
 350             if (result == initial_result)
 351               memcpy (memory, initial_result, length);
 352             result = memory;
 353           }
 354       }
 355   }
 356
 357   /* Now get the conversion state back to the initial state.
 358      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 359 #if defined _LIBICONV_VERSION \
 360     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 361   for (;;)
 362     {
 363       char *outptr = result + length;
 364       size_t outsize = allocated - extra_alloc - length;
 365       size_t res;
 366
 367       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 368       length = outptr - result;
 369       if (res == (size_t)(-1))
 370         {
 371           if (errno == E2BIG)
 372             {
 373               char *memory;
 374
 375               allocated = 2 * allocated;
 376               if (result == initial_result)
 377                 memory = (char *) malloc (allocated);
 378               else
 379                 memory = (char *) realloc (result, allocated);
 380               if (memory == NULL)
 381                 {
 382                   if (result != initial_result)
 383                     free (result);
 384                   errno = ENOMEM;
 385                   return -1;
 386                 }
 387               if (result == initial_result)
 388                 memcpy (memory, initial_result, length);
 389               result = memory;
 390             }
 391           else
 392             {
 393               if (result != initial_result)
 394                 {
 395                   int saved_errno = errno;
 396                   free (result);
 397                   errno = saved_errno;
 398                 }
 399               return -1;
 400             }
 401         }
 402       else
 403         break;
 404     }
 405 #endif
 406
 407   /* The direct conversion succeeded.  */
 408   goto done;
 409
 410  indirectly:
 411   /* The direct conversion failed, handler != iconveh_error,
 412      and cd2 != (iconv_t)(-1).
 413      Use a conversion through UTF-8.  */
 414   if (offsets != NULL)
 415     {
 416       size_t i;
 417
 418       for (i = 0; i < srclen; i++)
 419         offsets[i] = (size_t)(-1);
 420
 421       last_length = (size_t)(-1);
 422     }
 423   length = 0;
 424   {
 425 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 426     char utf8buf[utf8bufsize + 1];
 427     size_t utf8len = 0;
 428     const char *in1ptr = src;
 429     size_t in1size = srclen;
 430     bool do_final_flush1 = true;
 431     bool do_final_flush2 = true;
 432
 433     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 434 # if defined _LIBICONV_VERSION \
 435      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 436     /* Set to the initial state.  */
 437     if (cd1 != (iconv_t)(-1))
 438       iconv (cd1, NULL, NULL, NULL, NULL);
 439     iconv (cd2, NULL, NULL, NULL, NULL);
 440 # endif
 441
 442     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 443       {
 444         char *out1ptr = utf8buf + utf8len;
 445         size_t out1size = utf8bufsize - utf8len;
 446         bool incremented1;
 447         size_t res1;
 448         int errno1;
 449
 450         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 451         if (in1size > 0)
 452           {
 453             if (offsets != NULL
 454                 && length != last_length) /* ensure that offset[] be increasing */
 455               {
 456                 offsets[in1ptr - src] = length;
 457                 last_length = length;
 458               }
 459             if (cd1 != (iconv_t)(-1))
 460               {
 461                 if (offsets != NULL)
 462                   res1 = iconv_carefully_1 (cd1,
 463                                             &in1ptr, &in1size,
 464                                             &out1ptr, &out1size,
 465                                             &incremented1);
 466                 else
 467                   res1 = iconv_carefully (cd1,
 468                                           &in1ptr, &in1size,
 469                                           &out1ptr, &out1size,
 470                                           &incremented1);
 471               }
 472             else
 473               {
 474                 /* FROM_CODESET is UTF-8.  */
 475                 res1 = 0;
 476                 do
 477                   {
 478                     ucs4_t uc;
 479                     int n;
 480                     int m;
 481
 482                     n = u8_mbtoucr (&uc, (const uint8_t *) in1ptr, in1size);
 483                     if (n < 0)
 484                       {
 485                         errno = (n == -2 ? EINVAL : EILSEQ);
 486                         n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
 487                         in1ptr += n;
 488                         in1size -= n;
 489                         res1 = (size_t)(-1);
 490                         incremented1 = true;
 491                         break;
 492                       }
 493                     if (out1size == 0)
 494                       {
 495                         errno = E2BIG;
 496                         res1 = (size_t)(-1);
 497                         incremented1 = false;
 498                         break;
 499                       }
 500                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 501                     if (m == -2)
 502                       {
 503                         errno = E2BIG;
 504                         res1 = (size_t)(-1);
 505                         incremented1 = false;
 506                         break;
 507                       }
 508                     in1ptr += n;
 509                     in1size -= n;
 510                     if (m == -1)
 511                       {
 512                         errno = EILSEQ;
 513                         res1 = (size_t)(-1);
 514                         incremented1 = true;
 515                         break;
 516                       }
 517                     out1ptr += m;
 518                     out1size -= m;
 519                   }
 520                 while (offsets == NULL && in1size > 0);
 521               }
 522           }
 523         else if (do_final_flush1)
 524           {
 525             /* Now get the conversion state of CD1 back to the initial state.
 526                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 527 # if defined _LIBICONV_VERSION \
 528      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 529             if (cd1 != (iconv_t)(-1))
 530               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 531             else
 532 # endif
 533               res1 = 0;
 534             do_final_flush1 = false;
 535             incremented1 = true;
 536           }
 537         else
 538           {
 539             res1 = 0;
 540             incremented1 = true;
 541           }
 542         if (res1 == (size_t)(-1)
 543             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 544           {
 545             if (result != initial_result)
 546               {
 547                 int saved_errno = errno;
 548                 free (result);
 549                 errno = saved_errno;
 550               }
 551             return -1;
 552           }
 553         if (res1 == (size_t)(-1)
 554             && errno == EILSEQ && handler != iconveh_error)
 555           {
 556             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 557                emit a question mark.  Room for the question mark was allocated
 558                at the end of utf8buf.  */
 559             if (!incremented1)
 560               {
 561                 if (in1size == 0)
 562                   abort ();
 563                 in1ptr++;
 564                 in1size--;
 565               }
 566             utf8buf[utf8len++] = '?';
 567           }
 568         errno1 = errno;
 569         utf8len = out1ptr - utf8buf;
 570
 571         if (offsets != NULL
 572             || in1size == 0
 573             || utf8len > utf8bufsize / 2
 574             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 575           {
 576             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 577             const char *in2ptr = utf8buf;
 578             size_t in2size = utf8len;
 579
 580             while (in2size > 0
 581                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 582               {
 583                 char *out2ptr = result + length;
 584                 size_t out2size = allocated - extra_alloc - length;
 585                 bool incremented2;
 586                 size_t res2;
 587                 bool grow;
 588
 589                 if (in2size > 0)
 590                   res2 = iconv_carefully (cd2,
 591                                           &in2ptr, &in2size,
 592                                           &out2ptr, &out2size,
 593                                           &incremented2);
 594                 else /* in1size == 0 && !do_final_flush1
 595                         && in2size == 0 && do_final_flush2 */
 596                   {
 597                     /* Now get the conversion state of CD1 back to the initial
 598                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 599 # if defined _LIBICONV_VERSION \
 600      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 601                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 602 # else
 603                     res2 = 0;
 604 # endif
 605                     do_final_flush2 = false;
 606                     incremented2 = true;
 607                   }
 608
 609                 length = out2ptr - result;
 610                 grow = (length + extra_alloc > allocated / 2);
 611                 if (res2 == (size_t)(-1))
 612                   {
 613                     if (errno == E2BIG)
 614                       grow = true;
 615                     else if (errno == EINVAL)
 616                       break;
 617                     else if (errno == EILSEQ && handler != iconveh_error)
 618                       {
 619                         /* Error handling can produce up to 10 bytes of ASCII
 620                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 621                            UCS-4, so use CD2 here as well.  */
 622                         char scratchbuf[10];
 623                         size_t scratchlen;
 624                         ucs4_t uc;
 625                         const char *inptr;
 626                         size_t insize;
 627                         size_t res;
 628
 629                         if (incremented2)
 630                           {
 631                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 632                                          (const uint8_t *) utf8buf)
 633                                 == NULL)
 634                               abort ();
 635                           }
 636                         else
 637                           {
 638                             int n;
 639                             if (in2size == 0)
 640                               abort ();
 641                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
 642                                                   in2size);
 643                             in2ptr += n;
 644                             in2size -= n;
 645                           }
 646
 647                         if (handler == iconveh_escape_sequence)
 648                           {
 649                             static char hex[16] = "0123456789ABCDEF";
 650                             scratchlen = 0;
 651                             scratchbuf[scratchlen++] = '\\';
 652                             if (uc < 0x10000)
 653                               scratchbuf[scratchlen++] = 'u';
 654                             else
 655                               {
 656                                 scratchbuf[scratchlen++] = 'U';
 657                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 658                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 659                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 660                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 661                               }
 662                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 663                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 664                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 665                             scratchbuf[scratchlen++] = hex[uc & 15];
 666                           }
 667                         else
 668                           {
 669                             scratchbuf[0] = '?';
 670                             scratchlen = 1;
 671                           }
 672
 673                         inptr = scratchbuf;
 674                         insize = scratchlen;
 675                         res = iconv (cd2,
 676                                      (ICONV_CONST char **) &inptr, &insize,
 677                                      &out2ptr, &out2size);
 678                         length = out2ptr - result;
 679                         if (res == (size_t)(-1) && errno == E2BIG)
 680                           {
 681                             char *memory;
 682
 683                             allocated = 2 * allocated;
 684                             if (length + 1 + extra_alloc > allocated)
 685                               abort ();
 686                             if (result == initial_result)
 687                               memory = (char *) malloc (allocated);
 688                             else
 689                               memory = (char *) realloc (result, allocated);
 690                             if (memory == NULL)
 691                               {
 692                                 if (result != initial_result)
 693                                   free (result);
 694                                 errno = ENOMEM;
 695                                 return -1;
 696                               }
 697                             if (result == initial_result)
 698                               memcpy (memory, initial_result, length);
 699                             result = memory;
 700                             grow = false;
 701
 702                             out2ptr = result + length;
 703                             out2size = allocated - extra_alloc - length;
 704                             res = iconv (cd2,
 705                                          (ICONV_CONST char **) &inptr, &insize,
 706                                          &out2ptr, &out2size);
 707                             length = out2ptr - result;
 708                           }
 709 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 710                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 711                            NetBSD iconv() inserts a question mark if it cannot
 712                            convert.
 713                            Only GNU libiconv and GNU libc are known to prefer
 714                            to fail rather than doing a lossy conversion.  */
 715                         if (res != (size_t)(-1) && res > 0)
 716                           {
 717                             errno = EILSEQ;
 718                             res = (size_t)(-1);
 719                           }
 720 # endif
 721                         if (res == (size_t)(-1))
 722                           {
 723                             /* Failure converting the ASCII replacement.  */
 724                             if (result != initial_result)
 725                               {
 726                                 int saved_errno = errno;
 727                                 free (result);
 728                                 errno = saved_errno;
 729                               }
 730                             return -1;
 731                           }
 732                       }
 733                     else
 734                       {
 735                         if (result != initial_result)
 736                           {
 737                             int saved_errno = errno;
 738                             free (result);
 739                             errno = saved_errno;
 740                           }
 741                         return -1;
 742                       }
 743                   }
 744                 if (!(in2size > 0
 745                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 746                   break;
 747                 if (grow)
 748                   {
 749                     char *memory;
 750
 751                     allocated = 2 * allocated;
 752                     if (result == initial_result)
 753                       memory = (char *) malloc (allocated);
 754                     else
 755                       memory = (char *) realloc (result, allocated);
 756                     if (memory == NULL)
 757                       {
 758                         if (result != initial_result)
 759                           free (result);
 760                         errno = ENOMEM;
 761                         return -1;
 762                       }
 763                     if (result == initial_result)
 764                       memcpy (memory, initial_result, length);
 765                     result = memory;
 766                   }
 767               }
 768
 769             /* Move the remaining bytes to the beginning of utf8buf.  */
 770             if (in2size > 0)
 771               memmove (utf8buf, in2ptr, in2size);
 772             utf8len = in2size;
 773           }
 774
 775         if (res1 == (size_t)(-1))
 776           {
 777             if (errno1 == EINVAL)
 778               in1size = 0;
 779             else if (errno1 == EILSEQ)
 780               {
 781                 if (result != initial_result)
 782                   free (result);
 783                 errno = errno1;
 784                 return -1;
 785               }
 786           }
 787       }
 788 # undef utf8bufsize
 789   }
 790
 791  done:
 792   /* Now the final memory allocation.  */
 793   if (result == tmpbuf)
 794     {
 795       char *memory;
 796
 797       memory = (char *) malloc (length + extra_alloc);
 798       if (memory != NULL)
 799         {
 800           memcpy (memory, tmpbuf, length);
 801           result = memory;
 802         }
 803       else
 804         {
 805           errno = ENOMEM;
 806           return -1;
 807         }
 808     }
 809   else if (result != *resultp && length + extra_alloc < allocated)
 810     {
 811       /* Shrink the allocated memory if possible.  */
 812       char *memory;
 813
 814       memory = (char *) realloc (result, length + extra_alloc);
 815       if (memory != NULL)
 816         result = memory;
 817     }
 818   *resultp = result;
 819   *lengthp = length;
 820   return 0;
 821 # undef tmpbuf
 822 # undef tmpbufsize
 823 }
 824
 825 int
 826 mem_cd_iconveh (const char *src, size_t srclen,
 827                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 828                 enum iconv_ilseq_handler handler,
 829                 size_t *offsets,
 830                 char **resultp, size_t *lengthp)
 831 {
 832   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 833                                   offsets, resultp, lengthp);
 834 }
 835
 836 char *
 837 str_cd_iconveh (const char *src,
 838                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 839                 enum iconv_ilseq_handler handler)
 840 {
 841   /* For most encodings, a trailing NUL byte in the input will be converted
 842      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 843      function is usable for UTF-7, we have to exclude the NUL byte from the
 844      conversion and add it by hand afterwards.  */
 845   char *result = NULL;
 846   size_t length = 0;
 847   int retval = mem_cd_iconveh_internal (src, strlen (src),
 848                                         cd, cd1, cd2, handler, 1, NULL,
 849                                         &result, &length);
 850
 851   if (retval < 0)
 852     {
 853       if (result != NULL)
 854         {
 855           int saved_errno = errno;
 856           free (result);
 857           errno = saved_errno;
 858         }
 859       return NULL;
 860     }
 861
 862   /* Add the terminating NUL byte.  */
 863   result[length] = '\0';
 864
 865   return result;
 866 }
 867
 868 #endif
 869
 870 int
 871 mem_iconveh (const char *src, size_t srclen,
 872              const char *from_codeset, const char *to_codeset,
 873              enum iconv_ilseq_handler handler,
 874              size_t *offsets,
 875              char **resultp, size_t *lengthp)
 876 {
 877   if (srclen == 0)
 878     {
 879       /* Nothing to convert.  */
 880       *lengthp = 0;
 881       return 0;
 882     }
 883   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
 884     {
 885       char *result;
 886
 887       if (*resultp != NULL && *lengthp >= srclen)
 888         result = *resultp;
 889       else
 890         {
 891           result = (char *) malloc (srclen);
 892           if (result == NULL)
 893             {
 894               errno = ENOMEM;
 895               return -1;
 896             }
 897         }
 898       memcpy (result, src, srclen);
 899       *resultp = result;
 900       *lengthp = srclen;
 901       return 0;
 902     }
 903   else
 904     {
 905 #if HAVE_ICONV
 906       iconv_t cd;
 907       iconv_t cd1;
 908       iconv_t cd2;
 909       char *result;
 910       size_t length;
 911       int retval;
 912
 913       /* Avoid glibc-2.1 bug with EUC-KR.  */
 914 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 915       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 916           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 917         {
 918           errno = EINVAL;
 919           return -1;
 920         }
 921 # endif
 922
 923       cd = iconv_open (to_codeset, from_codeset);
 924       if (cd == (iconv_t)(-1))
 925         return -1;
 926
 927       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 928         cd1 = (iconv_t)(-1);
 929       else
 930         {
 931           cd1 = iconv_open ("UTF-8", from_codeset);
 932           if (cd1 == (iconv_t)(-1))
 933             {
 934               int saved_errno = errno;
 935               iconv_close (cd);
 936               errno = saved_errno;
 937               return -1;
 938             }
 939         }
 940
 941       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 942         cd2 = (iconv_t)(-1);
 943       else
 944         {
 945           cd2 = iconv_open (to_codeset, "UTF-8");
 946           if (cd2 == (iconv_t)(-1))
 947             {
 948               int saved_errno = errno;
 949               if (cd1 != (iconv_t)(-1))
 950                 iconv_close (cd1);
 951               iconv_close (cd);
 952               errno = saved_errno;
 953               return -1;
 954             }
 955         }
 956
 957       result = *resultp;
 958       length = *lengthp;
 959       retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
 960                                &result, &length);
 961
 962       if (retval < 0)
 963         {
 964           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
 965           int saved_errno = errno;
 966           if (cd2 != (iconv_t)(-1))
 967             iconv_close (cd2);
 968           if (cd1 != (iconv_t)(-1))
 969             iconv_close (cd1);
 970           iconv_close (cd);
 971           errno = saved_errno;
 972         }
 973       else
 974         {
 975           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 976             {
 977               /* Return -1, but free the allocated memory, and while doing
 978                  that, preserve the errno from iconv_close.  */
 979               int saved_errno = errno;
 980               if (cd1 != (iconv_t)(-1))
 981                 iconv_close (cd1);
 982               iconv_close (cd);
 983               if (result != *resultp && result != NULL)
 984                 free (result);
 985               errno = saved_errno;
 986               return -1;
 987             }
 988           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 989             {
 990               /* Return -1, but free the allocated memory, and while doing
 991                  that, preserve the errno from iconv_close.  */
 992               int saved_errno = errno;
 993               iconv_close (cd);
 994               if (result != *resultp && result != NULL)
 995                 free (result);
 996               errno = saved_errno;
 997               return -1;
 998             }
 999           if (iconv_close (cd) < 0)
1000             {
1001               /* Return -1, but free the allocated memory, and while doing
1002                  that, preserve the errno from iconv_close.  */
1003               int saved_errno = errno;
1004               if (result != *resultp && result != NULL)
1005                 free (result);
1006               errno = saved_errno;
1007               return -1;
1008             }
1009           *resultp = result;
1010           *lengthp = length;
1011         }
1012       return retval;
1013 #else
1014       /* This is a different error code than if iconv_open existed but didn't
1015          support from_codeset and to_codeset, so that the caller can emit
1016          an error message such as
1017            "iconv() is not supported. Installing GNU libiconv and
1018             then reinstalling this package would fix this."  */
1019       errno = ENOSYS;
1020       return -1;
1021 #endif
1022     }
1023 }
1024
1025 char *
1026 str_iconveh (const char *src,
1027              const char *from_codeset, const char *to_codeset,
1028              enum iconv_ilseq_handler handler)
1029 {
1030   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
1031     {
1032       char *result = strdup (src);
1033
1034       if (result == NULL)
1035         errno = ENOMEM;
1036       return result;
1037     }
1038   else
1039     {
1040 #if HAVE_ICONV
1041       iconv_t cd;
1042       iconv_t cd1;
1043       iconv_t cd2;
1044       char *result;
1045
1046       /* Avoid glibc-2.1 bug with EUC-KR.  */
1047 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
1048       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
1049           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
1050         {
1051           errno = EINVAL;
1052           return NULL;
1053         }
1054 # endif
1055
1056       cd = iconv_open (to_codeset, from_codeset);
1057       if (cd == (iconv_t)(-1))
1058         return NULL;
1059
1060       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1061         cd1 = (iconv_t)(-1);
1062       else
1063         {
1064           cd1 = iconv_open ("UTF-8", from_codeset);
1065           if (cd1 == (iconv_t)(-1))
1066             {
1067               int saved_errno = errno;
1068               iconv_close (cd);
1069               errno = saved_errno;
1070               return NULL;
1071             }
1072         }
1073
1074       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1075         cd2 = (iconv_t)(-1);
1076       else
1077         {
1078           cd2 = iconv_open (to_codeset, "UTF-8");
1079           if (cd2 == (iconv_t)(-1))
1080             {
1081               int saved_errno = errno;
1082               if (cd1 != (iconv_t)(-1))
1083                 iconv_close (cd1);
1084               iconv_close (cd);
1085               errno = saved_errno;
1086               return NULL;
1087             }
1088         }
1089
1090       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
1091
1092       if (result == NULL)
1093         {
1094           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
1095           int saved_errno = errno;
1096           if (cd2 != (iconv_t)(-1))
1097             iconv_close (cd2);
1098           if (cd1 != (iconv_t)(-1))
1099             iconv_close (cd1);
1100           iconv_close (cd);
1101           errno = saved_errno;
1102         }
1103       else
1104         {
1105           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
1106             {
1107               /* Return NULL, but free the allocated memory, and while doing
1108                  that, preserve the errno from iconv_close.  */
1109               int saved_errno = errno;
1110               if (cd1 != (iconv_t)(-1))
1111                 iconv_close (cd1);
1112               iconv_close (cd);
1113               free (result);
1114               errno = saved_errno;
1115               return NULL;
1116             }
1117           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
1118             {
1119               /* Return NULL, but free the allocated memory, and while doing
1120                  that, preserve the errno from iconv_close.  */
1121               int saved_errno = errno;
1122               iconv_close (cd);
1123               free (result);
1124               errno = saved_errno;
1125               return NULL;
1126             }
1127           if (iconv_close (cd) < 0)
1128             {
1129               /* Return NULL, but free the allocated memory, and while doing
1130                  that, preserve the errno from iconv_close.  */
1131               int saved_errno = errno;
1132               free (result);
1133               errno = saved_errno;
1134               return NULL;
1135             }
1136         }
1137       return result;
1138 #else
1139       /* This is a different error code than if iconv_open existed but didn't
1140          support from_codeset and to_codeset, so that the caller can emit
1141          an error message such as
1142            "iconv() is not supported. Installing GNU libiconv and
1143             then reinstalling this package would fix this."  */
1144       errno = ENOSYS;
1145       return NULL;
1146 #endif
1147     }
1148 }