lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "utf8-ucs4.h"
  32 # include "ucs4-utf8.h"
  33 # include "unistr.h"
  34 #endif
  35
  36 #include "c-strcase.h"
  37 #include "c-strcaseeq.h"
  38
  39 #ifndef SIZE_MAX
  40 # define SIZE_MAX ((size_t) -1)
  41 #endif
  42
  43
  44 #if HAVE_ICONV
  45
  46 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  47    error occurs, we may have to determine the Unicode representation of the
  48    inconvertible character.  */
  49
  50 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  51    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  52    it has incremented the input pointers past the error location.  */
  53 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  54 /* Irix iconv() inserts a NUL byte if it cannot convert.
  55    NetBSD iconv() inserts a question mark if it cannot convert.
  56    Only GNU libiconv and GNU libc are known to prefer to fail rather
  57    than doing a lossy conversion.  */
  58 static size_t
  59 iconv_carefully (iconv_t cd,
  60                  const char **inbuf, size_t *inbytesleft,
  61                  char **outbuf, size_t *outbytesleft,
  62                  bool *incremented)
  63 {
  64   const char *inptr = *inbuf;
  65   const char *inptr_end = inptr + *inbytesleft;
  66   char *outptr = *outbuf;
  67   size_t outsize = *outbytesleft;
  68   const char *inptr_before;
  69   size_t res;
  70
  71   do
  72     {
  73       size_t insize;
  74
  75       inptr_before = inptr;
  76       res = (size_t)(-1);
  77
  78       for (insize = 1; inptr + insize <= inptr_end; insize++)
  79         {
  80           res = iconv (cd,
  81                        (ICONV_CONST char **) &inptr, &insize,
  82                        &outptr, &outsize);
  83           if (!(res == (size_t)(-1) && errno == EINVAL))
  84             break;
  85           /* We expect that no input bytes have been consumed so far.  */
  86           if (inptr != inptr_before)
  87             abort ();
  88         }
  89
  90       if (res == 0)
  91         {
  92           *outbuf = outptr;
  93           *outbytesleft = outsize;
  94         }
  95     }
  96   while (res == 0 && inptr < inptr_end);
  97
  98   *inbuf = inptr;
  99   *inbytesleft = inptr_end - inptr;
 100   if (res != (size_t)(-1) && res > 0)
 101     {
 102       /* iconv() has already incremented INPTR.  We cannot go back to a
 103          previous INPTR, otherwise the state inside CD would become invalid,
 104          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 105          *INBUF has already been incremented.  */
 106       *incremented = (inptr > inptr_before);
 107       errno = EILSEQ;
 108       return (size_t)(-1);
 109     }
 110   else
 111     {
 112       *incremented = false;
 113       return res;
 114     }
 115 }
 116 # else
 117 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 118      (*(incremented) = false, \
 119       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 120 # endif
 121
 122 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
 123    converting one character.  */
 124 static size_t
 125 iconv_carefully_1 (iconv_t cd,
 126                    const char **inbuf, size_t *inbytesleft,
 127                    char **outbuf, size_t *outbytesleft,
 128                    bool *incremented)
 129 {
 130   const char *inptr = *inbuf;
 131   const char *inptr_end = inptr + *inbytesleft;
 132   char *outptr = *outbuf;
 133   size_t outsize = *outbytesleft;
 134   const char *inptr_before = inptr;
 135   size_t res = (size_t)(-1);
 136   size_t insize;
 137
 138   for (insize = 1; inptr + insize <= inptr_end; insize++)
 139     {
 140       res = iconv (cd,
 141                    (ICONV_CONST char **) &inptr, &insize,
 142                    &outptr, &outsize);
 143       if (!(res == (size_t)(-1) && errno == EINVAL))
 144         break;
 145       /* We expect that no input bytes have been consumed so far.  */
 146       if (inptr != inptr_before)
 147         abort ();
 148     }
 149
 150   *inbuf = inptr;
 151   *inbytesleft = inptr_end - inptr;
 152 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 153   /* Irix iconv() inserts a NUL byte if it cannot convert.
 154      NetBSD iconv() inserts a question mark if it cannot convert.
 155      Only GNU libiconv and GNU libc are known to prefer to fail rather
 156      than doing a lossy conversion.  */
 157   if (res != (size_t)(-1) && res > 0)
 158     {
 159       /* iconv() has already incremented INPTR.  We cannot go back to a
 160          previous INPTR, otherwise the state inside CD would become invalid,
 161          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 162          *INBUF has already been incremented.  */
 163       *incremented = (inptr > inptr_before);
 164       errno = EILSEQ;
 165       return (size_t)(-1);
 166     }
 167 # endif
 168
 169   if (res != (size_t)(-1))
 170     {
 171       *outbuf = outptr;
 172       *outbytesleft = outsize;
 173     }
 174   *incremented = false;
 175   return res;
 176 }
 177
 178 static int
 179 mem_cd_iconveh_internal (const char *src, size_t srclen,
 180                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 181                          enum iconv_ilseq_handler handler,
 182                          size_t extra_alloc,
 183                          size_t *offsets,
 184                          char **resultp, size_t *lengthp)
 185 {
 186   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 187      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 188      Instead, we have to start afresh from the beginning of SRC.  */
 189   /* Use a temporary buffer, so that for small strings, a single malloc()
 190      call will be sufficient.  */
 191 # define tmpbufsize 4096
 192   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 193      libiconv's UCS-4-INTERNAL encoding.  */
 194   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 195 # define tmpbuf tmp.buf
 196
 197   char *initial_result;
 198   char *result;
 199   size_t allocated;
 200   size_t length;
 201   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 202
 203   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
 204     {
 205       initial_result = *resultp;
 206       allocated = *lengthp;
 207     }
 208   else
 209     {
 210       initial_result = tmpbuf;
 211       allocated = sizeof (tmpbuf);
 212     }
 213   result = initial_result;
 214
 215   if (offsets != NULL)
 216     {
 217       size_t i;
 218
 219       for (i = 0; i < srclen; i++)
 220         offsets[i] = (size_t)(-1);
 221
 222       last_length = (size_t)(-1);
 223     }
 224   length = 0;
 225
 226   /* First, try a direct conversion, and see whether a conversion error
 227      occurs at all.  */
 228   {
 229     const char *inptr = src;
 230     size_t insize = srclen;
 231
 232     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 233 # if defined _LIBICONV_VERSION \
 234      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 235     /* Set to the initial state.  */
 236     iconv (cd, NULL, NULL, NULL, NULL);
 237 # endif
 238
 239     while (insize > 0)
 240       {
 241         char *outptr = result + length;
 242         size_t outsize = allocated - extra_alloc - length;
 243         bool incremented;
 244         size_t res;
 245         bool grow;
 246
 247         if (offsets != NULL)
 248           {
 249             if (length != last_length) /* ensure that offset[] be increasing */
 250               {
 251                 offsets[inptr - src] = length;
 252                 last_length = length;
 253               }
 254             res = iconv_carefully_1 (cd,
 255                                      &inptr, &insize,
 256                                      &outptr, &outsize,
 257                                      &incremented);
 258           }
 259         else
 260           /* Use iconv_carefully instead of iconv here, because:
 261              - If TO_CODESET is UTF-8, we can do the error handling in this
 262                loop, no need for a second loop,
 263              - With iconv() implementations other than GNU libiconv and GNU
 264                libc, if we use iconv() in a big swoop, checking for an E2BIG
 265                return, we lose the number of irreversible conversions.  */
 266           res = iconv_carefully (cd,
 267                                  &inptr, &insize,
 268                                  &outptr, &outsize,
 269                                  &incremented);
 270
 271         length = outptr - result;
 272         grow = (length + extra_alloc > allocated / 2);
 273         if (res == (size_t)(-1))
 274           {
 275             if (errno == E2BIG)
 276               grow = true;
 277             else if (errno == EINVAL)
 278               break;
 279             else if (errno == EILSEQ && handler != iconveh_error)
 280               {
 281                 if (cd2 == (iconv_t)(-1))
 282                   {
 283                     /* TO_CODESET is UTF-8.  */
 284                     /* Error handling can produce up to 1 byte of output.  */
 285                     if (length + 1 + extra_alloc > allocated)
 286                       {
 287                         char *memory;
 288
 289                         allocated = 2 * allocated;
 290                         if (length + 1 + extra_alloc > allocated)
 291                           abort ();
 292                         if (result == initial_result)
 293                           memory = (char *) malloc (allocated);
 294                         else
 295                           memory = (char *) realloc (result, allocated);
 296                         if (memory == NULL)
 297                           {
 298                             if (result != initial_result)
 299                               free (result);
 300                             errno = ENOMEM;
 301                             return -1;
 302                           }
 303                         if (result == initial_result)
 304                           memcpy (memory, initial_result, length);
 305                         result = memory;
 306                         grow = false;
 307                       }
 308                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 309                        and emit a question mark.  */
 310                     if (!incremented)
 311                       {
 312                         if (insize == 0)
 313                           abort ();
 314                         inptr++;
 315                         insize--;
 316                       }
 317                     result[length] = '?';
 318                     length++;
 319                   }
 320                 else
 321                   goto indirectly;
 322               }
 323             else
 324               {
 325                 if (result != initial_result)
 326                   {
 327                     int saved_errno = errno;
 328                     free (result);
 329                     errno = saved_errno;
 330                   }
 331                 return -1;
 332               }
 333           }
 334         if (insize == 0)
 335           break;
 336         if (grow)
 337           {
 338             char *memory;
 339
 340             allocated = 2 * allocated;
 341             if (result == initial_result)
 342               memory = (char *) malloc (allocated);
 343             else
 344               memory = (char *) realloc (result, allocated);
 345             if (memory == NULL)
 346               {
 347                 if (result != initial_result)
 348                   free (result);
 349                 errno = ENOMEM;
 350                 return -1;
 351               }
 352             if (result == initial_result)
 353               memcpy (memory, initial_result, length);
 354             result = memory;
 355           }
 356       }
 357   }
 358
 359   /* Now get the conversion state back to the initial state.
 360      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 361 #if defined _LIBICONV_VERSION \
 362     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 363   for (;;)
 364     {
 365       char *outptr = result + length;
 366       size_t outsize = allocated - extra_alloc - length;
 367       size_t res;
 368
 369       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 370       length = outptr - result;
 371       if (res == (size_t)(-1))
 372         {
 373           if (errno == E2BIG)
 374             {
 375               char *memory;
 376
 377               allocated = 2 * allocated;
 378               if (result == initial_result)
 379                 memory = (char *) malloc (allocated);
 380               else
 381                 memory = (char *) realloc (result, allocated);
 382               if (memory == NULL)
 383                 {
 384                   if (result != initial_result)
 385                     free (result);
 386                   errno = ENOMEM;
 387                   return -1;
 388                 }
 389               if (result == initial_result)
 390                 memcpy (memory, initial_result, length);
 391               result = memory;
 392             }
 393           else
 394             {
 395               if (result != initial_result)
 396                 {
 397                   int saved_errno = errno;
 398                   free (result);
 399                   errno = saved_errno;
 400                 }
 401               return -1;
 402             }
 403         }
 404       else
 405         break;
 406     }
 407 #endif
 408
 409   /* The direct conversion succeeded.  */
 410   goto done;
 411
 412  indirectly:
 413   /* The direct conversion failed, handler != iconveh_error,
 414      and cd2 != (iconv_t)(-1).
 415      Use a conversion through UTF-8.  */
 416   if (offsets != NULL)
 417     {
 418       size_t i;
 419
 420       for (i = 0; i < srclen; i++)
 421         offsets[i] = (size_t)(-1);
 422
 423       last_length = (size_t)(-1);
 424     }
 425   length = 0;
 426   {
 427 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 428     char utf8buf[utf8bufsize + 1];
 429     size_t utf8len = 0;
 430     const char *in1ptr = src;
 431     size_t in1size = srclen;
 432     bool do_final_flush1 = true;
 433     bool do_final_flush2 = true;
 434
 435     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 436 # if defined _LIBICONV_VERSION \
 437      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 438     /* Set to the initial state.  */
 439     if (cd1 != (iconv_t)(-1))
 440       iconv (cd1, NULL, NULL, NULL, NULL);
 441     iconv (cd2, NULL, NULL, NULL, NULL);
 442 # endif
 443
 444     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 445       {
 446         char *out1ptr = utf8buf + utf8len;
 447         size_t out1size = utf8bufsize - utf8len;
 448         bool incremented1;
 449         size_t res1;
 450         int errno1;
 451
 452         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 453         if (in1size > 0)
 454           {
 455             if (offsets != NULL
 456                 && length != last_length) /* ensure that offset[] be increasing */
 457               {
 458                 offsets[in1ptr - src] = length;
 459                 last_length = length;
 460               }
 461             if (cd1 != (iconv_t)(-1))
 462               {
 463                 if (offsets != NULL)
 464                   res1 = iconv_carefully_1 (cd1,
 465                                             &in1ptr, &in1size,
 466                                             &out1ptr, &out1size,
 467                                             &incremented1);
 468                 else
 469                   res1 = iconv_carefully (cd1,
 470                                           &in1ptr, &in1size,
 471                                           &out1ptr, &out1size,
 472                                           &incremented1);
 473               }
 474             else
 475               {
 476                 /* FROM_CODESET is UTF-8.  */
 477                 res1 = 0;
 478                 do
 479                   {
 480                     ucs4_t uc;
 481                     int n;
 482                     int m;
 483
 484                     n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
 485                     if (uc == 0xfffd
 486                         && !(n >= 3
 487                              && (uint8_t)in1ptr[0] == 0xEF
 488                              && (uint8_t)in1ptr[1] == 0xBF
 489                              && (uint8_t)in1ptr[2] == 0xBD))
 490                       {
 491                         in1ptr += n;
 492                         in1size -= n;
 493                         errno = EILSEQ;
 494                         res1 = (size_t)(-1);
 495                         incremented1 = true;
 496                         break;
 497                       }
 498                     if (out1size == 0)
 499                       {
 500                         errno = E2BIG;
 501                         res1 = (size_t)(-1);
 502                         incremented1 = false;
 503                         break;
 504                       }
 505                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 506                     if (m == -2)
 507                       {
 508                         errno = E2BIG;
 509                         res1 = (size_t)(-1);
 510                         incremented1 = false;
 511                         break;
 512                       }
 513                     in1ptr += n;
 514                     in1size -= n;
 515                     if (m == -1)
 516                       {
 517                         errno = EILSEQ;
 518                         res1 = (size_t)(-1);
 519                         incremented1 = true;
 520                         break;
 521                       }
 522                     out1ptr += m;
 523                     out1size -= m;
 524                   }
 525                 while (offsets == NULL && in1size > 0);
 526               }
 527           }
 528         else if (do_final_flush1)
 529           {
 530             /* Now get the conversion state of CD1 back to the initial state.
 531                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 532 # if defined _LIBICONV_VERSION \
 533      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 534             if (cd1 != (iconv_t)(-1))
 535               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 536             else
 537 # endif
 538               res1 = 0;
 539             do_final_flush1 = false;
 540             incremented1 = true;
 541           }
 542         else
 543           {
 544             res1 = 0;
 545             incremented1 = true;
 546           }
 547         if (res1 == (size_t)(-1)
 548             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 549           {
 550             if (result != initial_result)
 551               {
 552                 int saved_errno = errno;
 553                 free (result);
 554                 errno = saved_errno;
 555               }
 556             return -1;
 557           }
 558         if (res1 == (size_t)(-1)
 559             && errno == EILSEQ && handler != iconveh_error)
 560           {
 561             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 562                emit a question mark.  Room for the question mark was allocated
 563                at the end of utf8buf.  */
 564             if (!incremented1)
 565               {
 566                 if (in1size == 0)
 567                   abort ();
 568                 in1ptr++;
 569                 in1size--;
 570               }
 571             utf8buf[utf8len++] = '?';
 572           }
 573         errno1 = errno;
 574         utf8len = out1ptr - utf8buf;
 575
 576         if (offsets != NULL
 577             || in1size == 0
 578             || utf8len > utf8bufsize / 2
 579             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 580           {
 581             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 582             const char *in2ptr = utf8buf;
 583             size_t in2size = utf8len;
 584
 585             while (in2size > 0
 586                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 587               {
 588                 char *out2ptr = result + length;
 589                 size_t out2size = allocated - extra_alloc - length;
 590                 bool incremented2;
 591                 size_t res2;
 592                 bool grow;
 593
 594                 if (in2size > 0)
 595                   res2 = iconv_carefully (cd2,
 596                                           &in2ptr, &in2size,
 597                                           &out2ptr, &out2size,
 598                                           &incremented2);
 599                 else /* in1size == 0 && !do_final_flush1
 600                         && in2size == 0 && do_final_flush2 */
 601                   {
 602                     /* Now get the conversion state of CD1 back to the initial
 603                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 604 # if defined _LIBICONV_VERSION \
 605      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 606                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 607 # else
 608                     res2 = 0;
 609 # endif
 610                     do_final_flush2 = false;
 611                     incremented2 = true;
 612                   }
 613
 614                 length = out2ptr - result;
 615                 grow = (length + extra_alloc > allocated / 2);
 616                 if (res2 == (size_t)(-1))
 617                   {
 618                     if (errno == E2BIG)
 619                       grow = true;
 620                     else if (errno == EINVAL)
 621                       break;
 622                     else if (errno == EILSEQ && handler != iconveh_error)
 623                       {
 624                         /* Error handling can produce up to 10 bytes of ASCII
 625                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 626                            UCS-4, so use CD2 here as well.  */
 627                         char scratchbuf[10];
 628                         size_t scratchlen;
 629                         ucs4_t uc;
 630                         const char *inptr;
 631                         size_t insize;
 632                         size_t res;
 633
 634                         if (incremented2)
 635                           {
 636                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 637                                          (const uint8_t *) utf8buf)
 638                                 == NULL)
 639                               abort ();
 640                           }
 641                         else
 642                           {
 643                             int n;
 644                             if (in2size == 0)
 645                               abort ();
 646                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
 647                                                   in2size);
 648                             in2ptr += n;
 649                             in2size -= n;
 650                           }
 651
 652                         if (handler == iconveh_escape_sequence)
 653                           {
 654                             static char hex[16] = "0123456789ABCDEF";
 655                             scratchlen = 0;
 656                             scratchbuf[scratchlen++] = '\\';
 657                             if (uc < 0x10000)
 658                               scratchbuf[scratchlen++] = 'u';
 659                             else
 660                               {
 661                                 scratchbuf[scratchlen++] = 'U';
 662                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 663                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 664                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 665                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 666                               }
 667                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 668                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 669                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 670                             scratchbuf[scratchlen++] = hex[uc & 15];
 671                           }
 672                         else
 673                           {
 674                             scratchbuf[0] = '?';
 675                             scratchlen = 1;
 676                           }
 677
 678                         inptr = scratchbuf;
 679                         insize = scratchlen;
 680                         res = iconv (cd2,
 681                                      (ICONV_CONST char **) &inptr, &insize,
 682                                      &out2ptr, &out2size);
 683                         length = out2ptr - result;
 684                         if (res == (size_t)(-1) && errno == E2BIG)
 685                           {
 686                             char *memory;
 687
 688                             allocated = 2 * allocated;
 689                             if (length + 1 + extra_alloc > allocated)
 690                               abort ();
 691                             if (result == initial_result)
 692                               memory = (char *) malloc (allocated);
 693                             else
 694                               memory = (char *) realloc (result, allocated);
 695                             if (memory == NULL)
 696                               {
 697                                 if (result != initial_result)
 698                                   free (result);
 699                                 errno = ENOMEM;
 700                                 return -1;
 701                               }
 702                             if (result == initial_result)
 703                               memcpy (memory, initial_result, length);
 704                             result = memory;
 705                             grow = false;
 706
 707                             out2ptr = result + length;
 708                             out2size = allocated - extra_alloc - length;
 709                             res = iconv (cd2,
 710                                          (ICONV_CONST char **) &inptr, &insize,
 711                                          &out2ptr, &out2size);
 712                             length = out2ptr - result;
 713                           }
 714 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 715                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 716                            NetBSD iconv() inserts a question mark if it cannot
 717                            convert.
 718                            Only GNU libiconv and GNU libc are known to prefer
 719                            to fail rather than doing a lossy conversion.  */
 720                         if (res != (size_t)(-1) && res > 0)
 721                           {
 722                             errno = EILSEQ;
 723                             res = (size_t)(-1);
 724                           }
 725 # endif
 726                         if (res == (size_t)(-1))
 727                           {
 728                             /* Failure converting the ASCII replacement.  */
 729                             if (result != initial_result)
 730                               {
 731                                 int saved_errno = errno;
 732                                 free (result);
 733                                 errno = saved_errno;
 734                               }
 735                             return -1;
 736                           }
 737                       }
 738                     else
 739                       {
 740                         if (result != initial_result)
 741                           {
 742                             int saved_errno = errno;
 743                             free (result);
 744                             errno = saved_errno;
 745                           }
 746                         return -1;
 747                       }
 748                   }
 749                 if (!(in2size > 0
 750                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 751                   break;
 752                 if (grow)
 753                   {
 754                     char *memory;
 755
 756                     allocated = 2 * allocated;
 757                     if (result == initial_result)
 758                       memory = (char *) malloc (allocated);
 759                     else
 760                       memory = (char *) realloc (result, allocated);
 761                     if (memory == NULL)
 762                       {
 763                         if (result != initial_result)
 764                           free (result);
 765                         errno = ENOMEM;
 766                         return -1;
 767                       }
 768                     if (result == initial_result)
 769                       memcpy (memory, initial_result, length);
 770                     result = memory;
 771                   }
 772               }
 773
 774             /* Move the remaining bytes to the beginning of utf8buf.  */
 775             if (in2size > 0)
 776               memmove (utf8buf, in2ptr, in2size);
 777             utf8len = in2size;
 778           }
 779
 780         if (res1 == (size_t)(-1))
 781           {
 782             if (errno1 == EINVAL)
 783               in1size = 0;
 784             else if (errno1 == EILSEQ)
 785               {
 786                 if (result != initial_result)
 787                   free (result);
 788                 errno = errno1;
 789                 return -1;
 790               }
 791           }
 792       }
 793 # undef utf8bufsize
 794   }
 795
 796  done:
 797   /* Now the final memory allocation.  */
 798   if (result == tmpbuf)
 799     {
 800       char *memory;
 801
 802       memory = (char *) malloc (length + extra_alloc);
 803       if (memory != NULL)
 804         {
 805           memcpy (memory, tmpbuf, length);
 806           result = memory;
 807         }
 808       else
 809         {
 810           errno = ENOMEM;
 811           return -1;
 812         }
 813     }
 814   else if (result != *resultp && length + extra_alloc < allocated)
 815     {
 816       /* Shrink the allocated memory if possible.  */
 817       char *memory;
 818
 819       memory = (char *) realloc (result, length + extra_alloc);
 820       if (memory != NULL)
 821         result = memory;
 822     }
 823   *resultp = result;
 824   *lengthp = length;
 825   return 0;
 826 # undef tmpbuf
 827 # undef tmpbufsize
 828 }
 829
 830 int
 831 mem_cd_iconveh (const char *src, size_t srclen,
 832                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 833                 enum iconv_ilseq_handler handler,
 834                 size_t *offsets,
 835                 char **resultp, size_t *lengthp)
 836 {
 837   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 838                                   offsets, resultp, lengthp);
 839 }
 840
 841 char *
 842 str_cd_iconveh (const char *src,
 843                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 844                 enum iconv_ilseq_handler handler)
 845 {
 846   /* For most encodings, a trailing NUL byte in the input will be converted
 847      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 848      function is usable for UTF-7, we have to exclude the NUL byte from the
 849      conversion and add it by hand afterwards.  */
 850   char *result = NULL;
 851   size_t length = 0;
 852   int retval = mem_cd_iconveh_internal (src, strlen (src),
 853                                         cd, cd1, cd2, handler, 1, NULL,
 854                                         &result, &length);
 855
 856   if (retval < 0)
 857     {
 858       if (result != NULL)
 859         {
 860           int saved_errno = errno;
 861           free (result);
 862           errno = saved_errno;
 863         }
 864       return NULL;
 865     }
 866
 867   /* Add the terminating NUL byte.  */
 868   result[length] = '\0';
 869
 870   return result;
 871 }
 872
 873 #endif
 874
 875 int
 876 mem_iconveh (const char *src, size_t srclen,
 877              const char *from_codeset, const char *to_codeset,
 878              enum iconv_ilseq_handler handler,
 879              size_t *offsets,
 880              char **resultp, size_t *lengthp)
 881 {
 882   if (srclen == 0)
 883     {
 884       /* Nothing to convert.  */
 885       *lengthp = 0;
 886       return 0;
 887     }
 888   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
 889     {
 890       char *result;
 891
 892       if (*resultp != NULL && *lengthp >= srclen)
 893         result = *resultp;
 894       else
 895         {
 896           result = (char *) malloc (srclen);
 897           if (result == NULL)
 898             {
 899               errno = ENOMEM;
 900               return -1;
 901             }
 902         }
 903       memcpy (result, src, srclen);
 904       *resultp = result;
 905       *lengthp = srclen;
 906       return 0;
 907     }
 908   else
 909     {
 910 #if HAVE_ICONV
 911       iconv_t cd;
 912       iconv_t cd1;
 913       iconv_t cd2;
 914       char *result;
 915       size_t length;
 916       int retval;
 917
 918       /* Avoid glibc-2.1 bug with EUC-KR.  */
 919 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 920       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 921           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 922         {
 923           errno = EINVAL;
 924           return -1;
 925         }
 926 # endif
 927
 928       cd = iconv_open (to_codeset, from_codeset);
 929       if (cd == (iconv_t)(-1))
 930         return -1;
 931
 932       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 933         cd1 = (iconv_t)(-1);
 934       else
 935         {
 936           cd1 = iconv_open ("UTF-8", from_codeset);
 937           if (cd1 == (iconv_t)(-1))
 938             {
 939               int saved_errno = errno;
 940               iconv_close (cd);
 941               errno = saved_errno;
 942               return -1;
 943             }
 944         }
 945
 946       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 947         cd2 = (iconv_t)(-1);
 948       else
 949         {
 950           cd2 = iconv_open (to_codeset, "UTF-8");
 951           if (cd2 == (iconv_t)(-1))
 952             {
 953               int saved_errno = errno;
 954               if (cd1 != (iconv_t)(-1))
 955                 iconv_close (cd1);
 956               iconv_close (cd);
 957               errno = saved_errno;
 958               return -1;
 959             }
 960         }
 961
 962       result = *resultp;
 963       length = *lengthp;
 964       retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
 965                                &result, &length);
 966
 967       if (retval < 0)
 968         {
 969           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
 970           int saved_errno = errno;
 971           if (cd2 != (iconv_t)(-1))
 972             iconv_close (cd2);
 973           if (cd1 != (iconv_t)(-1))
 974             iconv_close (cd1);
 975           iconv_close (cd);
 976           errno = saved_errno;
 977         }
 978       else
 979         {
 980           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 981             {
 982               /* Return -1, but free the allocated memory, and while doing
 983                  that, preserve the errno from iconv_close.  */
 984               int saved_errno = errno;
 985               if (cd1 != (iconv_t)(-1))
 986                 iconv_close (cd1);
 987               iconv_close (cd);
 988               if (result != *resultp && result != NULL)
 989                 free (result);
 990               errno = saved_errno;
 991               return -1;
 992             }
 993           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 994             {
 995               /* Return -1, but free the allocated memory, and while doing
 996                  that, preserve the errno from iconv_close.  */
 997               int saved_errno = errno;
 998               iconv_close (cd);
 999               if (result != *resultp && result != NULL)
1000                 free (result);
1001               errno = saved_errno;
1002               return -1;
1003             }
1004           if (iconv_close (cd) < 0)
1005             {
1006               /* Return -1, but free the allocated memory, and while doing
1007                  that, preserve the errno from iconv_close.  */
1008               int saved_errno = errno;
1009               if (result != *resultp && result != NULL)
1010                 free (result);
1011               errno = saved_errno;
1012               return -1;
1013             }
1014           *resultp = result;
1015           *lengthp = length;
1016         }
1017       return retval;
1018 #else
1019       /* This is a different error code than if iconv_open existed but didn't
1020          support from_codeset and to_codeset, so that the caller can emit
1021          an error message such as
1022            "iconv() is not supported. Installing GNU libiconv and
1023             then reinstalling this package would fix this."  */
1024       errno = ENOSYS;
1025       return -1;
1026 #endif
1027     }
1028 }
1029
1030 char *
1031 str_iconveh (const char *src,
1032              const char *from_codeset, const char *to_codeset,
1033              enum iconv_ilseq_handler handler)
1034 {
1035   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
1036     {
1037       char *result = strdup (src);
1038
1039       if (result == NULL)
1040         errno = ENOMEM;
1041       return result;
1042     }
1043   else
1044     {
1045 #if HAVE_ICONV
1046       iconv_t cd;
1047       iconv_t cd1;
1048       iconv_t cd2;
1049       char *result;
1050
1051       /* Avoid glibc-2.1 bug with EUC-KR.  */
1052 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
1053       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
1054           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
1055         {
1056           errno = EINVAL;
1057           return NULL;
1058         }
1059 # endif
1060
1061       cd = iconv_open (to_codeset, from_codeset);
1062       if (cd == (iconv_t)(-1))
1063         return NULL;
1064
1065       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1066         cd1 = (iconv_t)(-1);
1067       else
1068         {
1069           cd1 = iconv_open ("UTF-8", from_codeset);
1070           if (cd1 == (iconv_t)(-1))
1071             {
1072               int saved_errno = errno;
1073               iconv_close (cd);
1074               errno = saved_errno;
1075               return NULL;
1076             }
1077         }
1078
1079       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1080         cd2 = (iconv_t)(-1);
1081       else
1082         {
1083           cd2 = iconv_open (to_codeset, "UTF-8");
1084           if (cd2 == (iconv_t)(-1))
1085             {
1086               int saved_errno = errno;
1087               if (cd1 != (iconv_t)(-1))
1088                 iconv_close (cd1);
1089               iconv_close (cd);
1090               errno = saved_errno;
1091               return NULL;
1092             }
1093         }
1094
1095       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
1096
1097       if (result == NULL)
1098         {
1099           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
1100           int saved_errno = errno;
1101           if (cd2 != (iconv_t)(-1))
1102             iconv_close (cd2);
1103           if (cd1 != (iconv_t)(-1))
1104             iconv_close (cd1);
1105           iconv_close (cd);
1106           errno = saved_errno;
1107         }
1108       else
1109         {
1110           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
1111             {
1112               /* Return NULL, but free the allocated memory, and while doing
1113                  that, preserve the errno from iconv_close.  */
1114               int saved_errno = errno;
1115               if (cd1 != (iconv_t)(-1))
1116                 iconv_close (cd1);
1117               iconv_close (cd);
1118               free (result);
1119               errno = saved_errno;
1120               return NULL;
1121             }
1122           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
1123             {
1124               /* Return NULL, but free the allocated memory, and while doing
1125                  that, preserve the errno from iconv_close.  */
1126               int saved_errno = errno;
1127               iconv_close (cd);
1128               free (result);
1129               errno = saved_errno;
1130               return NULL;
1131             }
1132           if (iconv_close (cd) < 0)
1133             {
1134               /* Return NULL, but free the allocated memory, and while doing
1135                  that, preserve the errno from iconv_close.  */
1136               int saved_errno = errno;
1137               free (result);
1138               errno = saved_errno;
1139               return NULL;
1140             }
1141         }
1142       return result;
1143 #else
1144       /* This is a different error code than if iconv_open existed but didn't
1145          support from_codeset and to_codeset, so that the caller can emit
1146          an error message such as
1147            "iconv() is not supported. Installing GNU libiconv and
1148             then reinstalling this package would fix this."  */
1149       errno = ENOSYS;
1150       return NULL;
1151 #endif
1152     }
1153 }