lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2007 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "striconveh.h"
  23
  24 #include <errno.h>
  25 #include <stdbool.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 #if HAVE_ICONV
  30 # include <iconv.h>
  31 # include "utf8-ucs4-safe.h"
  32 # include "ucs4-utf8.h"
  33 # include "unistr.h"
  34 #endif
  35
  36 #include "strdup.h"
  37 #include "c-strcase.h"
  38 #include "c-strcaseeq.h"
  39
  40 #ifndef SIZE_MAX
  41 # define SIZE_MAX ((size_t) -1)
  42 #endif
  43
  44
  45 #if HAVE_ICONV
  46
  47 /* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
  48    error occurs, we may have to determine the Unicode representation of the
  49    inconvertible character.  */
  50
  51 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
  52    a conversion error, and it returns in *INCREMENTED a boolean telling whether
  53    it has incremented the input pointers past the error location.  */
  54 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
  55 /* Irix iconv() inserts a NUL byte if it cannot convert.
  56    NetBSD iconv() inserts a question mark if it cannot convert.
  57    Only GNU libiconv and GNU libc are known to prefer to fail rather
  58    than doing a lossy conversion.  */
  59 static size_t
  60 iconv_carefully (iconv_t cd,
  61                  const char **inbuf, size_t *inbytesleft,
  62                  char **outbuf, size_t *outbytesleft,
  63                  bool *incremented)
  64 {
  65   const char *inptr = *inbuf;
  66   const char *inptr_end = inptr + *inbytesleft;
  67   char *outptr = *outbuf;
  68   size_t outsize = *outbytesleft;
  69   const char *inptr_before;
  70   size_t res;
  71
  72   do
  73     {
  74       size_t insize;
  75
  76       inptr_before = inptr;
  77       res = (size_t)(-1);
  78
  79       for (insize = 1; inptr + insize <= inptr_end; insize++)
  80         {
  81           res = iconv (cd,
  82                        (ICONV_CONST char **) &inptr, &insize,
  83                        &outptr, &outsize);
  84           if (!(res == (size_t)(-1) && errno == EINVAL))
  85             break;
  86           /* We expect that no input bytes have been consumed so far.  */
  87           if (inptr != inptr_before)
  88             abort ();
  89         }
  90
  91       if (res == 0)
  92         {
  93           *outbuf = outptr;
  94           *outbytesleft = outsize;
  95         }
  96     }
  97   while (res == 0 && inptr < inptr_end);
  98
  99   *inbuf = inptr;
 100   *inbytesleft = inptr_end - inptr;
 101   if (res != (size_t)(-1) && res > 0)
 102     {
 103       /* iconv() has already incremented INPTR.  We cannot go back to a
 104          previous INPTR, otherwise the state inside CD would become invalid,
 105          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 106          *INBUF has already been incremented.  */
 107       *incremented = (inptr > inptr_before);
 108       errno = EILSEQ;
 109       return (size_t)(-1);
 110     }
 111   else
 112     {
 113       *incremented = false;
 114       return res;
 115     }
 116 }
 117 # else
 118 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 119      (*(incremented) = false, \
 120       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 121 # endif
 122
 123 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
 124    converting one character.  */
 125 static size_t
 126 iconv_carefully_1 (iconv_t cd,
 127                    const char **inbuf, size_t *inbytesleft,
 128                    char **outbuf, size_t *outbytesleft,
 129                    bool *incremented)
 130 {
 131   const char *inptr = *inbuf;
 132   const char *inptr_end = inptr + *inbytesleft;
 133   char *outptr = *outbuf;
 134   size_t outsize = *outbytesleft;
 135   const char *inptr_before = inptr;
 136   size_t res = (size_t)(-1);
 137   size_t insize;
 138
 139   for (insize = 1; inptr + insize <= inptr_end; insize++)
 140     {
 141       res = iconv (cd,
 142                    (ICONV_CONST char **) &inptr, &insize,
 143                    &outptr, &outsize);
 144       if (!(res == (size_t)(-1) && errno == EINVAL))
 145         break;
 146       /* We expect that no input bytes have been consumed so far.  */
 147       if (inptr != inptr_before)
 148         abort ();
 149     }
 150
 151   *inbuf = inptr;
 152   *inbytesleft = inptr_end - inptr;
 153 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 154   /* Irix iconv() inserts a NUL byte if it cannot convert.
 155      NetBSD iconv() inserts a question mark if it cannot convert.
 156      Only GNU libiconv and GNU libc are known to prefer to fail rather
 157      than doing a lossy conversion.  */
 158   if (res != (size_t)(-1) && res > 0)
 159     {
 160       /* iconv() has already incremented INPTR.  We cannot go back to a
 161          previous INPTR, otherwise the state inside CD would become invalid,
 162          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 163          *INBUF has already been incremented.  */
 164       *incremented = (inptr > inptr_before);
 165       errno = EILSEQ;
 166       return (size_t)(-1);
 167     }
 168 # endif
 169
 170   if (res != (size_t)(-1))
 171     {
 172       *outbuf = outptr;
 173       *outbytesleft = outsize;
 174     }
 175   *incremented = false;
 176   return res;
 177 }
 178
 179 static int
 180 mem_cd_iconveh_internal (const char *src, size_t srclen,
 181                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 182                          enum iconv_ilseq_handler handler,
 183                          size_t extra_alloc,
 184                          size_t *offsets,
 185                          char **resultp, size_t *lengthp)
 186 {
 187   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 188      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 189      Instead, we have to start afresh from the beginning of SRC.  */
 190   /* Use a temporary buffer, so that for small strings, a single malloc()
 191      call will be sufficient.  */
 192 # define tmpbufsize 4096
 193   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 194      libiconv's UCS-4-INTERNAL encoding.  */
 195   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 196 # define tmpbuf tmp.buf
 197
 198   char *initial_result;
 199   char *result;
 200   size_t allocated;
 201   size_t length;
 202   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 203
 204   if (*lengthp >= sizeof (tmpbuf))
 205     {
 206       initial_result = *resultp;
 207       allocated = *lengthp;
 208     }
 209   else
 210     {
 211       initial_result = tmpbuf;
 212       allocated = sizeof (tmpbuf);
 213     }
 214   result = initial_result;
 215
 216   if (offsets != NULL)
 217     {
 218       size_t i;
 219
 220       for (i = 0; i < srclen; i++)
 221         offsets[i] = (size_t)(-1);
 222
 223       last_length = (size_t)(-1);
 224     }
 225   length = 0;
 226
 227   /* First, try a direct conversion, and see whether a conversion error
 228      occurs at all.  */
 229   {
 230     const char *inptr = src;
 231     size_t insize = srclen;
 232
 233     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 234 # if defined _LIBICONV_VERSION \
 235      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 236     /* Set to the initial state.  */
 237     iconv (cd, NULL, NULL, NULL, NULL);
 238 # endif
 239
 240     while (insize > 0)
 241       {
 242         char *outptr = result + length;
 243         size_t outsize = allocated - extra_alloc - length;
 244         bool incremented;
 245         size_t res;
 246         bool grow;
 247
 248         if (offsets != NULL)
 249           {
 250             if (length != last_length) /* ensure that offset[] be increasing */
 251               {
 252                 offsets[inptr - src] = length;
 253                 last_length = length;
 254               }
 255             res = iconv_carefully_1 (cd,
 256                                      &inptr, &insize,
 257                                      &outptr, &outsize,
 258                                      &incremented);
 259           }
 260         else
 261           /* Use iconv_carefully instead of iconv here, because:
 262              - If TO_CODESET is UTF-8, we can do the error handling in this
 263                loop, no need for a second loop,
 264              - With iconv() implementations other than GNU libiconv and GNU
 265                libc, if we use iconv() in a big swoop, checking for an E2BIG
 266                return, we lose the number of irreversible conversions.  */
 267           res = iconv_carefully (cd,
 268                                  &inptr, &insize,
 269                                  &outptr, &outsize,
 270                                  &incremented);
 271
 272         length = outptr - result;
 273         grow = (length + extra_alloc > allocated / 2);
 274         if (res == (size_t)(-1))
 275           {
 276             if (errno == E2BIG)
 277               grow = true;
 278             else if (errno == EINVAL)
 279               break;
 280             else if (errno == EILSEQ && handler != iconveh_error)
 281               {
 282                 if (cd2 == (iconv_t)(-1))
 283                   {
 284                     /* TO_CODESET is UTF-8.  */
 285                     /* Error handling can produce up to 1 byte of output.  */
 286                     if (length + 1 + extra_alloc > allocated)
 287                       {
 288                         char *memory;
 289
 290                         allocated = 2 * allocated;
 291                         if (length + 1 + extra_alloc > allocated)
 292                           abort ();
 293                         if (result == initial_result)
 294                           memory = (char *) malloc (allocated);
 295                         else
 296                           memory = (char *) realloc (result, allocated);
 297                         if (memory == NULL)
 298                           {
 299                             if (result != initial_result)
 300                               free (result);
 301                             errno = ENOMEM;
 302                             return -1;
 303                           }
 304                         if (result == initial_result)
 305                           memcpy (memory, initial_result, length);
 306                         result = memory;
 307                         grow = false;
 308                       }
 309                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 310                        and emit a question mark.  */
 311                     if (!incremented)
 312                       {
 313                         if (insize == 0)
 314                           abort ();
 315                         inptr++;
 316                         insize--;
 317                       }
 318                     result[length] = '?';
 319                     length++;
 320                   }
 321                 else
 322                   goto indirectly;
 323               }
 324             else
 325               {
 326                 if (result != initial_result)
 327                   {
 328                     int saved_errno = errno;
 329                     free (result);
 330                     errno = saved_errno;
 331                   }
 332                 return -1;
 333               }
 334           }
 335         if (insize == 0)
 336           break;
 337         if (grow)
 338           {
 339             char *memory;
 340
 341             allocated = 2 * allocated;
 342             if (result == initial_result)
 343               memory = (char *) malloc (allocated);
 344             else
 345               memory = (char *) realloc (result, allocated);
 346             if (memory == NULL)
 347               {
 348                 if (result != initial_result)
 349                   free (result);
 350                 errno = ENOMEM;
 351                 return -1;
 352               }
 353             if (result == initial_result)
 354               memcpy (memory, initial_result, length);
 355             result = memory;
 356           }
 357       }
 358   }
 359
 360   /* Now get the conversion state back to the initial state.
 361      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 362 #if defined _LIBICONV_VERSION \
 363     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 364   for (;;)
 365     {
 366       char *outptr = result + length;
 367       size_t outsize = allocated - extra_alloc - length;
 368       size_t res;
 369
 370       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 371       length = outptr - result;
 372       if (res == (size_t)(-1))
 373         {
 374           if (errno == E2BIG)
 375             {
 376               char *memory;
 377
 378               allocated = 2 * allocated;
 379               if (result == initial_result)
 380                 memory = (char *) malloc (allocated);
 381               else
 382                 memory = (char *) realloc (result, allocated);
 383               if (memory == NULL)
 384                 {
 385                   if (result != initial_result)
 386                     free (result);
 387                   errno = ENOMEM;
 388                   return -1;
 389                 }
 390               if (result == initial_result)
 391                 memcpy (memory, initial_result, length);
 392               result = memory;
 393             }
 394           else
 395             {
 396               if (result != initial_result)
 397                 {
 398                   int saved_errno = errno;
 399                   free (result);
 400                   errno = saved_errno;
 401                 }
 402               return -1;
 403             }
 404         }
 405       else
 406         break;
 407     }
 408 #endif
 409
 410   /* The direct conversion succeeded.  */
 411   goto done;
 412
 413  indirectly:
 414   /* The direct conversion failed, handler != iconveh_error,
 415      and cd2 != (iconv_t)(-1).
 416      Use a conversion through UTF-8.  */
 417   if (offsets != NULL)
 418     {
 419       size_t i;
 420
 421       for (i = 0; i < srclen; i++)
 422         offsets[i] = (size_t)(-1);
 423
 424       last_length = (size_t)(-1);
 425     }
 426   length = 0;
 427   {
 428 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 429     char utf8buf[utf8bufsize + 1];
 430     size_t utf8len = 0;
 431     const char *in1ptr = src;
 432     size_t in1size = srclen;
 433     bool do_final_flush1 = true;
 434     bool do_final_flush2 = true;
 435
 436     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 437 # if defined _LIBICONV_VERSION \
 438      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 439     /* Set to the initial state.  */
 440     if (cd1 != (iconv_t)(-1))
 441       iconv (cd1, NULL, NULL, NULL, NULL);
 442     iconv (cd2, NULL, NULL, NULL, NULL);
 443 # endif
 444
 445     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 446       {
 447         char *out1ptr = utf8buf + utf8len;
 448         size_t out1size = utf8bufsize - utf8len;
 449         bool incremented1;
 450         size_t res1;
 451         int errno1;
 452
 453         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 454         if (in1size > 0)
 455           {
 456             if (offsets != NULL
 457                 && length != last_length) /* ensure that offset[] be increasing */
 458               {
 459                 offsets[in1ptr - src] = length;
 460                 last_length = length;
 461               }
 462             if (cd1 != (iconv_t)(-1))
 463               {
 464                 if (offsets != NULL)
 465                   res1 = iconv_carefully_1 (cd1,
 466                                             &in1ptr, &in1size,
 467                                             &out1ptr, &out1size,
 468                                             &incremented1);
 469                 else
 470                   res1 = iconv_carefully (cd1,
 471                                           &in1ptr, &in1size,
 472                                           &out1ptr, &out1size,
 473                                           &incremented1);
 474               }
 475             else
 476               {
 477                 /* FROM_CODESET is UTF-8.  */
 478                 res1 = 0;
 479                 do
 480                   {
 481                     ucs4_t uc;
 482                     int n;
 483                     int m;
 484
 485                     n = u8_mbtouc_safe (&uc, (const uint8_t *) in1ptr, in1size);
 486                     if (uc == 0xfffd
 487                         && !(n >= 3
 488                              && (uint8_t)in1ptr[0] == 0xEF
 489                              && (uint8_t)in1ptr[1] == 0xBF
 490                              && (uint8_t)in1ptr[2] == 0xBD))
 491                       {
 492                         in1ptr += n;
 493                         in1size -= n;
 494                         errno = EILSEQ;
 495                         res1 = (size_t)(-1);
 496                         incremented1 = true;
 497                         break;
 498                       }
 499                     if (out1size == 0)
 500                       {
 501                         errno = E2BIG;
 502                         res1 = (size_t)(-1);
 503                         incremented1 = false;
 504                         break;
 505                       }
 506                     m = u8_uctomb ((uint8_t *) out1ptr, uc, out1size);
 507                     if (m == -2)
 508                       {
 509                         errno = E2BIG;
 510                         res1 = (size_t)(-1);
 511                         incremented1 = false;
 512                         break;
 513                       }
 514                     in1ptr += n;
 515                     in1size -= n;
 516                     if (m == -1)
 517                       {
 518                         errno = EILSEQ;
 519                         res1 = (size_t)(-1);
 520                         incremented1 = true;
 521                         break;
 522                       }
 523                     out1ptr += m;
 524                     out1size -= m;
 525                   }
 526                 while (offsets == NULL && in1size > 0);
 527               }
 528           }
 529         else if (do_final_flush1)
 530           {
 531             /* Now get the conversion state of CD1 back to the initial state.
 532                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 533 # if defined _LIBICONV_VERSION \
 534      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 535             if (cd1 != (iconv_t)(-1))
 536               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 537             else
 538 # endif
 539               res1 = 0;
 540             do_final_flush1 = false;
 541             incremented1 = true;
 542           }
 543         else
 544           {
 545             res1 = 0;
 546             incremented1 = true;
 547           }
 548         if (res1 == (size_t)(-1)
 549             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 550           {
 551             if (result != initial_result)
 552               {
 553                 int saved_errno = errno;
 554                 free (result);
 555                 errno = saved_errno;
 556               }
 557             return -1;
 558           }
 559         if (res1 == (size_t)(-1)
 560             && errno == EILSEQ && handler != iconveh_error)
 561           {
 562             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 563                emit a question mark.  Room for the question mark was allocated
 564                at the end of utf8buf.  */
 565             if (!incremented1)
 566               {
 567                 if (in1size == 0)
 568                   abort ();
 569                 in1ptr++;
 570                 in1size--;
 571               }
 572             utf8buf[utf8len++] = '?';
 573           }
 574         errno1 = errno;
 575         utf8len = out1ptr - utf8buf;
 576
 577         if (offsets != NULL
 578             || in1size == 0
 579             || utf8len > utf8bufsize / 2
 580             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 581           {
 582             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 583             const char *in2ptr = utf8buf;
 584             size_t in2size = utf8len;
 585
 586             while (in2size > 0
 587                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 588               {
 589                 char *out2ptr = result + length;
 590                 size_t out2size = allocated - extra_alloc - length;
 591                 bool incremented2;
 592                 size_t res2;
 593                 bool grow;
 594
 595                 if (in2size > 0)
 596                   res2 = iconv_carefully (cd2,
 597                                           &in2ptr, &in2size,
 598                                           &out2ptr, &out2size,
 599                                           &incremented2);
 600                 else /* in1size == 0 && !do_final_flush1
 601                         && in2size == 0 && do_final_flush2 */
 602                   {
 603                     /* Now get the conversion state of CD1 back to the initial
 604                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 605 # if defined _LIBICONV_VERSION \
 606      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 607                     res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 608 # else
 609                     res2 = 0;
 610 # endif
 611                     do_final_flush2 = false;
 612                     incremented2 = true;
 613                   }
 614
 615                 length = out2ptr - result;
 616                 grow = (length + extra_alloc > allocated / 2);
 617                 if (res2 == (size_t)(-1))
 618                   {
 619                     if (errno == E2BIG)
 620                       grow = true;
 621                     else if (errno == EINVAL)
 622                       break;
 623                     else if (errno == EILSEQ && handler != iconveh_error)
 624                       {
 625                         /* Error handling can produce up to 10 bytes of ASCII
 626                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 627                            UCS-4, so use CD2 here as well.  */
 628                         char scratchbuf[10];
 629                         size_t scratchlen;
 630                         ucs4_t uc;
 631                         const char *inptr;
 632                         size_t insize;
 633                         size_t res;
 634
 635                         if (incremented2)
 636                           {
 637                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 638                                          (const uint8_t *) utf8buf)
 639                                 == NULL)
 640                               abort ();
 641                           }
 642                         else
 643                           {
 644                             int n;
 645                             if (in2size == 0)
 646                               abort ();
 647                             n = u8_mbtouc (&uc, (const uint8_t *) in2ptr,
 648                                            in2size);
 649                             in2ptr += n;
 650                             in2size -= n;
 651                           }
 652
 653                         if (handler == iconveh_escape_sequence)
 654                           {
 655                             static char hex[16] = "0123456789ABCDEF";
 656                             scratchlen = 0;
 657                             scratchbuf[scratchlen++] = '\\';
 658                             if (uc < 0x10000)
 659                               scratchbuf[scratchlen++] = 'u';
 660                             else
 661                               {
 662                                 scratchbuf[scratchlen++] = 'U';
 663                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 664                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 665                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 666                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 667                               }
 668                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 669                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 670                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 671                             scratchbuf[scratchlen++] = hex[uc & 15];
 672                           }
 673                         else
 674                           {
 675                             scratchbuf[0] = '?';
 676                             scratchlen = 1;
 677                           }
 678
 679                         inptr = scratchbuf;
 680                         insize = scratchlen;
 681                         res = iconv (cd2,
 682                                      (ICONV_CONST char **) &inptr, &insize,
 683                                      &out2ptr, &out2size);
 684                         length = out2ptr - result;
 685                         if (res == (size_t)(-1) && errno == E2BIG)
 686                           {
 687                             char *memory;
 688
 689                             allocated = 2 * allocated;
 690                             if (length + 1 + extra_alloc > allocated)
 691                               abort ();
 692                             if (result == initial_result)
 693                               memory = (char *) malloc (allocated);
 694                             else
 695                               memory = (char *) realloc (result, allocated);
 696                             if (memory == NULL)
 697                               {
 698                                 if (result != initial_result)
 699                                   free (result);
 700                                 errno = ENOMEM;
 701                                 return -1;
 702                               }
 703                             if (result == initial_result)
 704                               memcpy (memory, initial_result, length);
 705                             result = memory;
 706                             grow = false;
 707
 708                             out2ptr = result + length;
 709                             out2size = allocated - extra_alloc - length;
 710                             res = iconv (cd2,
 711                                          (ICONV_CONST char **) &inptr, &insize,
 712                                          &out2ptr, &out2size);
 713                             length = out2ptr - result;
 714                           }
 715 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 716                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 717                            NetBSD iconv() inserts a question mark if it cannot
 718                            convert.
 719                            Only GNU libiconv and GNU libc are known to prefer
 720                            to fail rather than doing a lossy conversion.  */
 721                         if (res != (size_t)(-1) && res > 0)
 722                           {
 723                             errno = EILSEQ;
 724                             res = (size_t)(-1);
 725                           }
 726 # endif
 727                         if (res == (size_t)(-1))
 728                           {
 729                             /* Failure converting the ASCII replacement.  */
 730                             if (result != initial_result)
 731                               {
 732                                 int saved_errno = errno;
 733                                 free (result);
 734                                 errno = saved_errno;
 735                               }
 736                             return -1;
 737                           }
 738                       }
 739                     else
 740                       {
 741                         if (result != initial_result)
 742                           {
 743                             int saved_errno = errno;
 744                             free (result);
 745                             errno = saved_errno;
 746                           }
 747                         return -1;
 748                       }
 749                   }
 750                 if (!(in2size > 0
 751                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 752                   break;
 753                 if (grow)
 754                   {
 755                     char *memory;
 756
 757                     allocated = 2 * allocated;
 758                     if (result == initial_result)
 759                       memory = (char *) malloc (allocated);
 760                     else
 761                       memory = (char *) realloc (result, allocated);
 762                     if (memory == NULL)
 763                       {
 764                         if (result != initial_result)
 765                           free (result);
 766                         errno = ENOMEM;
 767                         return -1;
 768                       }
 769                     if (result == initial_result)
 770                       memcpy (memory, initial_result, length);
 771                     result = memory;
 772                   }
 773               }
 774
 775             /* Move the remaining bytes to the beginning of utf8buf.  */
 776             if (in2size > 0)
 777               memmove (utf8buf, in2ptr, in2size);
 778             utf8len = in2size;
 779           }
 780
 781         if (res1 == (size_t)(-1))
 782           {
 783             if (errno1 == EINVAL)
 784               in1size = 0;
 785             else if (errno1 == EILSEQ)
 786               {
 787                 if (result != initial_result)
 788                   free (result);
 789                 errno = errno1;
 790                 return -1;
 791               }
 792           }
 793       }
 794 # undef utf8bufsize
 795   }
 796
 797  done:
 798   /* Now the final memory allocation.  */
 799   if (result == tmpbuf)
 800     {
 801       char *memory;
 802
 803       memory = (char *) malloc (length + extra_alloc);
 804       if (memory != NULL)
 805         {
 806           memcpy (memory, tmpbuf, length);
 807           result = memory;
 808         }
 809       else
 810         {
 811           errno = ENOMEM;
 812           return -1;
 813         }
 814     }
 815   else if (result != *resultp && length + extra_alloc < allocated)
 816     {
 817       /* Shrink the allocated memory if possible.  */
 818       char *memory;
 819
 820       memory = (char *) realloc (result, length + extra_alloc);
 821       if (memory != NULL)
 822         result = memory;
 823     }
 824   *resultp = result;
 825   *lengthp = length;
 826   return 0;
 827 # undef tmpbuf
 828 # undef tmpbufsize
 829 }
 830
 831 int
 832 mem_cd_iconveh (const char *src, size_t srclen,
 833                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 834                 enum iconv_ilseq_handler handler,
 835                 size_t *offsets,
 836                 char **resultp, size_t *lengthp)
 837 {
 838   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
 839                                   offsets, resultp, lengthp);
 840 }
 841
 842 char *
 843 str_cd_iconveh (const char *src,
 844                 iconv_t cd, iconv_t cd1, iconv_t cd2,
 845                 enum iconv_ilseq_handler handler)
 846 {
 847   /* For most encodings, a trailing NUL byte in the input will be converted
 848      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
 849      function is usable for UTF-7, we have to exclude the NUL byte from the
 850      conversion and add it by hand afterwards.  */
 851   char *result = NULL;
 852   size_t length = 0;
 853   int retval = mem_cd_iconveh_internal (src, strlen (src),
 854                                         cd, cd1, cd2, handler, 1, NULL,
 855                                         &result, &length);
 856
 857   if (retval < 0)
 858     {
 859       if (result != NULL)
 860         {
 861           int saved_errno = errno;
 862           free (result);
 863           errno = saved_errno;
 864         }
 865       return NULL;
 866     }
 867
 868   /* Add the terminating NUL byte.  */
 869   result[length] = '\0';
 870
 871   return result;
 872 }
 873
 874 #endif
 875
 876 int
 877 mem_iconveh (const char *src, size_t srclen,
 878              const char *from_codeset, const char *to_codeset,
 879              enum iconv_ilseq_handler handler,
 880              size_t *offsets,
 881              char **resultp, size_t *lengthp)
 882 {
 883   if (srclen == 0)
 884     {
 885       /* Nothing to convert.  */
 886       *lengthp = 0;
 887       return 0;
 888     }
 889   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
 890     {
 891       char *result;
 892
 893       if (*resultp != NULL && *lengthp >= srclen)
 894         result = *resultp;
 895       else
 896         {
 897           result = (char *) malloc (srclen);
 898           if (result == NULL)
 899             {
 900               errno = ENOMEM;
 901               return -1;
 902             }
 903         }
 904       memcpy (result, src, srclen);
 905       *resultp = result;
 906       *lengthp = srclen;
 907       return 0;
 908     }
 909   else
 910     {
 911 #if HAVE_ICONV
 912       iconv_t cd;
 913       iconv_t cd1;
 914       iconv_t cd2;
 915       char *result;
 916       size_t length;
 917       int retval;
 918
 919       /* Avoid glibc-2.1 bug with EUC-KR.  */
 920 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
 921       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
 922           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
 923         {
 924           errno = EINVAL;
 925           return -1;
 926         }
 927 # endif
 928
 929       cd = iconv_open (to_codeset, from_codeset);
 930       if (cd == (iconv_t)(-1))
 931         return -1;
 932
 933       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 934         cd1 = (iconv_t)(-1);
 935       else
 936         {
 937           cd1 = iconv_open ("UTF-8", from_codeset);
 938           if (cd1 == (iconv_t)(-1))
 939             {
 940               int saved_errno = errno;
 941               iconv_close (cd);
 942               errno = saved_errno;
 943               return -1;
 944             }
 945         }
 946
 947       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 948         cd2 = (iconv_t)(-1);
 949       else
 950         {
 951           cd2 = iconv_open (to_codeset, "UTF-8");
 952           if (cd2 == (iconv_t)(-1))
 953             {
 954               int saved_errno = errno;
 955               if (cd1 != (iconv_t)(-1))
 956                 iconv_close (cd1);
 957               iconv_close (cd);
 958               errno = saved_errno;
 959               return -1;
 960             }
 961         }
 962
 963       result = *resultp;
 964       length = *lengthp;
 965       retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
 966                                &result, &length);
 967
 968       if (retval < 0)
 969         {
 970           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
 971           int saved_errno = errno;
 972           if (cd2 != (iconv_t)(-1))
 973             iconv_close (cd2);
 974           if (cd1 != (iconv_t)(-1))
 975             iconv_close (cd1);
 976           iconv_close (cd);
 977           errno = saved_errno;
 978         }
 979       else
 980         {
 981           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
 982             {
 983               /* Return -1, but free the allocated memory, and while doing
 984                  that, preserve the errno from iconv_close.  */
 985               int saved_errno = errno;
 986               if (cd1 != (iconv_t)(-1))
 987                 iconv_close (cd1);
 988               iconv_close (cd);
 989               if (result != *resultp && result != NULL)
 990                 free (result);
 991               errno = saved_errno;
 992               return -1;
 993             }
 994           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
 995             {
 996               /* Return -1, but free the allocated memory, and while doing
 997                  that, preserve the errno from iconv_close.  */
 998               int saved_errno = errno;
 999               iconv_close (cd);
1000               if (result != *resultp && result != NULL)
1001                 free (result);
1002               errno = saved_errno;
1003               return -1;
1004             }
1005           if (iconv_close (cd) < 0)
1006             {
1007               /* Return -1, but free the allocated memory, and while doing
1008                  that, preserve the errno from iconv_close.  */
1009               int saved_errno = errno;
1010               if (result != *resultp && result != NULL)
1011                 free (result);
1012               errno = saved_errno;
1013               return -1;
1014             }
1015           *resultp = result;
1016           *lengthp = length;
1017         }
1018       return retval;
1019 #else
1020       /* This is a different error code than if iconv_open existed but didn't
1021          support from_codeset and to_codeset, so that the caller can emit
1022          an error message such as
1023            "iconv() is not supported. Installing GNU libiconv and
1024             then reinstalling this package would fix this."  */
1025       errno = ENOSYS;
1026       return -1;
1027 #endif
1028     }
1029 }
1030
1031 char *
1032 str_iconveh (const char *src,
1033              const char *from_codeset, const char *to_codeset,
1034              enum iconv_ilseq_handler handler)
1035 {
1036   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
1037     {
1038       char *result = strdup (src);
1039
1040       if (result == NULL)
1041         errno = ENOMEM;
1042       return result;
1043     }
1044   else
1045     {
1046 #if HAVE_ICONV
1047       iconv_t cd;
1048       iconv_t cd1;
1049       iconv_t cd2;
1050       char *result;
1051
1052       /* Avoid glibc-2.1 bug with EUC-KR.  */
1053 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
1054       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
1055           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
1056         {
1057           errno = EINVAL;
1058           return NULL;
1059         }
1060 # endif
1061
1062       cd = iconv_open (to_codeset, from_codeset);
1063       if (cd == (iconv_t)(-1))
1064         return NULL;
1065
1066       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1067         cd1 = (iconv_t)(-1);
1068       else
1069         {
1070           cd1 = iconv_open ("UTF-8", from_codeset);
1071           if (cd1 == (iconv_t)(-1))
1072             {
1073               int saved_errno = errno;
1074               iconv_close (cd);
1075               errno = saved_errno;
1076               return NULL;
1077             }
1078         }
1079
1080       if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
1081         cd2 = (iconv_t)(-1);
1082       else
1083         {
1084           cd2 = iconv_open (to_codeset, "UTF-8");
1085           if (cd2 == (iconv_t)(-1))
1086             {
1087               int saved_errno = errno;
1088               if (cd1 != (iconv_t)(-1))
1089                 iconv_close (cd1);
1090               iconv_close (cd);
1091               errno = saved_errno;
1092               return NULL;
1093             }
1094         }
1095
1096       result = str_cd_iconveh (src, cd, cd1, cd2, handler);
1097
1098       if (result == NULL)
1099         {
1100           /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv.  */
1101           int saved_errno = errno;
1102           if (cd2 != (iconv_t)(-1))
1103             iconv_close (cd2);
1104           if (cd1 != (iconv_t)(-1))
1105             iconv_close (cd1);
1106           iconv_close (cd);
1107           errno = saved_errno;
1108         }
1109       else
1110         {
1111           if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
1112             {
1113               /* Return NULL, but free the allocated memory, and while doing
1114                  that, preserve the errno from iconv_close.  */
1115               int saved_errno = errno;
1116               if (cd1 != (iconv_t)(-1))
1117                 iconv_close (cd1);
1118               iconv_close (cd);
1119               free (result);
1120               errno = saved_errno;
1121               return NULL;
1122             }
1123           if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
1124             {
1125               /* Return NULL, but free the allocated memory, and while doing
1126                  that, preserve the errno from iconv_close.  */
1127               int saved_errno = errno;
1128               iconv_close (cd);
1129               free (result);
1130               errno = saved_errno;
1131               return NULL;
1132             }
1133           if (iconv_close (cd) < 0)
1134             {
1135               /* Return NULL, but free the allocated memory, and while doing
1136                  that, preserve the errno from iconv_close.  */
1137               int saved_errno = errno;
1138               free (result);
1139               errno = saved_errno;
1140               return NULL;
1141             }
1142         }
1143       return result;
1144 #else
1145       /* This is a different error code than if iconv_open existed but didn't
1146          support from_codeset and to_codeset, so that the caller can emit
1147          an error message such as
1148            "iconv() is not supported. Installing GNU libiconv and
1149             then reinstalling this package would fix this."  */
1150       errno = ENOSYS;
1151       return NULL;
1152 #endif
1153     }
1154 }