File: striconveh.c

package info (click to toggle)
libunistring 0.9.10-4
links: PTS, VCS
area: main
in suites: bullseye
size: 28,668 kB
sloc: ansic: 91,149; perl: 15,827; sh: 7,478; makefile: 367; lisp: 308
file content (1208 lines) | stat: -rw-r--r-- 39,502 bytes
parent folder | download | duplicates (2)
/* Character set conversion with error handling.
   Copyright (C) 2001-2018 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "striconveh.h"

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
# include "unistr.h"
#endif

#include "c-strcase.h"
#include "c-strcaseeq.h"

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif


#if HAVE_ICONV

/* Open the conversion descriptors needed to convert from FROM_CODESET to
   TO_CODESET with error handling, and store them in *CDP.

   The caller must provide an iconveh_t, not just an iconv_t, because when a
   conversion error occurs, we may have to determine the Unicode representation
   of the inconvertible character.

   On success, *CDP holds three descriptors:
     - cd:  FROM_CODESET -> TO_CODESET.  It may be (iconv_t)(-1) if
            iconv_open could not provide the direct conversion; this alone
            is not treated as a failure.
     - cd1: FROM_CODESET -> UTF-8, or (iconv_t)(-1) if FROM_CODESET is UTF-8.
     - cd2: UTF-8 -> TO_CODESET, or (iconv_t)(-1) if TO_CODESET is UTF-8
            (or, with recent glibc / libiconv, UTF-8//TRANSLIT).

   Return value: 0 if successful, otherwise -1 with errno set.  In the
   failure case every descriptor opened so far has been closed again and
   *CDP has not been modified.  */

int
iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
{
  iconv_t cd;
  iconv_t cd1;
  iconv_t cd2;

  /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
  if (c_strcasecmp (from_codeset, "EUC-KR") == 0
      || c_strcasecmp (to_codeset, "EUC-KR") == 0)
    {
      errno = EINVAL;
      return -1;
    }
# endif

  /* A failure here is tolerated: the conversion can still go through
     UTF-8 using cd1 and cd2.  */
  cd = iconv_open (to_codeset, from_codeset);

  if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    cd1 = (iconv_t)(-1);
  else
    {
      cd1 = iconv_open ("UTF-8", from_codeset);
      if (cd1 == (iconv_t)(-1))
        {
          /* Preserve the errno from iconv_open across the cleanup.  */
          int saved_errno = errno;
          /* Close the local CD.  CDP->cd must not be used here: it is only
             assigned at the very end, once all opens have succeeded.  */
          if (cd != (iconv_t)(-1))
            iconv_close (cd);
          errno = saved_errno;
          return -1;
        }
    }

  if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
# if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
      && !defined __UCLIBC__) \
     || _LIBICONV_VERSION >= 0x0105
      || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
# endif
     )
    cd2 = (iconv_t)(-1);
  else
    {
      cd2 = iconv_open (to_codeset, "UTF-8");
      if (cd2 == (iconv_t)(-1))
        {
          /* Preserve the errno from iconv_open across the cleanup.  */
          int saved_errno = errno;
          if (cd1 != (iconv_t)(-1))
            iconv_close (cd1);
          if (cd != (iconv_t)(-1))
            iconv_close (cd);
          errno = saved_errno;
          return -1;
        }
    }

  /* Everything is open; only now publish the descriptors to the caller.  */
  cdp->cd = cd;
  cdp->cd1 = cd1;
  cdp->cd2 = cd2;
  return 0;
}

/* Close the up to three conversion descriptors held in *CD, in the order
   cd2, cd1, cd.  Descriptors equal to (iconv_t)(-1) are skipped.
   As soon as one iconv_close call fails, the descriptors that have not been
   closed yet are closed as well (ignoring their status), and -1 is returned.
   For a cd2 or cd1 failure the errno of the failing call is restored after
   the cleanup.
   Return value: 0 if every close succeeded, otherwise -1.  */
int
iconveh_close (const iconveh_t *cd)
{
  const iconv_t unopened = (iconv_t)(-1);

  if (cd->cd2 != unopened)
    {
      if (iconv_close (cd->cd2) < 0)
        {
          /* Report this failure, but still release cd1 and cd.  */
          const int close_errno = errno;

          if (cd->cd1 != unopened)
            iconv_close (cd->cd1);
          if (cd->cd != unopened)
            iconv_close (cd->cd);
          errno = close_errno;
          return -1;
        }
    }

  if (cd->cd1 != unopened)
    {
      if (iconv_close (cd->cd1) < 0)
        {
          /* Report this failure, but still release cd.  */
          const int close_errno = errno;

          if (cd->cd != unopened)
            iconv_close (cd->cd);
          errno = close_errno;
          return -1;
        }
    }

  if (cd->cd == unopened)
    return 0;

  return (iconv_close (cd->cd) < 0 ? -1 : 0);
}

/* iconv_carefully is like iconv, except that it stops as soon as it encounters
   a conversion error, and it returns in *INCREMENTED a boolean telling whether
   it has incremented the input pointers past the error location.  */
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
/* Irix iconv() inserts a NUL byte if it cannot convert.
   NetBSD iconv() inserts a question mark if it cannot convert.
   Only GNU libiconv and GNU libc are known to prefer to fail rather
   than doing a lossy conversion.  */
/* Variant for iconv implementations that may convert lossily instead of
   failing.  The input is fed to iconv() with a window that starts at one
   byte and is widened by one byte each time iconv() answers EINVAL without
   consuming anything.  A positive (non-error) iconv() result is turned into
   an EILSEQ failure.
   *OUTBUF and *OUTBYTESLEFT are only updated for steps that returned 0;
   *INBUF and *INBYTESLEFT always reflect how far iconv() advanced.  */
static size_t
iconv_carefully (iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft,
                 bool *incremented)
{
  const char *cursor = *inbuf;
  const char *const limit = cursor + *inbytesleft;
  char *dst = *outbuf;
  size_t dstleft = *outbytesleft;
  const char *step_start;
  size_t status;

  do
    {
      size_t window = 1;

      step_start = cursor;
      status = (size_t)(-1);

      while (cursor + window <= limit)
        {
          status = iconv (cd,
                          (ICONV_CONST char **) &cursor, &window,
                          &dst, &dstleft);
          /* Anything other than "incomplete input" ends this step.  */
          if (status != (size_t)(-1) || errno != EINVAL)
            break;
          /* iconv can eat up a shift sequence but give EINVAL while
             attempting to convert the first character.  E.g. libiconv does
             this.  Count the consumed shift sequence as a successful step.  */
          if (cursor > step_start)
            {
              status = 0;
              break;
            }
          window++;
        }

      if (status == 0)
        {
          /* Commit the output produced by this successful step.  */
          *outbuf = dst;
          *outbytesleft = dstleft;
        }
    }
  while (status == 0 && cursor < limit);

  *inbuf = cursor;
  *inbytesleft = limit - cursor;

  if (status == (size_t)(-1) || status == 0)
    {
      /* Plain success or a genuine iconv() error: pass it through.  */
      *incremented = false;
      return status;
    }

  /* iconv() reported irreversible conversions and has already incremented
     the input pointer.  We cannot go back to a previous position, otherwise
     the state inside CD would become invalid, if FROM_CODESET is a stateful
     encoding.  So, tell the caller that *INBUF has already been
     incremented.  */
  *incremented = (cursor > step_start);
  errno = EILSEQ;
  return (size_t)(-1);
}
# else
/* This branch is selected when _LIBICONV_VERSION is defined or when
   compiling against glibc (not uClibc).  Here iconv_carefully is a plain
   call to iconv() that additionally stores false in *INCREMENTED; the
   macro's value is the return value of iconv().  */
#  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
     (*(incremented) = false, \
      iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
# endif

/* iconv_carefully_1 is like iconv_carefully, except that it stops after
   converting one character or one shift sequence.  */
/* Convert a single character or shift sequence from *INBUF with CD.
   The input is offered to iconv() with a window that starts at one byte and
   is widened by one byte each time iconv() answers EINVAL without consuming
   anything, always restarting from the original input position.
   *INBUF and *INBYTESLEFT are always updated; *OUTBUF and *OUTBYTESLEFT are
   only updated when the returned value is not (size_t)(-1).  */
static size_t
iconv_carefully_1 (iconv_t cd,
                   const char **inbuf, size_t *inbytesleft,
                   char **outbuf, size_t *outbytesleft,
                   bool *incremented)
{
  const char *const start = *inbuf;
  const char *const limit = start + *inbytesleft;
  const char *cursor = start;
  char *dst = *outbuf;
  size_t dstleft = *outbytesleft;
  size_t status = (size_t)(-1);
  size_t window = 1;

  while (start + window <= limit)
    {
      cursor = start;
      status = iconv (cd,
                      (ICONV_CONST char **) &cursor, &window,
                      &dst, &dstleft);
      /* Anything other than "incomplete input" ends the attempt.  */
      if (status != (size_t)(-1) || errno != EINVAL)
        break;
      /* iconv can eat up a shift sequence but give EINVAL while attempting
         to convert the first character.  E.g. libiconv does this.  Count
         the consumed shift sequence as success.  */
      if (cursor > start)
        {
          status = 0;
          break;
        }
      window++;
    }

  *inbuf = cursor;
  *inbytesleft = limit - cursor;
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
  /* Irix iconv() inserts a NUL byte if it cannot convert.
     NetBSD iconv() inserts a question mark if it cannot convert.
     Only GNU libiconv and GNU libc are known to prefer to fail rather
     than doing a lossy conversion.  */
  if (status != (size_t)(-1) && status > 0)
    {
      /* iconv() has already incremented the input pointer.  We cannot go
         back to a previous position, otherwise the state inside CD would
         become invalid, if FROM_CODESET is a stateful encoding.  So, tell
         the caller that *INBUF has already been incremented.  */
      *incremented = (cursor > start);
      errno = EILSEQ;
      return (size_t)(-1);
    }
# endif

  if (status != (size_t)(-1))
    {
      /* Commit the produced output only when no error is reported.  */
      *outbuf = dst;
      *outbytesleft = dstleft;
    }
  *incremented = false;
  return status;
}

/* utf8conv_carefully is like iconv, except that
     - it converts from UTF-8 to UTF-8,
     - it stops as soon as it encounters a conversion error, and it returns
       in *INCREMENTED a boolean telling whether it has incremented the input
       pointers past the error location,
     - if one_character_only is true, it stops after converting one
       character.  */
/* Copy UTF-8 characters from *INBUF to *OUTBUF, validating each one.
   Stops at the first error, after one character if ONE_CHARACTER_ONLY is
   true, or when the input is exhausted.
   Return value: 0, or (size_t)(-1) with errno set to
     - EINVAL or EILSEQ when the input character cannot be decoded (the
       input pointers are moved past it and *INCREMENTED is set to true),
     - E2BIG when the output does not fit (input not advanced for that
       character, *INCREMENTED set to false),
     - EILSEQ when the decoded character cannot be re-encoded (input
       already advanced, *INCREMENTED set to true).
   *INCREMENTED is only written on the error paths.  The four in/out
   arguments are always updated before returning.  */
static size_t
utf8conv_carefully (bool one_character_only,
                    const char **inbuf, size_t *inbytesleft,
                    char **outbuf, size_t *outbytesleft,
                    bool *incremented)
{
  const char *src_pos = *inbuf;
  size_t src_left = *inbytesleft;
  char *dst_pos = *outbuf;
  size_t dst_left = *outbytesleft;
  size_t status = 0;

  for (;;)
    {
      ucs4_t ch;
      int consumed;
      int produced;

      consumed = u8_mbtoucr (&ch, (const uint8_t *) src_pos, src_left);
      if (consumed < 0)
        {
          /* -2 means incomplete character, anything else invalid.  */
          errno = (consumed == -2 ? EINVAL : EILSEQ);
          /* Ask u8_mbtouc how many bytes to skip over.  */
          consumed = u8_mbtouc (&ch, (const uint8_t *) src_pos, src_left);
          src_pos += consumed;
          src_left -= consumed;
          *incremented = true;
          status = (size_t)(-1);
          break;
        }

      /* An empty output buffer is treated like a too small one, without
         calling u8_uctomb at all.  */
      produced = (dst_left == 0
                  ? -2
                  : u8_uctomb ((uint8_t *) dst_pos, ch, dst_left));
      if (produced == -2)
        {
          errno = E2BIG;
          *incremented = false;
          status = (size_t)(-1);
          break;
        }

      /* From here on the input character counts as consumed, even if it
         turns out that it cannot be encoded.  */
      src_pos += consumed;
      src_left -= consumed;
      if (produced == -1)
        {
          errno = EILSEQ;
          *incremented = true;
          status = (size_t)(-1);
          break;
        }
      dst_pos += produced;
      dst_left -= produced;

      if (one_character_only || src_left == 0)
        break;
    }

  *inbuf = src_pos;
  *inbytesleft = src_left;
  *outbuf = dst_pos;
  *outbytesleft = dst_left;
  return status;
}

/* Convert the SRCLEN bytes at SRC, using the descriptors CD, CD1, CD2.

   CD   converts directly from FROM_CODESET to TO_CODESET; if it is
        (iconv_t)(-1), only the indirect route is used.
   CD1  converts from FROM_CODESET to UTF-8; (iconv_t)(-1) means that
        FROM_CODESET is UTF-8.
   CD2  converts from UTF-8 to TO_CODESET; (iconv_t)(-1) means that
        TO_CODESET is UTF-8.
   HANDLER decides what happens on EILSEQ: with iconveh_error the function
        fails; with iconveh_escape_sequence an unconvertible character is
        replaced by \uXXXX or \UXXXXXXXX; otherwise by a question mark.
        Invalid input is replaced by a question mark for every handler
        other than iconveh_error.
   EXTRA_ALLOC is a number of bytes kept free in the result buffer beyond
        the LENGTH converted bytes.
   OFFSETS, if not NULL, must have room for SRCLEN entries.  All entries are
        first set to (size_t)(-1); then, for each input position at which a
        conversion step starts, the entry receives the output length reached
        so far.
   *RESULTP and *LENGTHP on entry may describe a caller supplied buffer and
        its size (or *RESULTP may be NULL).  On success they receive the
        result buffer and the number of converted bytes.  The result is the
        caller's buffer if the output fits, otherwise freshly malloc()ed
        memory.

   The direct conversion is tried first.  If it hits EILSEQ, HANDLER is not
   iconveh_error and TO_CODESET is not UTF-8, the conversion is restarted
   from the beginning of SRC through UTF-8 ("indirectly").

   Return value: 0 if successful, otherwise -1 with errno set.  On failure,
   memory allocated by this function has been freed and *RESULTP and
   *LENGTHP are left unchanged.  */
static int
mem_cd_iconveh_internal (const char *src, size_t srclen,
                         iconv_t cd, iconv_t cd1, iconv_t cd2,
                         enum iconv_ilseq_handler handler,
                         size_t extra_alloc,
                         size_t *offsets,
                         char **resultp, size_t *lengthp)
{
  /* When a conversion error occurs, we cannot start using CD1 and CD2 at
     this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
     Instead, we have to start afresh from the beginning of SRC.  */
  /* Use a temporary buffer, so that for small strings, a single malloc()
     call will be sufficient.  */
# define tmpbufsize 4096
  /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
     libiconv's UCS-4-INTERNAL encoding.  */
  union { unsigned int align; char buf[tmpbufsize]; } tmp;
# define tmpbuf tmp.buf

  char *initial_result;
  char *result;
  size_t allocated;
  size_t length;
  size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */

  /* Work directly in the caller's buffer only if it is at least as large
     as tmpbuf; otherwise start in tmpbuf and copy out at 'done'.
     INITIAL_RESULT is never freed by this function.  */
  if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
    {
      initial_result = *resultp;
      allocated = *lengthp;
    }
  else
    {
      initial_result = tmpbuf;
      allocated = sizeof (tmpbuf);
    }
  result = initial_result;

  /* Test whether a direct conversion is possible at all.  */
  if (cd == (iconv_t)(-1))
    goto indirectly;

  if (offsets != NULL)
    {
      size_t i;

      for (i = 0; i < srclen; i++)
        offsets[i] = (size_t)(-1);

      last_length = (size_t)(-1);
    }
  length = 0;

  /* First, try a direct conversion, and see whether a conversion error
     occurs at all.  */
  {
    const char *inptr = src;
    size_t insize = srclen;

    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    /* Set to the initial state.  */
    iconv (cd, NULL, NULL, NULL, NULL);
# endif

    while (insize > 0)
      {
        char *outptr = result + length;
        size_t outsize = allocated - extra_alloc - length;
        bool incremented;
        size_t res;
        bool grow;

        if (offsets != NULL)
          {
            if (length != last_length) /* ensure that offsets[] be increasing */
              {
                offsets[inptr - src] = length;
                last_length = length;
              }
            /* Convert one character at a time, so that every character
               start gets its own offsets[] entry.  */
            res = iconv_carefully_1 (cd,
                                     &inptr, &insize,
                                     &outptr, &outsize,
                                     &incremented);
          }
        else
          /* Use iconv_carefully instead of iconv here, because:
             - If TO_CODESET is UTF-8, we can do the error handling in this
               loop, no need for a second loop,
             - With iconv() implementations other than GNU libiconv and GNU
               libc, if we use iconv() in a big swoop, checking for an E2BIG
               return, we lose the number of irreversible conversions.  */
          res = iconv_carefully (cd,
                                 &inptr, &insize,
                                 &outptr, &outsize,
                                 &incremented);

        length = outptr - result;
        /* Grow proactively once more than half of the buffer is in use.  */
        grow = (length + extra_alloc > allocated / 2);
        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              grow = true;
            else if (errno == EINVAL)
              /* Stop converting; the output produced so far is kept and the
                 function still proceeds to the success path.  */
              break;
            else if (errno == EILSEQ && handler != iconveh_error)
              {
                if (cd2 == (iconv_t)(-1))
                  {
                    /* TO_CODESET is UTF-8.  */
                    /* Error handling can produce up to 1 byte of output.  */
                    if (length + 1 + extra_alloc > allocated)
                      {
                        char *memory;

                        allocated = 2 * allocated;
                        if (length + 1 + extra_alloc > allocated)
                          abort ();
                        /* INITIAL_RESULT is not owned by us: the first
                           growth must malloc and copy, later ones can
                           realloc.  */
                        if (result == initial_result)
                          memory = (char *) malloc (allocated);
                        else
                          memory = (char *) realloc (result, allocated);
                        if (memory == NULL)
                          {
                            if (result != initial_result)
                              free (result);
                            errno = ENOMEM;
                            return -1;
                          }
                        if (result == initial_result)
                          memcpy (memory, initial_result, length);
                        result = memory;
                        grow = false;
                      }
                    /* The input is invalid in FROM_CODESET.  Eat up one byte
                       and emit a question mark.  */
                    if (!incremented)
                      {
                        if (insize == 0)
                          abort ();
                        inptr++;
                        insize--;
                      }
                    result[length] = '?';
                    length++;
                  }
                else
                  /* Restart from the beginning of SRC, going through UTF-8.
                     RESULT and ALLOCATED are kept; LENGTH is reset there.  */
                  goto indirectly;
              }
            else
              {
                /* Unrecoverable error: fail with the errno from iconv.  */
                if (result != initial_result)
                  {
                    int saved_errno = errno;
                    free (result);
                    errno = saved_errno;
                  }
                return -1;
              }
          }
        if (insize == 0)
          break;
        if (grow)
          {
            char *memory;

            allocated = 2 * allocated;
            if (result == initial_result)
              memory = (char *) malloc (allocated);
            else
              memory = (char *) realloc (result, allocated);
            if (memory == NULL)
              {
                if (result != initial_result)
                  free (result);
                errno = ENOMEM;
                return -1;
              }
            if (result == initial_result)
              memcpy (memory, initial_result, length);
            result = memory;
          }
      }
  }

  /* Now get the conversion state back to the initial state.
     But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
#if defined _LIBICONV_VERSION \
    || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
         || defined __sun)
  /* The flush may itself produce output; retry with a doubled buffer for as
     long as iconv reports E2BIG.  */
  for (;;)
    {
      char *outptr = result + length;
      size_t outsize = allocated - extra_alloc - length;
      size_t res;

      res = iconv (cd, NULL, NULL, &outptr, &outsize);
      length = outptr - result;
      if (res == (size_t)(-1))
        {
          if (errno == E2BIG)
            {
              char *memory;

              allocated = 2 * allocated;
              if (result == initial_result)
                memory = (char *) malloc (allocated);
              else
                memory = (char *) realloc (result, allocated);
              if (memory == NULL)
                {
                  if (result != initial_result)
                    free (result);
                  errno = ENOMEM;
                  return -1;
                }
              if (result == initial_result)
                memcpy (memory, initial_result, length);
              result = memory;
            }
          else
            {
              if (result != initial_result)
                {
                  int saved_errno = errno;
                  free (result);
                  errno = saved_errno;
                }
              return -1;
            }
        }
      else
        break;
    }
#endif

  /* The direct conversion succeeded.  */
  goto done;

 indirectly:
  /* The direct conversion failed.
     Use a conversion through UTF-8.  */
  /* Discard whatever the direct attempt recorded or produced.  */
  if (offsets != NULL)
    {
      size_t i;

      for (i = 0; i < srclen; i++)
        offsets[i] = (size_t)(-1);

      last_length = (size_t)(-1);
    }
  length = 0;
  {
    /* Convert character by character when offsets must be recorded or when
       an error must be reported rather than replaced.  */
    const bool slowly = (offsets != NULL || handler == iconveh_error);
# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
    /* The extra byte leaves room for the question mark that replaces
       invalid input, see below.  */
    char utf8buf[utf8bufsize + 1];
    size_t utf8len = 0;
    const char *in1ptr = src;
    size_t in1size = srclen;
    bool do_final_flush1 = true;
    bool do_final_flush2 = true;

    /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    /* Set to the initial state.  */
    if (cd1 != (iconv_t)(-1))
      iconv (cd1, NULL, NULL, NULL, NULL);
    if (cd2 != (iconv_t)(-1))
      iconv (cd2, NULL, NULL, NULL, NULL);
# endif

    /* Each iteration performs one step 1 action (convert input, or flush
       CD1, or nothing) and then, when warranted, drains utf8buf through
       step 2.  */
    while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
      {
        char *out1ptr = utf8buf + utf8len;
        size_t out1size = utf8bufsize - utf8len;
        bool incremented1;
        size_t res1;
        int errno1;

        /* Conversion step 1: from FROM_CODESET to UTF-8.  */
        if (in1size > 0)
          {
            if (offsets != NULL
                && length != last_length) /* ensure that offsets[] be increasing */
              {
                offsets[in1ptr - src] = length;
                last_length = length;
              }
            if (cd1 != (iconv_t)(-1))
              {
                if (slowly)
                  res1 = iconv_carefully_1 (cd1,
                                            &in1ptr, &in1size,
                                            &out1ptr, &out1size,
                                            &incremented1);
                else
                  res1 = iconv_carefully (cd1,
                                          &in1ptr, &in1size,
                                          &out1ptr, &out1size,
                                          &incremented1);
              }
            else
              {
                /* FROM_CODESET is UTF-8.  */
                res1 = utf8conv_carefully (slowly,
                                           &in1ptr, &in1size,
                                           &out1ptr, &out1size,
                                           &incremented1);
              }
          }
        else if (do_final_flush1)
          {
            /* Now get the conversion state of CD1 back to the initial state.
               But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
            if (cd1 != (iconv_t)(-1))
              res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
            else
# endif
              res1 = 0;
            do_final_flush1 = false;
            incremented1 = true;
          }
        else
          {
            /* Nothing left to do in step 1; only step 2 work remains.  */
            res1 = 0;
            incremented1 = true;
          }
        if (res1 == (size_t)(-1)
            && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
          {
            if (result != initial_result)
              {
                int saved_errno = errno;
                free (result);
                errno = saved_errno;
              }
            return -1;
          }
        if (res1 == (size_t)(-1)
            && errno == EILSEQ && handler != iconveh_error)
          {
            /* The input is invalid in FROM_CODESET.  Eat up one byte and
               emit a question mark.  Room for the question mark was allocated
               at the end of utf8buf.  */
            if (!incremented1)
              {
                if (in1size == 0)
                  abort ();
                in1ptr++;
                in1size--;
              }
            *out1ptr++ = '?';
            res1 = 0;
          }
        /* Remember the errno of step 1: step 2 may overwrite errno before
           the step 1 result is evaluated at the end of the iteration.  */
        errno1 = errno;
        utf8len = out1ptr - utf8buf;

        /* Run step 2 after every step when offsets are recorded, when the
           input is exhausted, when utf8buf is more than half full, or when
           step 1 ran out of room in utf8buf.  */
        if (offsets != NULL
            || in1size == 0
            || utf8len > utf8bufsize / 2
            || (res1 == (size_t)(-1) && errno1 == E2BIG))
          {
            /* Conversion step 2: from UTF-8 to TO_CODESET.  */
            const char *in2ptr = utf8buf;
            size_t in2size = utf8len;

            while (in2size > 0
                   || (in1size == 0 && !do_final_flush1 && do_final_flush2))
              {
                char *out2ptr = result + length;
                size_t out2size = allocated - extra_alloc - length;
                bool incremented2;
                size_t res2;
                bool grow;

                if (in2size > 0)
                  {
                    if (cd2 != (iconv_t)(-1))
                      res2 = iconv_carefully (cd2,
                                              &in2ptr, &in2size,
                                              &out2ptr, &out2size,
                                              &incremented2);
                    else
                      /* TO_CODESET is UTF-8.  */
                      res2 = utf8conv_carefully (false,
                                                 &in2ptr, &in2size,
                                                 &out2ptr, &out2size,
                                                 &incremented2);
                  }
                else /* in1size == 0 && !do_final_flush1
                        && in2size == 0 && do_final_flush2 */
                  {
                    /* Now get the conversion state of CD2 back to the initial
                       state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
                    if (cd2 != (iconv_t)(-1))
                      res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
                    else
# endif
                      res2 = 0;
                    do_final_flush2 = false;
                    incremented2 = true;
                  }

                length = out2ptr - result;
                /* Grow proactively once more than half of the buffer is in
                   use.  */
                grow = (length + extra_alloc > allocated / 2);
                if (res2 == (size_t)(-1))
                  {
                    if (errno == E2BIG)
                      grow = true;
                    else if (errno == EINVAL)
                      /* Leave the unconverted tail in utf8buf; it is moved
                         to the front below.  */
                      break;
                    else if (errno == EILSEQ && handler != iconveh_error)
                      {
                        /* Error handling can produce up to 10 bytes of ASCII
                           output.  But TO_CODESET may be UCS-2, UTF-16 or
                           UCS-4, so use CD2 here as well.  */
                        char scratchbuf[10];
                        size_t scratchlen;
                        ucs4_t uc;
                        const char *inptr;
                        size_t insize;
                        size_t res;

                        /* Determine the offending character UC: either the
                           one just before IN2PTR (already skipped) or the
                           one at IN2PTR (skip it now).  */
                        if (incremented2)
                          {
                            if (u8_prev (&uc, (const uint8_t *) in2ptr,
                                         (const uint8_t *) utf8buf)
                                == NULL)
                              abort ();
                          }
                        else
                          {
                            int n;
                            if (in2size == 0)
                              abort ();
                            n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
                                                  in2size);
                            in2ptr += n;
                            in2size -= n;
                          }

                        if (handler == iconveh_escape_sequence)
                          {
                            /* Build \uXXXX for characters below 0x10000,
                               \UXXXXXXXX otherwise.  */
                            static char hex[16] = "0123456789ABCDEF";
                            scratchlen = 0;
                            scratchbuf[scratchlen++] = '\\';
                            if (uc < 0x10000)
                              scratchbuf[scratchlen++] = 'u';
                            else
                              {
                                scratchbuf[scratchlen++] = 'U';
                                scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
                                scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
                              }
                            scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
                            scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
                            scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
                            scratchbuf[scratchlen++] = hex[uc & 15];
                          }
                        else
                          {
                            scratchbuf[0] = '?';
                            scratchlen = 1;
                          }

                        /* Convert the ASCII replacement to TO_CODESET.  */
                        inptr = scratchbuf;
                        insize = scratchlen;
                        if (cd2 != (iconv_t)(-1))
                          res = iconv (cd2,
                                       (ICONV_CONST char **) &inptr, &insize,
                                       &out2ptr, &out2size);
                        else
                          {
                            /* TO_CODESET is UTF-8.  */
                            if (out2size >= insize)
                              {
                                memcpy (out2ptr, inptr, insize);
                                out2ptr += insize;
                                out2size -= insize;
                                inptr += insize;
                                insize = 0;
                                res = 0;
                              }
                            else
                              {
                                errno = E2BIG;
                                res = (size_t)(-1);
                              }
                          }
                        length = out2ptr - result;
                        if (res == (size_t)(-1) && errno == E2BIG)
                          {
                            /* Not enough room for the replacement: double
                               the buffer once and convert the rest of it.  */
                            char *memory;

                            allocated = 2 * allocated;
                            if (length + 1 + extra_alloc > allocated)
                              abort ();
                            if (result == initial_result)
                              memory = (char *) malloc (allocated);
                            else
                              memory = (char *) realloc (result, allocated);
                            if (memory == NULL)
                              {
                                if (result != initial_result)
                                  free (result);
                                errno = ENOMEM;
                                return -1;
                              }
                            if (result == initial_result)
                              memcpy (memory, initial_result, length);
                            result = memory;
                            grow = false;

                            out2ptr = result + length;
                            out2size = allocated - extra_alloc - length;
                            if (cd2 != (iconv_t)(-1))
                              res = iconv (cd2,
                                           (ICONV_CONST char **) &inptr,
                                           &insize,
                                           &out2ptr, &out2size);
                            else
                              {
                                /* TO_CODESET is UTF-8.  */
                                if (!(out2size >= insize))
                                  abort ();
                                memcpy (out2ptr, inptr, insize);
                                out2ptr += insize;
                                out2size -= insize;
                                inptr += insize;
                                insize = 0;
                                res = 0;
                              }
                            length = out2ptr - result;
                          }
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
                        /* Irix iconv() inserts a NUL byte if it cannot convert.
                           NetBSD iconv() inserts a question mark if it cannot
                           convert.
                           Only GNU libiconv and GNU libc are known to prefer
                           to fail rather than doing a lossy conversion.  */
                        if (res != (size_t)(-1) && res > 0)
                          {
                            errno = EILSEQ;
                            res = (size_t)(-1);
                          }
# endif
                        if (res == (size_t)(-1))
                          {
                            /* Failure converting the ASCII replacement.  */
                            if (result != initial_result)
                              {
                                int saved_errno = errno;
                                free (result);
                                errno = saved_errno;
                              }
                            return -1;
                          }
                      }
                    else
                      {
                        /* Unrecoverable error in step 2: fail with the
                           current errno.  */
                        if (result != initial_result)
                          {
                            int saved_errno = errno;
                            free (result);
                            errno = saved_errno;
                          }
                        return -1;
                      }
                  }
                /* Same condition as the loop header; testing it here avoids
                   growing the buffer when no further step 2 work follows.  */
                if (!(in2size > 0
                      || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
                  break;
                if (grow)
                  {
                    char *memory;

                    allocated = 2 * allocated;
                    if (result == initial_result)
                      memory = (char *) malloc (allocated);
                    else
                      memory = (char *) realloc (result, allocated);
                    if (memory == NULL)
                      {
                        if (result != initial_result)
                          free (result);
                        errno = ENOMEM;
                        return -1;
                      }
                    if (result == initial_result)
                      memcpy (memory, initial_result, length);
                    result = memory;
                  }
              }

            /* Move the remaining bytes to the beginning of utf8buf.  */
            if (in2size > 0)
              memmove (utf8buf, in2ptr, in2size);
            utf8len = in2size;
          }

        /* Evaluate the outcome of step 1 only now, after the UTF-8 bytes
           produced before the error have gone through step 2.  */
        if (res1 == (size_t)(-1))
          {
            if (errno1 == EINVAL)
              /* Treat the input as finished; the remaining input bytes are
                 not converted.  */
              in1size = 0;
            else if (errno1 == EILSEQ)
              {
                /* Only reached with handler == iconveh_error; for other
                   handlers RES1 was reset to 0 above.  */
                if (result != initial_result)
                  free (result);
                errno = errno1;
                return -1;
              }
          }
      }
# undef utf8bufsize
  }

 done:
  /* Now the final memory allocation.  */
  if (result == tmpbuf)
    {
      /* The output still lives on the stack: move it into the caller's
         buffer if that is large enough, otherwise into fresh memory.  */
      size_t memsize = length + extra_alloc;

      if (*resultp != NULL && *lengthp >= memsize)
        result = *resultp;
      else
        {
          char *memory;

          memory = (char *) malloc (memsize > 0 ? memsize : 1);
          if (memory != NULL)
            result = memory;
          else
            {
              errno = ENOMEM;
              return -1;
            }
        }
      memcpy (result, tmpbuf, length);
    }
  else if (result != *resultp && length + extra_alloc < allocated)
    {
      /* Shrink the allocated memory if possible.  */
      size_t memsize = length + extra_alloc;
      char *memory;

      memory = (char *) realloc (result, memsize > 0 ? memsize : 1);
      /* A failed shrink is harmless: keep the larger block.  */
      if (memory != NULL)
        result = memory;
    }
  *resultp = result;
  *lengthp = length;
  return 0;
# undef tmpbuf
# undef tmpbufsize
}

/* Convert the SRCLEN bytes at SRC using the descriptors in *CD.
   This forwards to mem_cd_iconveh_internal with the three descriptors of
   *CD and without reserving any extra bytes in the result, and returns
   that function's return value unchanged.  */
int
mem_cd_iconveh (const char *src, size_t srclen,
                const iconveh_t *cd,
                enum iconv_ilseq_handler handler,
                size_t *offsets,
                char **resultp, size_t *lengthp)
{
  /* No room beyond the converted bytes is requested.  */
  const size_t no_extra_alloc = 0;
  int status;

  status = mem_cd_iconveh_internal (src, srclen,
                                    cd->cd, cd->cd1, cd->cd2,
                                    handler, no_extra_alloc,
                                    offsets, resultp, lengthp);
  return status;
}

/* Convert the NUL-terminated string SRC, using the conversion descriptors
   stored in *CD, and return the converted string, NUL-terminated.
   On failure, return NULL; errno is left as it was after the failed
   conversion.  */
char *
str_cd_iconveh (const char *src,
                const iconveh_t *cd,
                enum iconv_ilseq_handler handler)
{
  /* The terminating NUL byte of SRC is deliberately not part of the
     converted input: most encodings map a trailing NUL byte to a trailing
     NUL byte, but UTF-7 does not.  To keep this function usable for UTF-7,
     only strlen (SRC) bytes are converted, one spare byte is requested,
     and the NUL is stored by hand afterwards.  */
  char *converted = NULL;
  size_t converted_len = 0;
  size_t srclen = strlen (src);

  if (mem_cd_iconveh_internal (src, srclen,
                               cd->cd, cd->cd1, cd->cd2, handler, 1,
                               NULL, &converted, &converted_len)
      >= 0)
    {
      /* Success: terminate the output in the spare byte.  */
      converted[converted_len] = '\0';
      return converted;
    }

  /* Failure: release anything that was handed back, without letting
     free() disturb errno.  */
  if (converted != NULL)
    {
      int err = errno;
      free (converted);
      errno = err;
    }
  return NULL;
}

#endif

/* Convert the SRCLEN bytes starting at SRC from encoding FROM_CODESET to
   encoding TO_CODESET.
   On entry, *RESULTP is either NULL or a buffer of *LENGTHP bytes that may
   be reused for the output.  On success, 0 is returned and *LENGTHP holds
   the output length; except when SRCLEN is 0, *RESULTP then points to the
   output.
   On failure, a negative value is returned and errno is set; it is ENOSYS
   when the package was built without iconv support.  */
int
mem_iconveh (const char *src, size_t srclen,
             const char *from_codeset, const char *to_codeset,
             enum iconv_ilseq_handler handler,
             size_t *offsets,
             char **resultp, size_t *lengthp)
{
  /* Empty input: nothing to convert; *RESULTP is left untouched.  */
  if (srclen == 0)
    {
      *lengthp = 0;
      return 0;
    }

  /* Same encoding on both sides and no offsets wanted: a plain copy
     suffices.  */
  if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *dest = *resultp;

      /* Reuse the caller's buffer only if it exists and is big enough.  */
      if (dest == NULL || *lengthp < srclen)
        {
          dest = (char *) malloc (srclen);
          if (dest == NULL)
            {
              errno = ENOMEM;
              return -1;
            }
        }
      memcpy (dest, src, srclen);
      *resultp = dest;
      *lengthp = srclen;
      return 0;
    }

#if HAVE_ICONV
  {
    iconveh_t cd;
    char *converted;
    size_t converted_len;
    int status;
    int err;

    if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
      return -1;

    /* Work on copies, so that the caller's variables are only updated
       once everything, including closing CD, has succeeded.  */
    converted = *resultp;
    converted_len = *lengthp;
    status = mem_cd_iconveh (src, srclen, &cd, handler, offsets,
                             &converted, &converted_len);

    if (status < 0)
      {
        /* Close cd, but keep the errno that mem_cd_iconveh reported.  */
        err = errno;
        iconveh_close (&cd);
        errno = err;
        return status;
      }

    if (iconveh_close (&cd) < 0)
      {
        /* Fail with the errno from iconveh_close.  Release the output
           unless it lives in the buffer that the caller passed in.  */
        err = errno;
        if (converted != NULL && converted != *resultp)
          free (converted);
        errno = err;
        return -1;
      }

    *resultp = converted;
    *lengthp = converted_len;
    return status;
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return -1;
#endif
}

/* Convert the NUL-terminated string SRC from encoding FROM_CODESET to
   encoding TO_CODESET and return the result as a freshly allocated
   string.
   On failure, NULL is returned and errno is set; it is ENOSYS when the
   package was built without iconv support.  */
char *
str_iconveh (const char *src,
             const char *from_codeset, const char *to_codeset,
             enum iconv_ilseq_handler handler)
{
  /* An empty string, or identical source and target encodings: the
     result is simply a copy of SRC.  */
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconveh_t cd;
    char *converted;
    int err;

    if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
      return NULL;

    converted = str_cd_iconveh (src, &cd, handler);

    if (converted == NULL)
      {
        /* Close cd, but keep the errno that str_cd_iconveh reported.  */
        err = errno;
        iconveh_close (&cd);
        errno = err;
        return NULL;
      }

    if (iconveh_close (&cd) < 0)
      {
        /* Fail with the errno from iconveh_close, and do not leak the
           string that was already converted.  */
        err = errno;
        free (converted);
        errno = err;
        return NULL;
      }

    return converted;
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}