Edit

thodg/libiconv/lib/loop_wchar.h

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2009-07-08 00:22:57
    Hash : 2cf3ee65
    Message : Fix an endless loop in the conversion to wchar_t.

  • lib/loop_wchar.h
  • /*
     * Copyright (C) 2000-2002, 2005-2006, 2008-2009 Free Software Foundation, Inc.
     * This file is part of the GNU LIBICONV Library.
     *
     * The GNU LIBICONV Library is free software; you can redistribute it
     * and/or modify it under the terms of the GNU Library General Public
     * License as published by the Free Software Foundation; either version 2
     * of the License, or (at your option) any later version.
     *
     * The GNU LIBICONV Library is distributed in the hope that it will be
     * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Library General Public License for more details.
     *
     * You should have received a copy of the GNU Library General Public
     * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
     * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
     * Fifth Floor, Boston, MA 02110-1301, USA.
     */
    
    /* This file defines three conversion loops:
         - from wchar_t to anything else,
         - from anything else to wchar_t,
         - from wchar_t to wchar_t.
     */
    
    /* The wide-character support below is only set up when at least one of
       wcrtomb() and mbrtowc() is available. */
    #if HAVE_WCRTOMB || HAVE_MBRTOWC
    # include <wchar.h>
    /* Size of the scratch buffers used by the conversion loops and reset
       functions in this file to hold multibyte data. */
    # define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
      /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
      extern size_t mbrtowc ();
    /* If mbstate_t is a preprocessor macro (presumably a substitute type
       provided by the build configuration on such systems -- TODO confirm),
       ignore the state argument: call the real mbrtowc with a null state
       pointer and report every state as initial. */
    # ifdef mbstate_t
    #  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
    #  define mbsinit(ps) 1
    # endif
    /* If mbsinit is not already a macro and the function is not available,
       treat every conversion state as the initial state. */
    # ifndef mbsinit
    #  if !HAVE_MBSINIT
    #   define mbsinit(ps) 1
    #  endif
    # endif
    #endif
    
    /*
     * The first two conversion loops have an extended conversion descriptor.
     * The loop functions below receive it as an iconv_t and cast it back to
     * this type.
     */
    struct wchar_conv_struct {
      /* The ordinary conversion descriptor; passed to unicode_loop_convert
         and unicode_loop_reset by the loops below. */
      struct conv_struct parent;
    #if HAVE_WCRTOMB || HAVE_MBRTOWC
      /* Conversion state handed (as a copy) to wcrtomb / mbrtowc. */
      mbstate_t state;
    #endif
    };
    
    
    #if HAVE_WCRTOMB
    
    /* From wchar_t to anything else. */
    
    #ifndef LIBICONV_PLUG
    
    #if 0
    
    /* Context handed to wc_to_mb_write_replacement through its callback_arg
       parameter (variant that converts the replacement text; currently
       compiled out by the enclosing "#if 0"). */
    struct wc_to_mb_fallback_locals {
      /* Descriptor whose parent conversion is used to convert the text. */
      struct wchar_conv_struct * l_wcd;
      /* Current write position in the output buffer. */
      char* l_outbuf;
      /* Number of bytes still free at l_outbuf. */
      size_t l_outbytesleft;
      /* 0 while no error has occurred, otherwise the errno value to report. */
      int l_errno;
    };
    
    /* Callback for the wc_to_mb fallback handler: takes a replacement string
       given in the locale encoding and converts it into the output buffer
       described by callback_arg (a struct wc_to_mb_fallback_locals).
       Errors are recorded in l_errno; once an error is recorded, all later
       calls are ignored. */
    static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                            void* callback_arg)
    {
      struct wc_to_mb_fallback_locals * ctx =
        (struct wc_to_mb_fallback_locals *) callback_arg;
      const char* src = buf;
      size_t srcleft = buflen;
      size_t rc;

      /* A previous call already failed: stay silent. */
      if (ctx->l_errno != 0)
        return;

      /* Convert the whole replacement in one go.  Characters split across
         multiple calls are not supported here. */
      rc = unicode_loop_convert(&ctx->l_wcd->parent,
                                &src,&srcleft,
                                &ctx->l_outbuf,&ctx->l_outbytesleft);
      if (rc != (size_t)(-1)) {
        /* Success must have consumed the entire replacement string. */
        if (srcleft > 0)
          abort();
        return;
      }

      if (errno == EILSEQ || errno == EINVAL) {
        /* The replacement string itself is malformed or truncated. */
        ctx->l_errno = EILSEQ;
      } else if (errno == E2BIG) {
        /* No room left in the output buffer. */
        ctx->l_errno = E2BIG;
      } else {
        abort();
      }
    }
    
    #else
    
    /* Context handed to wc_to_mb_write_replacement through its callback_arg
       parameter. */
    struct wc_to_mb_fallback_locals {
      char* l_outbuf;          /* current write position in the output buffer */
      size_t l_outbytesleft;   /* bytes still free at l_outbuf */
      int l_errno;             /* 0, or the errno value of the first failure */
    };

    /* Callback for the wc_to_mb fallback handler: appends buflen bytes,
       already in the target encoding, to the output buffer described by
       callback_arg.  If they do not fit, nothing is written and l_errno is
       set to E2BIG.  Once l_errno is nonzero, all later calls are ignored. */
    static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                            void* callback_arg)
    {
      struct wc_to_mb_fallback_locals * sink =
        (struct wc_to_mb_fallback_locals *) callback_arg;

      /* A previous call already failed: stay silent. */
      if (sink->l_errno != 0)
        return;

      /* Refuse the write as a whole when it does not fit. */
      if (buflen > sink->l_outbytesleft) {
        sink->l_errno = E2BIG;
        return;
      }

      memcpy(sink->l_outbuf, buf, buflen);
      sink->l_outbytesleft -= buflen;
      sink->l_outbuf += buflen;
    }
    
    #endif
    
    #endif /* !LIBICONV_PLUG */
    
    /* Conversion loop "from wchar_t to anything else".
       Each wchar_t taken from *inbuf is first turned into its multibyte
       representation with wcrtomb(); the accumulated bytes are then passed
       to unicode_loop_convert() on the parent descriptor, which writes to
       *outbuf.  The caller's pointers and counts, and the saved conversion
       state, are only advanced once a group of wide characters has been
       handled completely.
       Returns the accumulated count reported by unicode_loop_convert() (plus
       one per character handled by the fallback), or (size_t)(-1) with errno
       set:
         EILSEQ  a wide character could not be converted and neither
                 discard_ilseq nor a fallback handler applies,
         E2BIG   the output buffer is too small,
         EINVAL  the input ends while the bytes produced so far are still an
                 incomplete sequence for unicode_loop_convert(). */
    static size_t wchar_from_loop_convert (iconv_t icd,
                                           const char* * inbuf, size_t *inbytesleft,
                                           char* * outbuf, size_t *outbytesleft)
    {
      struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
      size_t result = 0;
      while (*inbytesleft >= sizeof(wchar_t)) {
        const wchar_t * inptr = (const wchar_t *) *inbuf;
        size_t inleft = *inbytesleft;
        char buf[BUF_SIZE];
        /* Work on a copy of the state; it is stored back only on commit. */
        mbstate_t state = wcd->state;
        size_t bufcount = 0;
        /* Set when the inner loop has advanced the caller's pointers. */
        int committed = 0;
        while (inleft >= sizeof(wchar_t)) {
          /* Convert one wchar_t to multibyte representation. */
          size_t count = wcrtomb(buf+bufcount,*inptr,&state);
          if (count == (size_t)(-1)) {
            /* Invalid input. */
            if (wcd->parent.discard_ilseq) {
              /* Drop the character: it contributes no bytes. */
              count = 0;
            }
            #ifndef LIBICONV_PLUG
            else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
              /* Drop the contents of buf[] accumulated so far, and instead
                 pass all queued wide characters to the fallback handler. */
              struct wc_to_mb_fallback_locals locals;
              const wchar_t * fallback_inptr;
              #if 0
              locals.l_wcd = wcd;
              #endif
              locals.l_outbuf = *outbuf;
              locals.l_outbytesleft = *outbytesleft;
              locals.l_errno = 0;
              for (fallback_inptr = (const wchar_t *) *inbuf;
                   fallback_inptr <= inptr;
                   fallback_inptr++)
                wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
                                                        wc_to_mb_write_replacement,
                                                        &locals,
                                                        wcd->parent.fallbacks.data);
              if (locals.l_errno != 0) {
                errno = locals.l_errno;
                return -1;
              }
              wcd->state = state;
              *inbuf = (const char *) (inptr + 1);
              *inbytesleft = inleft - sizeof(wchar_t);
              *outbuf = locals.l_outbuf;
              *outbytesleft = locals.l_outbytesleft;
              result += 1;
              committed = 1;
              break;
            }
            #endif
            else {
              errno = EILSEQ;
              return -1;
            }
          }
          inptr++;
          inleft -= sizeof(wchar_t);
          bufcount += count;
          if (count == 0) {
            /* Continue, append next wchar_t. */
          } else {
            /* Attempt to convert the accumulated multibyte representations
               to the target encoding. */
            const char* bufptr = buf;
            size_t bufleft = bufcount;
            char* outptr = *outbuf;
            size_t outleft = *outbytesleft;
            size_t res = unicode_loop_convert(&wcd->parent,
                                              &bufptr,&bufleft,
                                              &outptr,&outleft);
            if (res == (size_t)(-1)) {
              if (errno == EILSEQ)
                /* Invalid input. */
                return -1;
              else if (errno == E2BIG)
                /* Output buffer too small. */
                return -1;
              else if (errno == EINVAL) {
                /* Continue, append next wchar_t, but avoid buffer overrun. */
                if (bufcount + MB_CUR_MAX > BUF_SIZE)
                  abort();
              } else
                abort();
            } else {
              /* Successful conversion. */
              wcd->state = state;
              *inbuf = (const char *) inptr;
              *inbytesleft = inleft;
              *outbuf = outptr;
              *outbytesleft = outleft;
              result += res;
              committed = 1;
              break;
            }
          }
        }
        if (!committed) {
          /* The inner loop used up all complete wide characters without
             committing anything.  Without the handling below, the outer loop
             would restart from the same unchanged *inbuf and never end. */
          if (bufcount > 0) {
            /* Bytes are pending that unicode_loop_convert considered an
               incomplete sequence: report incomplete input and leave the
               caller's pointers untouched.  errno is set explicitly because
               a later wcrtomb failure may have overwritten it. */
            errno = EINVAL;
            return -1;
          }
          /* Every remaining wide character produced no bytes (for example
             because it was dropped under discard_ilseq).  Consume them,
             storing the state back just as the successful path does. */
          wcd->state = state;
          *inbuf = (const char *) inptr;
          *inbytesleft = inleft;
        }
      }
      return result;
    }
    
    /* Reset function for the "from wchar_t" direction.
       - With no output buffer (outbuf or *outbuf is NULL), the wcrtomb state
         is zeroed and the parent conversion is reset without output.
       - Otherwise, if the wcrtomb state is not initial, the bytes that
         wcrtomb emits for a terminating null wide character (minus the final
         NUL byte) are converted into the output buffer, the parent conversion
         is reset into the same buffer, and only then are the state and the
         caller's output position updated.
       Returns 0 or the result of unicode_loop_reset on success, and
       (size_t)(-1) on failure. */
    static size_t wchar_from_loop_reset (iconv_t icd,
                                         char* * outbuf, size_t *outbytesleft)
    {
      struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;

      if (outbuf == NULL || *outbuf == NULL) {
        /* Reset the states. */
        memset(&wcd->state,'\0',sizeof(mbstate_t));
        return unicode_loop_reset(&wcd->parent,NULL,NULL);
      }

      /* Nothing pending on the wcrtomb side: only the parent needs a reset. */
      if (mbsinit(&wcd->state))
        return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);

      {
        /* Work on copies, so that a failure leaves the descriptor and the
           caller's output position unchanged. */
        mbstate_t pending = wcd->state;
        char tail[BUF_SIZE];
        size_t taillen = wcrtomb(tail,(wchar_t)0,&pending);
        const char* src = tail;
        size_t srcleft;
        char* dst = *outbuf;
        size_t dstleft = *outbytesleft;

        /* The output for a null wide character must end in a NUL byte. */
        if (taillen == (size_t)(-1) || taillen == 0 || tail[taillen-1] != '\0')
          abort();

        /* Convert everything except that trailing NUL byte. */
        srcleft = taillen-1;
        if (unicode_loop_convert(&wcd->parent,
                                 &src,&srcleft,
                                 &dst,&dstleft) == (size_t)(-1)) {
          /* Only a too-small output buffer is an expected failure here. */
          if (errno != E2BIG)
            abort();
          return -1;
        }

        if (unicode_loop_reset(&wcd->parent,&dst,&dstleft) == (size_t)(-1))
          return (size_t)(-1);

        /* Successful. */
        wcd->state = pending;
        *outbuf = dst;
        *outbytesleft = dstleft;
        return 0;
      }
    }
    
    #endif
    
    
    #if HAVE_MBRTOWC
    
    /* From anything else to wchar_t. */
    
    #ifndef LIBICONV_PLUG
    
    /* Context handed to mb_to_wc_write_replacement through its callback_arg
       parameter. */
    struct mb_to_wc_fallback_locals {
      char* l_outbuf;          /* current write position in the output buffer */
      size_t l_outbytesleft;   /* bytes still free at l_outbuf */
      int l_errno;             /* 0, or the errno value of the first failure */
    };

    /* Callback for the mb_to_wc fallback handler: appends buflen wide
       characters from buf to the output buffer described by callback_arg.
       If they do not all fit, nothing is written and l_errno is set to
       E2BIG.  Once l_errno is nonzero, all later calls are ignored. */
    static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                            void* callback_arg)
    {
      struct mb_to_wc_fallback_locals * plocals =
        (struct mb_to_wc_fallback_locals *) callback_arg;
      /* Do nothing if already encountered an error in a previous call. */
      if (plocals->l_errno == 0) {
        /* Attempt to copy the passed buffer to the output buffer.
           The space check divides instead of multiplying, so that a huge
           buflen cannot wrap around and slip past the check. */
        if (buflen > plocals->l_outbytesleft / sizeof(wchar_t))
          plocals->l_errno = E2BIG;
        else if (buflen > 0) {
          /* Cannot overflow: buflen <= l_outbytesleft / sizeof(wchar_t). */
          size_t nbytes = buflen * sizeof(wchar_t);
          /* memcpy does not require l_outbuf to be aligned for wchar_t. */
          memcpy(plocals->l_outbuf, buf, nbytes);
          plocals->l_outbuf += nbytes;
          plocals->l_outbytesleft -= nbytes;
        }
      }
    }
    
    #endif /* !LIBICONV_PLUG */
    
    /* Conversion loop "from anything else to wchar_t".
       For each output character, a growing prefix of the input (1, 2, ...
       bytes) is passed to unicode_loop_convert() on the parent descriptor
       until it yields bytes that mbrtowc() accepts as one complete
       character; that wchar_t is then stored at *outbuf and the prefix is
       consumed from *inbuf.
       Returns the accumulated count, or (size_t)(-1) with errno set:
         EILSEQ  invalid input and neither discard_ilseq nor a fallback
                 handler applies,
         E2BIG   fewer than sizeof(wchar_t) bytes of output space remain,
         EINVAL  the input ends in the middle of a character. */
    static size_t wchar_to_loop_convert (iconv_t icd,
                                         const char* * inbuf, size_t *inbytesleft,
                                         char* * outbuf, size_t *outbytesleft)
    {
      struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
      size_t result = 0;
      while (*inbytesleft > 0) {
        size_t incount;
        for (incount = 1; ; ) {
          /* Here incount <= *inbytesleft. */
          char buf[BUF_SIZE];
          const char* inptr = *inbuf;
          size_t inleft = incount;
          char* bufptr = buf;
          size_t bufleft = BUF_SIZE;
          size_t res = unicode_loop_convert(&wcd->parent,
                                            &inptr,&inleft,
                                            &bufptr,&bufleft);
          if (res == (size_t)(-1)) {
            if (errno == EILSEQ)
              /* Invalid input. */
              return -1;
            else if (errno == EINVAL) {
              /* Incomplete input. Next try with one more input byte. */
            } else
              /* E2BIG shouldn't occur. */
              abort();
          } else {
            /* Successful conversion. */
            size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
            mbstate_t state = wcd->state;
            wchar_t wc;
            res = mbrtowc(&wc,buf,bufcount,&state);
            if (res == (size_t)(-2)) {
              /* Next try with one more input byte. */
            } else {
              if (res == (size_t)(-1)) {
                /* Invalid input. */
                if (wcd->parent.discard_ilseq) {
                  /* Skip the offending bytes.  res still holds the error
                     marker (size_t)(-1); clear it so that the shared
                     "result += res" below does not decrement the count
                     (or turn it into the error return value). */
                  res = 0;
                }
                #ifndef LIBICONV_PLUG
                else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
                  /* Drop the contents of buf[] accumulated so far, and instead
                     pass all queued chars to the fallback handler. */
                  struct mb_to_wc_fallback_locals locals;
                  locals.l_outbuf = *outbuf;
                  locals.l_outbytesleft = *outbytesleft;
                  locals.l_errno = 0;
                  wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
                                                          mb_to_wc_write_replacement,
                                                          &locals,
                                                          wcd->parent.fallbacks.data);
                  if (locals.l_errno != 0) {
                    errno = locals.l_errno;
                    return -1;
                  }
                  /* Restoring the state is not needed because it is the initial
                     state anyway: For all known locale encodings, the multibyte
                     to wchar_t conversion doesn't have shift state, and we have
                     excluded partial accumulated characters. */
                  /* wcd->state = state; */
                  *inbuf += incount;
                  *inbytesleft -= incount;
                  *outbuf = locals.l_outbuf;
                  *outbytesleft = locals.l_outbytesleft;
                  result += 1;
                  break;
                }
                #endif
                else {
                  /* Report the failure explicitly instead of relying on the
                     errno value left behind by mbrtowc. */
                  errno = EILSEQ;
                  return -1;
                }
              } else {
                if (*outbytesleft < sizeof(wchar_t)) {
                  errno = E2BIG;
                  return -1;
                }
                *(wchar_t*) *outbuf = wc;
                /* Restoring the state is not needed because it is the initial
                   state anyway: For all known locale encodings, the multibyte
                   to wchar_t conversion doesn't have shift state, and we have
                   excluded partial accumulated characters. */
                /* wcd->state = state; */
                *outbuf += sizeof(wchar_t);
                *outbytesleft -= sizeof(wchar_t);
              }
              *inbuf += incount;
              *inbytesleft -= incount;
              /* NOTE(review): at this point res is the value returned by
                 mbrtowc (or 0 after a discard), not the count returned by
                 unicode_loop_convert -- confirm this is the intended
                 contribution to the return value. */
              result += res;
              break;
            }
          }
          incount++;
          if (incount > *inbytesleft) {
            /* Incomplete input. */
            errno = EINVAL;
            return -1;
          }
        }
      }
      return result;
    }
    
    /* Reset function for the "to wchar_t" direction: resets the parent
       conversion first and, only if that succeeded, zeroes the mbrtowc
       state.  Returns 0 on success and (size_t)(-1) if the parent reset
       failed (in which case the state is left untouched). */
    static size_t wchar_to_loop_reset (iconv_t icd,
                                       char* * outbuf, size_t *outbytesleft)
    {
      struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;

      if (unicode_loop_reset(&wcd->parent,outbuf,outbytesleft) == (size_t)(-1))
        return (size_t)(-1);

      memset(&wcd->state,0,sizeof(mbstate_t));
      return 0;
    }
    
    #endif
    
    
    /* From wchar_t to wchar_t. */
    
    /* Conversion loop "from wchar_t to wchar_t": copies as many whole
       wchar_t elements as both the input and the output can hold, invoking
       the wc_hook (when set) for each one, and advances the caller's
       pointers and counts accordingly.
       Always returns 0.  A trailing partial wchar_t in the input, or an
       output buffer that is too small for all of the input, is not reported
       as an error; the corresponding bytes are simply left unconsumed. */
    static size_t wchar_id_loop_convert (iconv_t icd,
                                         const char* * inbuf, size_t *inbytesleft,
                                         char* * outbuf, size_t *outbytesleft)
    {
      struct conv_struct * cd = (struct conv_struct *) icd;
      /* Whole elements available on each side. */
      size_t navail = *inbytesleft / sizeof(wchar_t);
      size_t nroom = *outbytesleft / sizeof(wchar_t);
      size_t n = (nroom < navail ? nroom : navail);

      if (n > 0) {
        const wchar_t* src = (const wchar_t*) *inbuf;
        wchar_t* dst = (wchar_t*) *outbuf;
        size_t i;

        /* The byte counts are settled before the copy; the pointers are
           advanced after it. */
        *inbytesleft -= n * sizeof(wchar_t);
        *outbytesleft -= n * sizeof(wchar_t);
        for (i = 0; i < n; i++) {
          wchar_t wc = src[i];
          dst[i] = wc;
          #ifndef LIBICONV_PLUG
          if (cd->hooks.wc_hook)
            (*cd->hooks.wc_hook)(wc, cd->hooks.data);
          #endif
        }
        *inbuf = (const char*) (src + n);
        *outbuf = (char*) (dst + n);
      }
      return 0;
    }
    
    /* Reset function for the "wchar_t to wchar_t" direction.  It produces no
       output and changes nothing; it always returns 0. */
    static size_t wchar_id_loop_reset (iconv_t icd,
                                       char* * outbuf, size_t *outbytesleft)
    {
      /* All parameters are intentionally unused. */
      (void) icd;
      (void) outbuf;
      (void) outbytesleft;
      return 0;
    }