Edit

thodg/libiconv/src/iconv.c

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2000-02-15 13:39:56
    Hash : d215f82d
    Message : Avoid compilation error with C++ compiler.

  • src/iconv.c
  • /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
       This file is part of the GNU ICONV Library.
    
       The GNU ICONV Library is free software; you can redistribute it and/or
       modify it under the terms of the GNU Library General Public License as
       published by the Free Software Foundation; either version 2 of the
       License, or (at your option) any later version.
    
       The GNU ICONV Library is distributed in the hope that it will be useful,
       but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       Library General Public License for more details.
    
       You should have received a copy of the GNU Library General Public
       License along with the GNU ICONV Library; see the file COPYING.LIB.  If not,
       write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       Boston, MA 02111-1307, USA.  */
    
    #include <iconv.h>
    
    #include <stdlib.h>
    #include <string.h>
    #include "config.h"
    
    /*
     * Converters.
     */
    #include "converters.h"
    
    /*
     * Transliteration tables.
     */
    #include "cjk_variants.h"
    #include "translit.h"
    
    /*
     * Table of all supported encodings.
     */
    /* One entry per supported encoding: the function tables used to decode it
       to Unicode and to encode Unicode into it, plus output capability flags.
       The oflags bits tested in this file are HAVE_HANGUL_JAMO,
       HAVE_QUOTATION_MARKS and HAVE_ACCENTS (used to pick transliterations). */
    struct encoding {
      struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
      struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
      int oflags;                 /* flags for unicode -> multibyte conversion */
    };
    /* Encoding indices. "encodings.def" is expanded with DEFENCODING defined so
       that each entry contributes one enumerator named ei_<xxx>; the values are
       therefore consecutive, in the order of the entries in encodings.def. */
    enum {
    #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
      ei_##xxx ,
    #include "encodings.def"
    #undef DEFENCODING
    /* Dummy last enumerator, so that the list does not end in a comma. */
    ei_for_broken_compilers_that_dont_like_trailing_commas
    };
    #include "flags.h"
    /* The encoding table itself. "encodings.def" is expanded a second time, in
       the same order as for the enum above, so all_encodings[ei_<xxx>] is the
       entry generated for encoding <xxx>.
       NOTE(review): the ei_<xxx>_oflags names pasted together here are not
       defined in this file; presumably they come from "flags.h" - confirm.
       NOTE(review): xxx_ofuncs1 and xxx_ofuncs2 presumably initialize the two
       members of struct wctomb_funcs (this file uses xxx_wctomb and xxx_reset)
       through brace elision - confirm against converters.h. */
    static struct encoding const all_encodings[] = {
    #define DEFENCODING(xxx_names,xxx,xxx_ifuncs,xxx_ofuncs1,xxx_ofuncs2) \
      { xxx_ifuncs, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
    #include "encodings.def"
    #undef DEFENCODING
    };
    
    /*
     * Alias lookup function.
     * Defines
     *   struct alias { const char* name; unsigned int encoding_index; };
     *   const struct alias * aliases_lookup (const char *str, unsigned int len);
     *   #define MAX_WORD_LENGTH ...
     */
    #include "aliases.h"
    
    #if 0
    /* Case-insensitive equality test for two ASCII strings, i.e. the negation
       of strcasecmp. Only str2 is folded to upper case; str1 is compared as is
       and is therefore expected to be in upper case already.
       Returns 1 if the strings are equal, 0 otherwise. */
    static int strequal (const char* str1, const char* str2)
    {
      const unsigned char* upper = (const unsigned char*) str1;
      const unsigned char* other = (const unsigned char*) str2;
      while (*upper != 0) {
        unsigned char folded = *other++;
        if (folded >= 'a' && folded <= 'z')
          folded = (unsigned char) (folded - ('a'-'A'));
        /* Also catches str2 ending first: its NUL differs from *upper. */
        if (*upper++ != folded)
          return 0;
      }
      /* str1 is exhausted; equal only if str2 ends at the same position. */
      return (*other == 0);
    }
    #endif
    
    /*
     * Allocates a conversion descriptor for converting from the encoding named
     * 'fromcode' to the encoding named 'tocode'.
     *
     * Both names are folded to upper case and looked up with aliases_lookup.
     * A name is rejected if it contains a byte >= 0x80, if it is longer than
     * MAX_WORD_LENGTH characters, or if the lookup does not find it.
     *
     * Returns the new descriptor (to be released with iconv_close), or
     * (iconv_t)(-1) with errno set to ENOMEM if the allocation failed, or to
     * EINVAL if either encoding name was rejected.
     */
    iconv_t iconv_open (const char* tocode, const char* fromcode)
    {
      struct conv_struct * cd = (struct conv_struct *) malloc(sizeof(struct conv_struct));
      char buf[MAX_WORD_LENGTH+1];
      const char* cp;
      char* bp;
      const struct alias * ap;
      unsigned int count;
    
      if (cd == NULL) {
        errno = ENOMEM;
        return (iconv_t)(-1);
      }
      /* Before calling aliases_lookup, convert the input string to upper case,
       * and check whether it's entirely ASCII (we call gperf with option "-7"
       * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
       * or if it's too long, it is not a valid encoding name.
       */
      /* Search tocode in the table. */
      /* 'count' starts at MAX_WORD_LENGTH+1 and is decremented after every
         non-NUL character stored, so the loop bails out before bp can run
         past the end of buf. */
      for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
        unsigned char c = * (unsigned char *) cp;
        if (c >= 0x80)
          goto invalid;
        if (c >= 'a' && c <= 'z')
          c -= 'a'-'A';
        *bp = c;
        if (c == '\0')
          break;
        if (--count == 0)
          goto invalid;
      }
      ap = aliases_lookup(buf,bp-buf);
      if (ap == NULL)
        goto invalid;
      cd->oindex = ap->encoding_index;
      cd->ofuncs = all_encodings[ap->encoding_index].ofuncs;
      cd->oflags = all_encodings[ap->encoding_index].oflags;
      /* Search fromcode in the table. */
      for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+1; ; cp++, bp++) {
        unsigned char c = * (unsigned char *) cp;
        if (c >= 0x80)
          goto invalid;
        if (c >= 'a' && c <= 'z')
          c -= 'a'-'A';
        *bp = c;
        if (c == '\0')
          break;
        if (--count == 0)
          goto invalid;
      }
      ap = aliases_lookup(buf,bp-buf);
      if (ap == NULL)
        goto invalid;
      cd->iindex = ap->encoding_index;
      cd->ifuncs = all_encodings[ap->encoding_index].ifuncs;
      /* Initialize the states. */
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      /* Initialize the operation flags. */
      cd->transliterate = 1;
      /* Done. */
      return (iconv_t)cd;
    invalid:
      /* The descriptor has already been allocated at this point and is not
         handed to the caller, so release it here to avoid leaking it. errno is
         assigned after free() so that free() cannot disturb it. */
      free(cd);
      errno = EINVAL;
      return (iconv_t)(-1);
    }
    
    /*
     * Converts input bytes to the output encoding of descriptor 'icd', going
     * through one wide character at a time.
     *
     * If inbuf or *inbuf is NULL, no input is converted:
     *   - if outbuf or *outbuf is NULL as well, both conversion states are
     *     cleared;
     *   - otherwise the output converter's xxx_reset function (if it has one)
     *     is called to write into *outbuf, then both states are cleared.
     *   Returns 0, or -1 with errno = E2BIG if xxx_reset reported failure.
     *
     * Otherwise characters are converted until the input is used up or an
     * error occurs, and *inbuf, *inbytesleft, *outbuf, *outbytesleft are
     * updated to reflect what was consumed and produced. A character that the
     * output encoding cannot represent is replaced - if cd->transliterate is
     * set - by the first alternative that works: Hangul decomposed into Jamo,
     * a CJK variant followed by U+303E, a quotation mark substitute, an entry
     * of the transliteration table; failing all that, U+FFFD is tried.
     *
     * Returns the number of characters that needed such a replacement, or
     * (size_t)(-1) with errno set to
     *   EILSEQ  invalid input, or a character that could not be converted
     *           or replaced,
     *   EINVAL  incomplete input sequence at the end of the input,
     *   E2BIG   not enough room in the output buffer.
     * On error, the character that caused it is left unconsumed.
     */
    size_t iconv (iconv_t icd,
                  ICONV_CONST char* * inbuf, size_t *inbytesleft,
                  char* * outbuf, size_t *outbytesleft)
    {
      conv_t cd = (conv_t) icd;
      if (inbuf == NULL || *inbuf == NULL) {
        if (outbuf == NULL || *outbuf == NULL) {
          /* Reset the states. */
          memset(&cd->istate,'\0',sizeof(state_t));
          memset(&cd->ostate,'\0',sizeof(state_t));
          return 0;
        } else {
          if (cd->ofuncs.xxx_reset) {
            int outcount =
              cd->ofuncs.xxx_reset(cd,(unsigned char*)*outbuf,*outbytesleft);
            if (outcount < 0) {
              errno = E2BIG;
              return -1;
            }
            *outbuf += outcount; *outbytesleft -= outcount;
          }
          memset(&cd->istate,'\0',sizeof(state_t));
          memset(&cd->ostate,'\0',sizeof(state_t));
          return 0;
        }
      } else {
        size_t result = 0;
        /* Work on local copies; the caller's variables are updated once, at
           the end, whether the loop finished or stopped on an error. */
        const unsigned char* inptr = (const unsigned char*) *inbuf;
        size_t inleft = *inbytesleft;
        unsigned char* outptr = (unsigned char*) *outbuf;
        size_t outleft = *outbytesleft;
        while (inleft > 0) {
          wchar_t wc;
          int incount;
          int outcount;
          incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
          if (incount <= 0) {
            if (incount == 0) {
              /* Case 1: invalid input */
              errno = EILSEQ;
              result = -1;
              break;
            }
            if (incount == -1) {
              /* Case 2: not enough bytes available to detect anything */
              errno = EINVAL;
              result = -1;
              break;
            }
            /* Case 3: k bytes read, but only a shift sequence */
            incount = -1-incount;
          } else {
            /* Case 4: k bytes read, making up a wide character */
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
            if (outcount != 0)
              goto outcount_ok;
            /* Try transliteration. */
            result++;
            if (cd->transliterate) {
              if (cd->oflags & HAVE_HANGUL_JAMO) {
                /* Decompose Hangul into Jamo. Use double-width Jamo (contained
                   in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
                   (contained in Unicode only). */
                wchar_t buf[3];
                int ret = johab_hangul_decompose(cd,buf,wc);
                if (ret != RET_ILSEQ) {
                  /* we know 1 <= ret <= 3 */
                  /* Remember the output position and state, so that a partly
                     written replacement can be undone. */
                  state_t backup_state = cd->ostate;
                  unsigned char* backup_outptr = outptr;
                  size_t backup_outleft = outleft;
                  int i, sub_outcount;
                  for (i = 0; i < ret; i++) {
                    if (outleft == 0) {
                      sub_outcount = RET_TOOSMALL;
                      goto johab_hangul_failed;
                    }
                    sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                    if (sub_outcount <= 0)
                      goto johab_hangul_failed;
                    if (!(sub_outcount <= outleft)) abort();
                    outptr += sub_outcount; outleft -= sub_outcount;
                  }
                  goto char_done;
                johab_hangul_failed:
                  cd->ostate = backup_state;
                  outptr = backup_outptr;
                  outleft = backup_outleft;
                  if (sub_outcount < 0) {
                    errno = E2BIG;
                    result = -1;
                    break;
                  }
                }
              }
              {
                /* Try to use a variant, but postfix it with
                   U+303E IDEOGRAPHIC VARIATION INDICATOR
                   (cf. Ken Lunde's "CJKV information processing", p. 188). */
                int indx = -1;
                if (wc == 0x3006)
                  indx = 0;
                else if (wc == 0x30f6)
                  indx = 1;
                else if (wc >= 0x4e00 && wc < 0xa000)
                  indx = cjk_variants_indx[wc-0x4e00];
                if (indx >= 0) {
                  for (;; indx++) {
                    wchar_t buf[2];
                    unsigned short variant = cjk_variants[indx];
                    /* The top bit marks the last variant of this character. */
                    unsigned short last = variant & 0x8000;
                    variant &= 0x7fff;
                    variant += 0x3000;
                    buf[0] = variant; buf[1] = 0x303e;
                    {
                      state_t backup_state = cd->ostate;
                      unsigned char* backup_outptr = outptr;
                      size_t backup_outleft = outleft;
                      int i, sub_outcount;
                      for (i = 0; i < 2; i++) {
                        if (outleft == 0) {
                          sub_outcount = RET_TOOSMALL;
                          goto variant_failed;
                        }
                        sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                        if (sub_outcount <= 0)
                          goto variant_failed;
                        if (!(sub_outcount <= outleft)) abort();
                        outptr += sub_outcount; outleft -= sub_outcount;
                      }
                      goto char_done;
                    variant_failed:
                      cd->ostate = backup_state;
                      outptr = backup_outptr;
                      outleft = backup_outleft;
                      if (sub_outcount < 0) {
                        /* Output buffer too small: stop the whole conversion.
                           A 'break' would only leave the enclosing variant
                           loop and let the fallbacks below run with the error
                           already recorded, so jump out explicitly. */
                        errno = E2BIG;
                        result = -1;
                        goto conversion_stopped;
                      }
                    }
                    if (last)
                      break;
                  }
                }
              }
              if (wc >= 0x2018 && wc <= 0x201a) {
                /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
                wchar_t substitute =
                  (cd->oflags & HAVE_QUOTATION_MARKS
                   ? (wc == 0x201a ? 0x2018 : wc)
                   : (cd->oflags & HAVE_ACCENTS
                      ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                      : 0x0027 /* use apostrophe */
                  )  );
                outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
                if (outcount != 0)
                  goto outcount_ok;
              }
              {
                /* Use the transliteration table. */
                int indx = translit_index(wc);
                if (indx >= 0) {
                  /* The entry is a count followed by that many replacement
                     characters. */
                  const unsigned char * cp = &translit_data[indx];
                  unsigned int num = *cp++;
                  state_t backup_state = cd->ostate;
                  unsigned char* backup_outptr = outptr;
                  size_t backup_outleft = outleft;
                  unsigned int i;
                  int sub_outcount;
                  for (i = 0; i < num; i++) {
                    if (outleft == 0) {
                      sub_outcount = RET_TOOSMALL;
                      goto translit_failed;
                    }
                    sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                    if (sub_outcount <= 0)
                      goto translit_failed;
                    if (!(sub_outcount <= outleft)) abort();
                    outptr += sub_outcount; outleft -= sub_outcount;
                  }
                  goto char_done;
                translit_failed:
                  cd->ostate = backup_state;
                  outptr = backup_outptr;
                  outleft = backup_outleft;
                  if (sub_outcount < 0) {
                    errno = E2BIG;
                    result = -1;
                    break;
                  }
                }
              }
            }
            /* Last resort: U+FFFD REPLACEMENT CHARACTER. */
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
            if (outcount != 0)
              goto outcount_ok;
            errno = EILSEQ;
            result = -1;
            break;
          outcount_ok:
            if (outcount < 0) {
              errno = E2BIG;
              result = -1;
              break;
            }
            if (!(outcount <= outleft)) abort();
            outptr += outcount; outleft -= outcount;
          char_done:
            ;
          }
          /* The character (or shift sequence) has been dealt with: consume it. */
          if (!(incount <= inleft)) abort();
          inptr += incount; inleft -= incount;
        }
      conversion_stopped:
        *inbuf = (ICONV_CONST char*) inptr;
        *inbytesleft = inleft;
        *outbuf = (char*) outptr;
        *outbytesleft = outleft;
        return result;
      }
    }
    
    int iconv_close (iconv_t icd)
    {
      conv_t cd = (conv_t) icd;
      free(cd);
      return 0;
    }
    
    #ifndef LIBICONV_PLUG
    
    /*
     * Queries or changes properties of the conversion descriptor 'icd'.
     *
     *   ICONV_TRIVIALP           stores 1 into *(int *)argument if the input and
     *                            output encodings have the same index, else 0.
     *   ICONV_GET_TRANSLITERATE  stores the transliteration flag into
     *                            *(int *)argument.
     *   ICONV_SET_TRANSLITERATE  sets the transliteration flag to 1 if
     *                            *(const int *)argument is nonzero, else to 0.
     *
     * Returns 0 on success. Any other request yields -1 with errno = EINVAL.
     */
    int iconvctl (iconv_t icd, int request, void* argument)
    {
      conv_t cd = (conv_t) icd;
    
      if (request == ICONV_TRIVIALP) {
        int * answer = (int *) argument;
        *answer = (cd->iindex == cd->oindex);
        return 0;
      }
      if (request == ICONV_GET_TRANSLITERATE) {
        int * answer = (int *) argument;
        *answer = cd->transliterate;
        return 0;
      }
      if (request == ICONV_SET_TRANSLITERATE) {
        const int * wanted = (const int *) argument;
        cd->transliterate = (*wanted != 0);
        return 0;
      }
      /* Unknown request. */
      errno = EINVAL;
      return -1;
    }
    
    #endif