Edit

thodg/libiconv/lib/loop_unicode.h

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2001-02-05 19:36:41
    Hash : da1f6493
    Message : Many more transliterations.

  • lib/loop_unicode.h
  • /* Copyright (C) 1999-2001 Free Software Foundation, Inc.
       This file is part of the GNU ICONV Library.
    
       The GNU ICONV Library is free software; you can redistribute it and/or
       modify it under the terms of the GNU Library General Public License as
       published by the Free Software Foundation; either version 2 of the
       License, or (at your option) any later version.
    
       The GNU ICONV Library is distributed in the hope that it will be useful,
       but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       Library General Public License for more details.
    
       You should have received a copy of the GNU Library General Public
       License along with the GNU ICONV Library; see the file COPYING.LIB.  If not,
       write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
       Boston, MA 02111-1307, USA.  */
    
    /* This file defines the conversion loop via Unicode as a pivot encoding. */
    
    /* Converts characters from *inbuf to *outbuf, using Unicode as pivot:
       every input character is decoded with cd->ifuncs.xxx_mbtowc and then
       encoded with cd->ofuncs.xxx_wctomb.

       Return conventions of the two callbacks, as relied upon below:
         xxx_mbtowc:  0   invalid input,
                     -1   too few input bytes to decide,
                     < -1 only a shift sequence of (-1 - value) bytes was read,
                     > 0  number of input bytes making up one character.
         xxx_wctomb:  0   character not representable in the output encoding,
                     < 0  output buffer too small,
                     > 0  number of output bytes written.

       When a character is not representable, the following fallbacks are
       tried in order (the first four only if cd->transliterate is set):
       Hangul -> Jamo decomposition, CJK variant followed by U+303E, a
       substitute for the quotation marks U+2018..U+201A, the transliteration
       table, and finally U+FFFD.

       On return, *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
       to reflect the bytes consumed and produced. Returns the number of
       characters that needed one of the fallbacks, or (size_t)(-1) with errno
       set to EILSEQ (invalid or unconvertible input), EINVAL (incomplete
       input) or E2BIG (output buffer full). */
    static size_t unicode_loop_convert (iconv_t icd,
                                        const char* * inbuf, size_t *inbytesleft,
                                        char* * outbuf, size_t *outbytesleft)
    {
      conv_t cd = (conv_t) icd;
      size_t result = 0;
      const unsigned char* inptr = (const unsigned char*) *inbuf;
      size_t inleft = *inbytesleft;
      unsigned char* outptr = (unsigned char*) *outbuf;
      size_t outleft = *outbytesleft;
      while (inleft > 0) {
        ucs4_t wc;
        int incount;
        int outcount;
        incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
        if (incount <= 0) {
          if (incount == 0) {
            /* Case 1: invalid input */
            errno = EILSEQ;
            result = -1;
            break;
          }
          if (incount == -1) {
            /* Case 2: not enough bytes available to detect anything */
            errno = EINVAL;
            result = -1;
            break;
          }
          /* Case 3: k bytes read, but only a shift sequence */
          incount = -1-incount;
        } else {
          /* Case 4: k bytes read, making up a wide character */
          if (outleft == 0) {
            errno = E2BIG;
            result = -1;
            break;
          }
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
          if (outcount != 0)
            goto outcount_ok;
          /* Try transliteration. */
          /* Count the character as not directly convertible; every failure
             path below overwrites result with -1. */
          result++;
          if (cd->transliterate) {
            if (cd->oflags & HAVE_HANGUL_JAMO) {
              /* Decompose Hangul into Jamo. Use double-width Jamo (contained
                 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
                 (contained in Unicode only). */
              ucs4_t buf[3];
              int ret = johab_hangul_decompose(cd,buf,wc);
              if (ret != RET_ILSEQ) {
                /* we know 1 <= ret <= 3 */
                /* Remember the output position and state, so that a partially
                   written decomposition can be undone. */
                state_t backup_state = cd->ostate;
                unsigned char* backup_outptr = outptr;
                size_t backup_outleft = outleft;
                int i, sub_outcount;
                for (i = 0; i < ret; i++) {
                  if (outleft == 0) {
                    sub_outcount = RET_TOOSMALL;
                    goto johab_hangul_failed;
                  }
                  sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                  if (sub_outcount <= 0)
                    goto johab_hangul_failed;
                  if (!(sub_outcount <= outleft)) abort();
                  outptr += sub_outcount; outleft -= sub_outcount;
                }
                goto char_done;
              johab_hangul_failed:
                cd->ostate = backup_state;
                outptr = backup_outptr;
                outleft = backup_outleft;
                if (sub_outcount < 0) {
                  errno = E2BIG;
                  result = -1;
                  /* Not inside a for loop here: this leaves the while loop. */
                  break;
                }
              }
            }
            {
              /* Try to use a variant, but postfix it with
                 U+303E IDEOGRAPHIC VARIATION INDICATOR
                 (cf. Ken Lunde's "CJKV information processing", p. 188). */
              int indx = -1;
              if (wc == 0x3006)
                indx = 0;
              else if (wc == 0x30f6)
                indx = 1;
              else if (wc >= 0x4e00 && wc < 0xa000)
                indx = cjk_variants_indx[wc-0x4e00];
              if (indx >= 0) {
                /* Walk the list of variants starting at indx; the entry with
                   bit 0x8000 set is the last one of the list. */
                for (;; indx++) {
                  ucs4_t buf[2];
                  unsigned short variant = cjk_variants[indx];
                  unsigned short last = variant & 0x8000;
                  variant &= 0x7fff;
                  variant += 0x3000;
                  buf[0] = variant; buf[1] = 0x303e;
                  {
                    state_t backup_state = cd->ostate;
                    unsigned char* backup_outptr = outptr;
                    size_t backup_outleft = outleft;
                    int i, sub_outcount;
                    for (i = 0; i < 2; i++) {
                      if (outleft == 0) {
                        sub_outcount = RET_TOOSMALL;
                        goto variant_failed;
                      }
                      sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                      if (sub_outcount <= 0)
                        goto variant_failed;
                      if (!(sub_outcount <= outleft)) abort();
                      outptr += sub_outcount; outleft -= sub_outcount;
                    }
                    goto char_done;
                  variant_failed:
                    cd->ostate = backup_state;
                    outptr = backup_outptr;
                    outleft = backup_outleft;
                    if (sub_outcount < 0) {
                      errno = E2BIG;
                      result = -1;
                      /* A 'break' here would only leave the enclosing
                         variant loop and then fall through to the remaining
                         fallbacks, which could clobber the E2BIG error or
                         continue converting with result == -1. Leave the
                         whole conversion loop instead. */
                      goto done;
                    }
                  }
                  if (last)
                    break;
                }
              }
            }
            if (wc >= 0x2018 && wc <= 0x201a) {
              /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
              ucs4_t substitute =
                (cd->oflags & HAVE_QUOTATION_MARKS
                 ? (wc == 0x201a ? 0x2018 : wc)
                 : (cd->oflags & HAVE_ACCENTS
                    ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
                    : 0x0027 /* use apostrophe */
                )  );
              outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
              if (outcount != 0)
                goto outcount_ok;
            }
            {
              /* Use the transliteration table. */
              int indx = translit_index(wc);
              if (indx >= 0) {
                /* translit_data[indx] holds the length of the replacement,
                   followed by that many replacement characters. */
                const unsigned short * cp = &translit_data[indx];
                unsigned int num = *cp++;
                state_t backup_state = cd->ostate;
                unsigned char* backup_outptr = outptr;
                size_t backup_outleft = outleft;
                unsigned int i;
                int sub_outcount;
                for (i = 0; i < num; i++) {
                  if (outleft == 0) {
                    sub_outcount = RET_TOOSMALL;
                    goto translit_failed;
                  }
                  sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
                  if (sub_outcount <= 0)
                    goto translit_failed;
                  if (!(sub_outcount <= outleft)) abort();
                  outptr += sub_outcount; outleft -= sub_outcount;
                }
                goto char_done;
              translit_failed:
                cd->ostate = backup_state;
                outptr = backup_outptr;
                outleft = backup_outleft;
                if (sub_outcount < 0) {
                  errno = E2BIG;
                  result = -1;
                  /* Not inside a for loop here: this leaves the while loop. */
                  break;
                }
              }
            }
          }
          /* Last resort: U+FFFD, if the output encoding can represent it. */
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
          if (outcount != 0)
            goto outcount_ok;
          errno = EILSEQ;
          result = -1;
          break;
        outcount_ok:
          /* Reached with any nonzero outcount; a negative one means that the
             output buffer is too small. */
          if (outcount < 0) {
            errno = E2BIG;
            result = -1;
            break;
          }
          if (!(outcount <= outleft)) abort();
          outptr += outcount; outleft -= outcount;
        char_done:
          ;
        }
        /* The input bytes are consumed only once the output side succeeded. */
        if (!(incount <= inleft)) abort();
        inptr += incount; inleft -= incount;
      }
    done:
      *inbuf = (const char*) inptr;
      *inbytesleft = inleft;
      *outbuf = (char*) outptr;
      *outbytesleft = outleft;
      return result;
    }
    
    /* Brings the conversion descriptor back to its initial state.
       If outbuf and *outbuf are both non-NULL and the output encoding provides
       an xxx_reset function, that function is first called to write into
       *outbuf (at most *outbytesleft bytes); *outbuf and *outbytesleft are
       advanced by the number of bytes it reports. If it reports a negative
       count, errno is set to E2BIG, (size_t)(-1) is returned and the states
       are left untouched.
       Otherwise the input and output states are zeroed and 0 is returned. */
    static size_t unicode_loop_reset (iconv_t icd,
                                      char* * outbuf, size_t *outbytesleft)
    {
      conv_t cd = (conv_t) icd;
      int have_output = (outbuf != NULL && *outbuf != NULL);

      if (have_output && cd->ofuncs.xxx_reset) {
        unsigned char* dest = (unsigned char *) *outbuf;
        int written = cd->ofuncs.xxx_reset(cd, dest, *outbytesleft);
        if (written < 0) {
          errno = E2BIG;
          return -1;
        }
        *outbuf += written;
        *outbytesleft -= written;
      }

      /* Reset the states. */
      memset(&cd->istate,'\0',sizeof(state_t));
      memset(&cd->ostate,'\0',sizeof(state_t));
      return 0;
    }