Edit

kc3-lang/libiconv/lib/loop_unicode.h

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2021-06-06 11:51:12
    Hash : 91f96be0
    Message : Change the license of the library from LGPL 2.0 to LGPL 2.1.

  • lib/loop_unicode.h
  • /*
     * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
     * This file is part of the GNU LIBICONV Library.
     *
     * The GNU LIBICONV Library is free software; you can redistribute it
     * and/or modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either version 2.1
     * of the License, or (at your option) any later version.
     *
     * The GNU LIBICONV Library is distributed in the hope that it will be
     * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public
     * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
     * If not, see <https://www.gnu.org/licenses/>.
     */
    
    /* This file defines the conversion loop via Unicode as a pivot encoding. */
    
    /* Attempt to transliterate wc. Return code as in xxx_wctomb.
       The following strategies are tried in order; the first one whose
       replacement is accepted by cd->ofuncs.xxx_wctomb determines the result:
         1. decomposition via johab_hangul_decompose (only if cd->oflags
            contains HAVE_HANGUL_JAMO),
         2. a CJK variant character followed by U+303E,
         3. a substitute for the quotation marks U+2018..U+201A,
         4. the replacement sequence from translit_data, whose elements are
            themselves transliterated recursively if necessary.
       On success the return value is the number of bytes stored at outptr.
       Strategies 1, 2 and 4 return RET_TOOSMALL as soon as one of their
       sub-conversions fails with anything other than RET_ILUNI; strategy 3
       returns the xxx_wctomb result unchanged unless it is RET_ILUNI.
       If no strategy applies or all of them yield RET_ILUNI, the function
       returns RET_ILUNI.
       A failed multi-character attempt restores cd->ostate to the value it
       had when that attempt began.
       No strategy checks outleft before its first conversion except 1, 2
       and 4; NOTE(review): callers appear to guarantee outleft >= 1 on
       entry - confirm when adding new call sites. */
    static int unicode_transliterate (conv_t cd, ucs4_t wc,
                                      unsigned char* outptr, size_t outleft)
    {
      if (cd->oflags & HAVE_HANGUL_JAMO) {
        /* Decompose Hangul into Jamo. Use double-width Jamo (contained
           in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
           (contained in Unicode only). */
        ucs4_t buf[3];
        int ret = johab_hangul_decompose(cd,buf,wc);
        if (ret != RET_ILUNI) {
          /* we know 1 <= ret <= 3 */
          /* Snapshot the output position and state so that a partially
             emitted sequence can be undone. */
          state_t backup_state = cd->ostate;
          unsigned char* backup_outptr = outptr;
          size_t backup_outleft = outleft;
          int i, sub_outcount;
          for (i = 0; i < ret; i++) {
            if (outleft == 0) {
              sub_outcount = RET_TOOSMALL;
              goto johab_hangul_failed;
            }
            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
            if (sub_outcount <= RET_ILUNI)
              goto johab_hangul_failed;
            /* The converter must never report more bytes than it was given. */
            if (!(sub_outcount <= outleft)) abort();
            outptr += sub_outcount; outleft -= sub_outcount;
          }
          /* All pieces were written: report the total byte count. */
          return outptr-backup_outptr;
        johab_hangul_failed:
          /* Roll back, then either give up (buffer too small) or fall
             through to the next strategy (RET_ILUNI). */
          cd->ostate = backup_state;
          outptr = backup_outptr;
          outleft = backup_outleft;
          if (sub_outcount != RET_ILUNI)
            return RET_TOOSMALL;
        }
      }
      {
        /* Try to use a variant, but postfix it with
           U+303E IDEOGRAPHIC VARIATION INDICATOR
           (cf. Ken Lunde's "CJKV information processing", p. 188). */
        /* indx is the position of the first candidate in cjk_variants,
           or negative if wc has no variants. */
        int indx = -1;
        if (wc == 0x3006)
          indx = 0;
        else if (wc == 0x30f6)
          indx = 1;
        else if (wc >= 0x4e00 && wc < 0xa000)
          indx = cjk_variants_indx[wc-0x4e00];
        if (indx >= 0) {
          /* Walk the candidate list; bit 15 of an entry marks the last
             candidate, the low 15 bits are the variant's offset from
             U+3000. */
          for (;; indx++) {
            ucs4_t buf[2];
            unsigned short variant = cjk_variants[indx];
            unsigned short last = variant & 0x8000;
            variant &= 0x7fff;
            variant += 0x3000;
            buf[0] = variant; buf[1] = 0x303e;
            {
              /* Same snapshot / emit / roll back scheme as above, for the
                 two characters of this candidate. */
              state_t backup_state = cd->ostate;
              unsigned char* backup_outptr = outptr;
              size_t backup_outleft = outleft;
              int i, sub_outcount;
              for (i = 0; i < 2; i++) {
                if (outleft == 0) {
                  sub_outcount = RET_TOOSMALL;
                  goto variant_failed;
                }
                sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
                if (sub_outcount <= RET_ILUNI)
                  goto variant_failed;
                if (!(sub_outcount <= outleft)) abort();
                outptr += sub_outcount; outleft -= sub_outcount;
              }
              return outptr-backup_outptr;
            variant_failed:
              cd->ostate = backup_state;
              outptr = backup_outptr;
              outleft = backup_outleft;
              if (sub_outcount != RET_ILUNI)
                return RET_TOOSMALL;
            }
            /* This candidate is not representable; try the next one, if
               there is one. */
            if (last)
              break;
          }
        }
      }
      if (wc >= 0x2018 && wc <= 0x201a) {
        /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
        /* The substitute depends on what the target encoding offers:
           another quotation mark, an accent character, or the apostrophe. */
        ucs4_t substitute =
          (cd->oflags & HAVE_QUOTATION_MARKS
           ? (wc == 0x201a ? 0x2018 : wc)
           : (cd->oflags & HAVE_ACCENTS
              ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
              : 0x0027 /* use apostrophe */
          )  );
        int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
        /* Any result other than RET_ILUNI (including RET_TOOSMALL) is
           final; RET_ILUNI falls through to the table lookup. */
        if (outcount != RET_ILUNI)
          return outcount;
      }
      {
        /* Use the transliteration table. */
        int indx = translit_index(wc);
        if (indx >= 0) {
          /* translit_data[indx] holds the length of the replacement,
             followed by that many replacement characters. */
          const unsigned int * cp = &translit_data[indx];
          unsigned int num = *cp++;
          state_t backup_state = cd->ostate;
          unsigned char* backup_outptr = outptr;
          size_t backup_outleft = outleft;
          unsigned int i;
          int sub_outcount;
          for (i = 0; i < num; i++) {
            if (outleft == 0) {
              sub_outcount = RET_TOOSMALL;
              goto translit_failed;
            }
            sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
            if (sub_outcount == RET_ILUNI)
              /* Recursive transliteration. */
              sub_outcount = unicode_transliterate(cd,cp[i],outptr,outleft);
            if (sub_outcount <= RET_ILUNI)
              goto translit_failed;
            if (!(sub_outcount <= outleft)) abort();
            outptr += sub_outcount; outleft -= sub_outcount;
          }
          return outptr-backup_outptr;
        translit_failed:
          cd->ostate = backup_state;
          outptr = backup_outptr;
          outleft = backup_outleft;
          if (sub_outcount != RET_ILUNI)
            return RET_TOOSMALL;
        }
      }
      /* No strategy produced a representable replacement. */
      return RET_ILUNI;
    }
    
    #ifndef LIBICONV_PLUG
    
    /* State shared between one invocation of a uc_to_mb fallback handler
       and the write callback it is given. */
    struct uc_to_mb_fallback_locals {
      unsigned char* l_outbuf;   /* where the next replacement byte goes */
      size_t l_outbytesleft;     /* number of bytes still free at l_outbuf */
      int l_errno;               /* 0, or the error recorded by the callback */
    };

    /* Write callback for uc_to_mb fallback handlers.
       Appends the buflen bytes at buf to the output described by
       callback_arg (a struct uc_to_mb_fallback_locals).  If they do not
       fit, nothing is copied and l_errno becomes E2BIG.  Once l_errno is
       nonzero, every further call is a no-op. */
    static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                            void* callback_arg)
    {
      struct uc_to_mb_fallback_locals * sink =
        (struct uc_to_mb_fallback_locals *) callback_arg;

      /* An earlier call already failed: keep that error, write nothing. */
      if (sink->l_errno != 0)
        return;

      /* The replacement is copied entirely or not at all. */
      if (buflen > sink->l_outbytesleft) {
        sink->l_errno = E2BIG;
        return;
      }

      memcpy(sink->l_outbuf, buf, buflen);
      sink->l_outbytesleft -= buflen;
      sink->l_outbuf += buflen;
    }
    
    /* State shared between one invocation of a mb_to_uc fallback handler
       and the write callback mb_to_uc_write_replacement. */
    struct mb_to_uc_fallback_locals {
      /* Conversion descriptor whose output converter encodes the
         replacement characters. */
      conv_t l_cd;
      /* Position in the output buffer where the next byte is stored. */
      unsigned char* l_outbuf;
      /* Number of bytes still available at l_outbuf. */
      size_t l_outbytesleft;
      /* 0 while everything succeeded, otherwise the errno value recorded
         by the callback. */
      int l_errno;
    };
    
    /* Write callback for mb_to_uc fallback handlers.
       Converts the buflen Unicode characters at buf to the target encoding
       of plocals->l_cd and appends the result to the output described by
       callback_arg (a struct mb_to_uc_fallback_locals).
       For a character that xxx_wctomb rejects with RET_ILUNI, the following
       is tried in order: silently drop it if it is a tag character
       (U+E0000..U+E007F), transliterate it (if cd->transliterate), drop it
       (if cd->discard_ilseq), hand it to the uc_to_mb fallback (if set),
       encode U+FFFD instead.
       On the first failure l_errno is set (E2BIG, EILSEQ, or the error
       recorded by the uc_to_mb fallback's write callback) and the remaining
       characters are skipped.  l_outbuf and l_outbytesleft are updated to
       the position after the last completely converted character.
       If l_errno is already nonzero on entry, the call does nothing. */
    static void mb_to_uc_write_replacement (const unsigned int *buf, size_t buflen,
                                            void* callback_arg)
    {
      struct mb_to_uc_fallback_locals * plocals =
        (struct mb_to_uc_fallback_locals *) callback_arg;
      /* Do nothing if already encountered an error in a previous call. */
      if (plocals->l_errno == 0) {
        /* Attempt to convert the passed buffer to the target encoding. */
        conv_t cd = plocals->l_cd;
        unsigned char* outptr = plocals->l_outbuf;
        size_t outleft = plocals->l_outbytesleft;
        for (; buflen > 0; buf++, buflen--) {
          ucs4_t wc = *buf;
          int outcount;
          /* Never call the converter with an empty output buffer. */
          if (outleft == 0) {
            plocals->l_errno = E2BIG;
            break;
          }
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
          if (outcount != RET_ILUNI)
            goto outcount_ok;
          /* Handle Unicode tag characters (range U+E0000..U+E007F). */
          if ((wc >> 7) == (0xe0000 >> 7))
            goto outcount_zero;
          /* Try transliteration. */
          if (cd->transliterate) {
            outcount = unicode_transliterate(cd,wc,outptr,outleft);
            if (outcount != RET_ILUNI)
              goto outcount_ok;
          }
          if (cd->discard_ilseq) {
            /* Drop the character: nothing is written for it. */
            outcount = 0;
            goto outcount_ok;
          }
          #ifndef LIBICONV_PLUG
          else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
            /* Let the user-supplied handler write a byte replacement
               directly into the output buffer. */
            struct uc_to_mb_fallback_locals locals;
            locals.l_outbuf = outptr;
            locals.l_outbytesleft = outleft;
            locals.l_errno = 0;
            cd->fallbacks.uc_to_mb_fallback(wc,
                                            uc_to_mb_write_replacement,
                                            &locals,
                                            cd->fallbacks.data);
            if (locals.l_errno != 0) {
              plocals->l_errno = locals.l_errno;
              break;
            }
            /* The handler already advanced the output position, so the
               common code below must not advance it again. */
            outptr = locals.l_outbuf;
            outleft = locals.l_outbytesleft;
            outcount = 0;
            goto outcount_ok;
          }
          #endif
          /* Last resort: the replacement character U+FFFD. */
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
          if (outcount != RET_ILUNI)
            goto outcount_ok;
          plocals->l_errno = EILSEQ;
          break;
        outcount_ok:
          /* A negative count other than RET_ILUNI is reported as E2BIG. */
          if (outcount < 0) {
            plocals->l_errno = E2BIG;
            break;
          }
          #ifndef LIBICONV_PLUG
          if (cd->hooks.uc_hook)
            (*cd->hooks.uc_hook)(wc, cd->hooks.data);
          #endif
          /* The converter must never report more bytes than it was given. */
          if (!(outcount <= outleft)) abort();
          outptr += outcount; outleft -= outcount;
        outcount_zero: ;
        }
        /* Reached on normal completion and after a break alike. */
        plocals->l_outbuf = outptr;
        plocals->l_outbytesleft = outleft;
      }
    }
    
    #endif /* !LIBICONV_PLUG */
    
    /* Conversion loop using Unicode as pivot: every input character is
       decoded with cd->ifuncs.xxx_mbtowc and encoded with
       cd->ofuncs.xxx_wctomb.
       inbuf and inbytesleft describe the input, outbuf and outbytesleft the
       output buffer.  On every exit, successful or not, all four are
       updated to point just past the last completely converted character.
       Return value: the number of characters that could not be encoded
       directly (they were transliterated, discarded, handled by the
       uc_to_mb fallback or replaced by U+FFFD) plus the number of invalid
       input sequences handled by the mb_to_uc fallback; or (size_t)(-1)
       with errno set to
         EILSEQ  for invalid input or a character that cannot be encoded,
         EINVAL  if the input ends before anything can be detected,
         E2BIG   if the output buffer is full,
       or to the error recorded by a fallback handler's write callback.
       When a character fails, cd->istate is restored to its value before
       that character was decoded. */
    static size_t unicode_loop_convert (iconv_t icd,
                                        const char* * inbuf, size_t *inbytesleft,
                                        char* * outbuf, size_t *outbytesleft)
    {
      conv_t cd = (conv_t) icd;
      size_t result = 0;
      const unsigned char* inptr = (const unsigned char*) *inbuf;
      size_t inleft = *inbytesleft;
      unsigned char* outptr = (unsigned char*) *outbuf;
      size_t outleft = *outbytesleft;
      while (inleft > 0) {
        /* Saved so that a character that cannot be written can be decoded
           again by a later call. */
        state_t last_istate = cd->istate;
        ucs4_t wc;
        int incount;
        int outcount;
        incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
        if (incount < 0) {
          /* Negative results with the same parity as RET_ILSEQ denote
             invalid input; the others denote incomplete input. */
          if ((unsigned int)(-1-incount) % 2 == (unsigned int)(-1-RET_ILSEQ) % 2) {
            /* Case 1: invalid input, possibly after a shift sequence */
            incount = DECODE_SHIFT_ILSEQ(incount);
            if (cd->discard_ilseq) {
              /* Skip one code unit of the input encoding. */
              switch (cd->iindex) {
                case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                case ei_utf32: case ei_utf32be: case ei_utf32le:
                case ei_ucs4internal: case ei_ucs4swapped:
                  incount += 4; break;
                case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                case ei_utf16: case ei_utf16be: case ei_utf16le:
                case ei_ucs2internal: case ei_ucs2swapped:
                  incount += 2; break;
                default:
                  incount += 1; break;
              }
              goto outcount_zero;
            }
            #ifndef LIBICONV_PLUG
            else if (cd->fallbacks.mb_to_uc_fallback != NULL) {
              /* incount2 is the size of the invalid code unit that is
                 handed to the fallback handler. */
              unsigned int incount2;
              struct mb_to_uc_fallback_locals locals;
              switch (cd->iindex) {
                case ei_ucs4: case ei_ucs4be: case ei_ucs4le:
                case ei_utf32: case ei_utf32be: case ei_utf32le:
                case ei_ucs4internal: case ei_ucs4swapped:
                  incount2 = 4; break;
                case ei_ucs2: case ei_ucs2be: case ei_ucs2le:
                case ei_utf16: case ei_utf16be: case ei_utf16le:
                case ei_ucs2internal: case ei_ucs2swapped:
                  incount2 = 2; break;
                default:
                  incount2 = 1; break;
              }
              locals.l_cd = cd;
              locals.l_outbuf = outptr;
              locals.l_outbytesleft = outleft;
              locals.l_errno = 0;
              cd->fallbacks.mb_to_uc_fallback((const char*)inptr+incount, incount2,
                                              mb_to_uc_write_replacement,
                                              &locals,
                                              cd->fallbacks.data);
              if (locals.l_errno != 0) {
                /* Consume only the shift sequence, not the invalid unit. */
                inptr += incount; inleft -= incount;
                errno = locals.l_errno;
                result = -1;
                break;
              }
              incount += incount2;
              outptr = locals.l_outbuf;
              outleft = locals.l_outbytesleft;
              result += 1;
              goto outcount_zero;
            }
            #endif
            inptr += incount; inleft -= incount;
            errno = EILSEQ;
            result = -1;
            break;
          }
          if (incount == RET_TOOFEW(0)) {
            /* Case 2: not enough bytes available to detect anything */
            errno = EINVAL;
            result = -1;
            break;
          }
          /* Case 3: k bytes read, but only a shift sequence */
          incount = DECODE_TOOFEW(incount);
        } else {
          /* Case 4: k bytes read, making up a wide character */
          /* Never call the converter with an empty output buffer. */
          if (outleft == 0) {
            cd->istate = last_istate;
            errno = E2BIG;
            result = -1;
            break;
          }
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
          if (outcount != RET_ILUNI)
            goto outcount_ok;
          /* Handle Unicode tag characters (range U+E0000..U+E007F). */
          if ((wc >> 7) == (0xe0000 >> 7))
            goto outcount_zero;
          /* Try transliteration. */
          /* From here on the character is not encoded directly: count it. */
          result++;
          if (cd->transliterate) {
            outcount = unicode_transliterate(cd,wc,outptr,outleft);
            if (outcount != RET_ILUNI)
              goto outcount_ok;
          }
          if (cd->discard_ilseq) {
            outcount = 0;
            goto outcount_ok;
          }
          #ifndef LIBICONV_PLUG
          else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
            struct uc_to_mb_fallback_locals locals;
            locals.l_outbuf = outptr;
            locals.l_outbytesleft = outleft;
            locals.l_errno = 0;
            cd->fallbacks.uc_to_mb_fallback(wc,
                                            uc_to_mb_write_replacement,
                                            &locals,
                                            cd->fallbacks.data);
            if (locals.l_errno != 0) {
              /* Leave through the common exit below, like every other
                 error in this loop, so that the caller's buffer pointers
                 and counts reflect the characters already converted by
                 this call.  Returning directly would leave them at their
                 values on entry although the output buffer and the
                 conversion state have advanced. */
              cd->istate = last_istate;
              errno = locals.l_errno;
              result = -1;
              break;
            }
            /* The handler already advanced the output position. */
            outptr = locals.l_outbuf;
            outleft = locals.l_outbytesleft;
            outcount = 0;
            goto outcount_ok;
          }
          #endif
          /* Last resort: the replacement character U+FFFD. */
          outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
          if (outcount != RET_ILUNI)
            goto outcount_ok;
          cd->istate = last_istate;
          errno = EILSEQ;
          result = -1;
          break;
        outcount_ok:
          /* A negative count other than RET_ILUNI is reported as E2BIG. */
          if (outcount < 0) {
            cd->istate = last_istate;
            errno = E2BIG;
            result = -1;
            break;
          }
          #ifndef LIBICONV_PLUG
          if (cd->hooks.uc_hook)
            (*cd->hooks.uc_hook)(wc, cd->hooks.data);
          #endif
          /* The converter must never report more bytes than it was given. */
          if (!(outcount <= outleft)) abort();
          outptr += outcount; outleft -= outcount;
        }
      outcount_zero:
        /* The character (or shift sequence) is done: consume its bytes. */
        if (!(incount <= inleft)) abort();
        inptr += incount; inleft -= incount;
      }
      /* Common exit for success and for every error path that breaks out
         of the loop. */
      *inbuf = (const char*) inptr;
      *inbytesleft = inleft;
      *outbuf = (char*) outptr;
      *outbytesleft = outleft;
      return result;
    }
    
    /* Flush and reset entry point of the Unicode pivot loop (it takes no
       input buffer).
       If outbuf or *outbuf is NULL, both conversion states are zeroed and 0
       is returned.
       Otherwise, in this order:
         - if the input converter has an xxx_flushwc function and it
           delivers a pending character, that character is encoded exactly
           like a character in the main conversion loop,
         - if the output converter has an xxx_reset function, its output is
           appended,
         - *outbuf and *outbytesleft are advanced and both states zeroed.
       Return value: 1 if the flushed character could not be encoded
       directly (and is not a tag character), else 0; or (size_t)(-1) with
       errno set to E2BIG (output buffer too small), EILSEQ (character
       cannot be encoded) or the error recorded by the uc_to_mb fallback's
       write callback.  On an error while flushing, cd->istate is restored
       so that the pending character is not lost. */
    static size_t unicode_loop_reset (iconv_t icd,
                                      char* * outbuf, size_t *outbytesleft)
    {
      conv_t cd = (conv_t) icd;
      if (outbuf == NULL || *outbuf == NULL) {
        /* Reset the states. */
        memset(&cd->istate,'\0',sizeof(state_t));
        memset(&cd->ostate,'\0',sizeof(state_t));
        return 0;
      } else {
        size_t result = 0;
        if (cd->ifuncs.xxx_flushwc) {
          state_t last_istate = cd->istate;
          ucs4_t wc;
          if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
            unsigned char* outptr = (unsigned char*) *outbuf;
            size_t outleft = *outbytesleft;
            int outcount;
            /* Never call the converter with an empty output buffer: the
               other conversion paths in this file all make sure that
               outleft >= 1 before the first xxx_wctomb call for a
               character, and report E2BIG otherwise. */
            if (outleft == 0) {
              cd->istate = last_istate;
              errno = E2BIG;
              return -1;
            }
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
            if (outcount != RET_ILUNI)
              goto outcount_ok;
            /* Handle Unicode tag characters (range U+E0000..U+E007F). */
            if ((wc >> 7) == (0xe0000 >> 7))
              goto outcount_zero;
            /* Try transliteration. */
            /* From here on the character is not encoded directly: count it. */
            result++;
            if (cd->transliterate) {
              outcount = unicode_transliterate(cd,wc,outptr,outleft);
              if (outcount != RET_ILUNI)
                goto outcount_ok;
            }
            if (cd->discard_ilseq) {
              outcount = 0;
              goto outcount_ok;
            }
            #ifndef LIBICONV_PLUG
            else if (cd->fallbacks.uc_to_mb_fallback != NULL) {
              struct uc_to_mb_fallback_locals locals;
              locals.l_outbuf = outptr;
              locals.l_outbytesleft = outleft;
              locals.l_errno = 0;
              cd->fallbacks.uc_to_mb_fallback(wc,
                                              uc_to_mb_write_replacement,
                                              &locals,
                                              cd->fallbacks.data);
              if (locals.l_errno != 0) {
                /* Nothing has been stored through outbuf yet, so a direct
                   return leaves the caller's view consistent. */
                cd->istate = last_istate;
                errno = locals.l_errno;
                return -1;
              }
              /* The handler already advanced the output position. */
              outptr = locals.l_outbuf;
              outleft = locals.l_outbytesleft;
              outcount = 0;
              goto outcount_ok;
            }
            #endif
            /* Last resort: the replacement character U+FFFD. */
            outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
            if (outcount != RET_ILUNI)
              goto outcount_ok;
            cd->istate = last_istate;
            errno = EILSEQ;
            return -1;
          outcount_ok:
            /* A negative count other than RET_ILUNI is reported as E2BIG. */
            if (outcount < 0) {
              cd->istate = last_istate;
              errno = E2BIG;
              return -1;
            }
            #ifndef LIBICONV_PLUG
            if (cd->hooks.uc_hook)
              (*cd->hooks.uc_hook)(wc, cd->hooks.data);
            #endif
            /* The converter must never report more bytes than it was given. */
            if (!(outcount <= outleft)) abort();
            outptr += outcount;
            outleft -= outcount;
          outcount_zero:
            /* Publish the flushed character before the reset sequence is
               appended below. */
            *outbuf = (char*) outptr;
            *outbytesleft = outleft;
          }
        }
        if (cd->ofuncs.xxx_reset) {
          unsigned char* outptr = (unsigned char*) *outbuf;
          size_t outleft = *outbytesleft;
          int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
          if (outcount < 0) {
            errno = E2BIG;
            return -1;
          }
          if (!(outcount <= outleft)) abort();
          *outbuf = (char*) (outptr + outcount);
          *outbytesleft = outleft - outcount;
        }
        /* Everything was written: both directions start from scratch. */
        memset(&cd->istate,'\0',sizeof(state_t));
        memset(&cd->ostate,'\0',sizeof(state_t));
        return result;
      }
    }