Edit

thodg/libiconv/src/cp950.h

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2000-01-04 21:56:56
    Hash : 00e960fc
    Message : Upgrade to libiconv-1.0.

  • src/cp950.h
  • /*
     * CP950
     */
    
    /*
     * Microsoft CP950 is a slightly extended and slightly modified version of
     * BIG5. The differences between the EASTASIA/OTHER/BIG5.TXT and
     * VENDORS/MICSFT/WINDOWS/CP950.TXT tables found on ftp.unicode.org are
     * as follows:
     *
     * 1. Some characters in the BIG5 range are defined differently:
     *
     *     code   BIG5.TXT                       CP950.TXT
     *    0xA145  0x2022 # BULLET                0x2027 # HYPHENATION POINT
     *    0xA14E  0xFF64 # HALFWIDTH IDEOGRAPHIC COMMA
     *                                           0xFE51 # SMALL IDEOGRAPHIC COMMA
     *    0xA15A    ---                          0x2574 # BOX DRAWINGS LIGHT LEFT
     *    0xA1C2  0x203E # OVERLINE              0x00AF # MACRON
     *    0xA1C3    ---                          0xFFE3 # FULLWIDTH MACRON
     *    0xA1C5    ---                          0x02CD # MODIFIER LETTER LOW MACRON
     *    0xA1E3  0x223C # TILDE OPERATOR        0xFF5E # FULLWIDTH TILDE
     *    0xA1F2  0x2641 # EARTH                 0x2295 # CIRCLED PLUS
     *    0xA1F3  0x2609 # SUN                   0x2299 # CIRCLED DOT OPERATOR
     *    0xA1FE    ---                          0xFF0F # FULLWIDTH SOLIDUS
     *    0xA240    ---                          0xFF3C # FULLWIDTH REVERSE SOLIDUS
     *    0xA241  0xFF0F # FULLWIDTH SOLIDUS     0x2215 # DIVISION SLASH
     *    0xA242  0xFF3C # FULLWIDTH REVERSE SOLIDUS
     *                                           0xFE68 # SMALL REVERSE SOLIDUS
     *    0xA244  0x00A5 # YEN SIGN              0xFFE5 # FULLWIDTH YEN SIGN
     *    0xA246  0x00A2 # CENT SIGN             0xFFE0 # FULLWIDTH CENT SIGN
     *    0xA247  0x00A3 # POUND SIGN            0xFFE1 # FULLWIDTH POUND SIGN
     *    0xA2CC    ---                          0x5341
     *    0xA2CE    ---                          0x5345
     *
     *    We don't implement these changes.
     *
     * 2. A small new row. See cp950ext.h.
     *
     * 3. CP950.TXT is lacking the range 0xC6A1..0xC7FC (Hiragana, Katakana,
     *    Cyrillic, circled digits, parenthesized digits).
     *
     *    We implement this omission, because said range is marked "uncertain"
     *    in the unicode.org BIG5 table.
     */
    
    #include "cp950ext.h"
    
    /*
     * Decode one CP950 character starting at s (n bytes available) into *pwc.
     *
     * Bytes below 0x80 are handed to ascii_mbtowc.  Lead bytes 0xA1..0xFE start
     * a two-byte sequence: it is first tried against the BIG5 table (except for
     * the range 0xC6A1..0xC7FE, which is deliberately skipped), and row 0xF9 is
     * additionally tried against the CP950 extension table.
     *
     * Returns the result of the converter that accepted the input,
     * RET_TOOFEW(0) if a two-byte sequence is started but n < 2, and RET_ILSEQ
     * in every other case.
     */
    static int
    cp950_mbtowc (conv_t conv, wchar_t *pwc, const unsigned char *s, int n)
    {
      unsigned char lead = s[0];
      unsigned char trail;
      int trail_valid;
      int skipped_range;
      int result;

      /* Code set 0 (ASCII) */
      if (lead < 0x80)
        return ascii_mbtowc(conv,pwc,s,n);

      /* Anything outside 0xA1..0xFE cannot start a code set 1 character. */
      if (lead < 0xa1 || lead > 0xfe)
        return RET_ILSEQ;

      /* Code set 1 (BIG5 extended): a second byte is required. */
      if (n < 2)
        return RET_TOOFEW(0);
      trail = s[1];

      trail_valid = (trail >= 0x40 && trail <= 0x7e)
                    || (trail >= 0xa1 && trail <= 0xfe);
      /* 0xC6A1 and up, plus all of row 0xC7, is not looked up in BIG5. */
      skipped_range = (lead == 0xc7) || (lead == 0xc6 && trail >= 0xa1);

      if (trail_valid && !skipped_range) {
        result = big5_mbtowc(conv,pwc,s,2);
        if (result != RET_ILSEQ)
          return result;
      }

      /* Row 0xF9 may still be covered by the extension table; its verdict,
         including RET_ILSEQ, is final. */
      if (lead == 0xf9)
        return cp950ext_mbtowc(conv,pwc,s,2);

      return RET_ILSEQ;
    }
    
    /*
     * Encode the wide character wc as CP950 into r (n bytes available).
     *
     * The converters are tried in order: ASCII, then BIG5 (ignoring results in
     * the range 0xC6A1..0xC7FE, which CP950 does not use), then the CP950
     * extension table.
     *
     * Returns the ascii_wctomb result when that converter accepts wc, 2 when a
     * two-byte code was stored in r, RET_TOOSMALL when a two-byte code was
     * found but n < 2, and RET_ILSEQ when no converter accepts wc.  Aborts if
     * a two-byte converter reports a length other than 2.
     */
    static int
    cp950_wctomb (conv_t conv, unsigned char *r, wchar_t wc, int n)
    {
      unsigned char code[2];
      int have_code = 0;
      int len;

      /* Code set 0 (ASCII) */
      len = ascii_wctomb(conv,r,wc,n);
      if (len != RET_ILSEQ)
        return len;

      /* Code set 1 (BIG5 extended): plain BIG5 first. */
      len = big5_wctomb(conv,code,wc,2);
      if (len != RET_ILSEQ) {
        if (len != 2) abort();
        /* Reject BIG5 codes from the range that CP950 leaves out. */
        have_code = !(code[0] == 0xc7 || (code[0] == 0xc6 && code[1] >= 0xa1));
      }

      /* Otherwise fall back to the CP950 extension row. */
      if (!have_code) {
        len = cp950ext_wctomb(conv,code,wc,2);
        if (len == RET_ILSEQ)
          return RET_ILSEQ;
        if (len != 2) abort();
      }

      /* Only touch the caller's buffer once we know both bytes fit. */
      if (n < 2)
        return RET_TOOSMALL;
      r[0] = code[0];
      r[1] = code[1];
      return 2;
    }