Edit

kc3-lang/brotli/dec/transform.h

Branch :

  • Show log

    Commit

  • Author : Eugene Kliuchnikov
    Date : 2016-06-03 10:51:04
    Hash : f1c9ab29
    Message : Extract common parts: constants, dictionary, etc.

  • dec/transform.h
  • /* Copyright 2013 Google Inc. All Rights Reserved.
    
       Distributed under MIT license.
       See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
    */
    
    /* Transformations on dictionary words. */
    
    #ifndef BROTLI_DEC_TRANSFORM_H_
    #define BROTLI_DEC_TRANSFORM_H_
    
    #include "../common/types.h"
    #include "./port.h"
    
    #if defined(__cplusplus) || defined(c_plusplus)
    extern "C" {
    #endif
    
    /* Kinds of modification applied to the dictionary word itself (the middle
       field of a Transform entry).

       The numeric values matter: TransformDictionaryWord subtracts a
       kOmitLastN value directly from the word length, and derives the number of
       leading bytes to skip as (value - (kOmitFirst1 - 1)). */
    enum WordTransformType {
      /* Copy the word unchanged. */
      kIdentity = 0,

      /* Drop the last N bytes of the word; kOmitLastN == N. */
      kOmitLast1 = 1, kOmitLast2 = 2, kOmitLast3 = 3,
      kOmitLast4 = 4, kOmitLast5 = 5, kOmitLast6 = 6,
      kOmitLast7 = 7, kOmitLast8 = 8, kOmitLast9 = 9,

      /* Run ToUpperCase on the first character / on every character. */
      kUppercaseFirst = 10, kUppercaseAll = 11,

      /* Drop the first N bytes of the word; kOmitFirstN == N + 11. */
      kOmitFirst1 = 12, kOmitFirst2 = 13, kOmitFirst3 = 14,
      kOmitFirst4 = 15, kOmitFirst5 = 16, kOmitFirst6 = 17,
      kOmitFirst7 = 18, kOmitFirst8 = 19, kOmitFirst9 = 20
    };
    
    /* One row of the kTransforms table: a prefix string, a modification of the
       dictionary word, and a suffix string. Rows are written as positional
       initializers, so the field order must not change. */
    typedef struct {
      /* Byte offset of the NUL-terminated prefix inside kPrefixSuffix. */
      const uint8_t prefix_id;
      /* Word modification to apply; holds a WordTransformType value. */
      const uint8_t transform;
      /* Byte offset of the NUL-terminated suffix inside kPrefixSuffix. */
      const uint8_t suffix_id;
    } Transform;
    
    /* Packed pool of every prefix/suffix string used by kTransforms. Each string
       is NUL-terminated and is addressed by its byte offset (the kPFix_*
       constants), so the exact byte layout of this literal must not change.
       Offset 0 is the empty string. Some offsets point into the middle of a
       longer entry to reuse its tail, e.g. " the " (offset 9) and "e " (offset
       12) both live inside " of the " (offset 6).
       The 208 bytes are 207 bytes of literal text plus the implicit terminating
       NUL, which ends the last entry "ous ". */
    static const char kPrefixSuffix[208] =
        "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
        " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
        " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
        " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
    
    enum {
      /* EMPTY = ""
         SP = " "
         DQUOT = "\""
         SQUOT = "'"
         CLOSEBR = "]"
         OPEN = "("
         SLASH = "/"
         NBSP = non-breaking space "\0xc2\xa0"
      */
      kPFix_EMPTY = 0,
      kPFix_SP = 1,
      kPFix_COMMASP = 3,
      kPFix_SPofSPtheSP = 6,
      kPFix_SPtheSP = 9,
      kPFix_eSP = 12,
      kPFix_SPofSP = 15,
      kPFix_sSP = 20,
      kPFix_DOT = 23,
      kPFix_SPandSP = 25,
      kPFix_SPinSP = 31,
      kPFix_DQUOT = 36,
      kPFix_SPtoSP = 38,
      kPFix_DQUOTGT = 43,
      kPFix_NEWLINE = 46,
      kPFix_DOTSP = 48,
      kPFix_CLOSEBR = 51,
      kPFix_SPforSP = 53,
      kPFix_SPaSP = 59,
      kPFix_SPthatSP = 63,
      kPFix_SQUOT = 70,
      kPFix_SPwithSP = 72,
      kPFix_SPfromSP = 79,
      kPFix_SPbySP = 86,
      kPFix_OPEN = 91,
      kPFix_DOTSPTheSP = 93,
      kPFix_SPonSP = 100,
      kPFix_SPasSP = 105,
      kPFix_SPisSP = 110,
      kPFix_ingSP = 115,
      kPFix_NEWLINETAB = 120,
      kPFix_COLON = 123,
      kPFix_edSP = 125,
      kPFix_EQDQUOT = 129,
      kPFix_SPatSP = 132,
      kPFix_lySP = 137,
      kPFix_COMMA = 141,
      kPFix_EQSQUOT = 143,
      kPFix_DOTcomSLASH = 146,
      kPFix_DOTSPThisSP = 152,
      kPFix_SPnotSP = 160,
      kPFix_erSP = 166,
      kPFix_alSP = 170,
      kPFix_fulSP = 174,
      kPFix_iveSP = 179,
      kPFix_lessSP = 184,
      kPFix_estSP = 190,
      kPFix_izeSP = 195,
      kPFix_NBSP = 200,
      kPFix_ousSP = 203
    };
    
    /* Table of word transforms, indexed by the |transform| argument of
       TransformDictionaryWord. Each row is { prefix, word modification, suffix }:
       the output is the prefix string, then the dictionary word modified
       according to the WordTransformType, then the suffix string. Prefix and
       suffix are kPFix_* offsets into kPrefixSuffix.
       NOTE(review): the row order is presumably fixed by the Brotli stream
       format, since streams refer to transforms by index - confirm against the
       format specification before reordering or inserting rows. */
    static const Transform kTransforms[] = {
      { kPFix_EMPTY, kIdentity, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SP },
      { kPFix_SP, kIdentity, kPFix_SP },
      { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
      { kPFix_SP, kIdentity, kPFix_EMPTY },
      { kPFix_sSP, kIdentity, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_SPofSP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SPandSP },
      { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
      { kPFix_COMMASP, kIdentity, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_COMMASP },
      { kPFix_SP, kUppercaseFirst, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_SPinSP },
      { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
      { kPFix_eSP, kIdentity, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_DQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_DOT },
      { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
      { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
      { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
      { kPFix_EMPTY, kIdentity, kPFix_SPforSP },
      { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SPaSP },
      { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
      { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_DOTSP },
      { kPFix_DOT, kIdentity, kPFix_EMPTY },
      { kPFix_SP, kIdentity, kPFix_COMMASP },
      { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
      { kPFix_EMPTY, kIdentity, kPFix_SQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
      { kPFix_EMPTY, kIdentity, kPFix_SPbySP },
      { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
      { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
      { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SPonSP },
      { kPFix_EMPTY, kIdentity, kPFix_SPasSP },
      { kPFix_EMPTY, kIdentity, kPFix_SPisSP },
      { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
      { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
      { kPFix_EMPTY, kIdentity, kPFix_COLON },
      { kPFix_SP, kIdentity, kPFix_DOTSP },
      { kPFix_EMPTY, kIdentity, kPFix_edSP },
      { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_OPEN },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
      { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_SPatSP },
      { kPFix_EMPTY, kIdentity, kPFix_lySP },
      { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
      { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
      { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
      { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
      { kPFix_DOT, kIdentity, kPFix_OPEN },
      { kPFix_EMPTY, kUppercaseAll, kPFix_SP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
      { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
      { kPFix_SP, kIdentity, kPFix_DOT },
      { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
      { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
      { kPFix_EMPTY, kIdentity, kPFix_COMMA },
      { kPFix_DOT, kIdentity, kPFix_SP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
      { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
      { kPFix_SP, kIdentity, kPFix_EQDQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_erSP },
      { kPFix_SP, kUppercaseAll, kPFix_SP },
      { kPFix_EMPTY, kIdentity, kPFix_alSP },
      { kPFix_SP, kUppercaseAll, kPFix_EMPTY },
      { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
      { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
      { kPFix_SP, kIdentity, kPFix_OPEN },
      { kPFix_EMPTY, kIdentity, kPFix_fulSP },
      { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
      { kPFix_EMPTY, kIdentity, kPFix_iveSP },
      { kPFix_EMPTY, kIdentity, kPFix_lessSP },
      { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_estSP },
      { kPFix_SP, kUppercaseFirst, kPFix_DOT },
      { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
      { kPFix_SP, kIdentity, kPFix_EQSQUOT },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
      { kPFix_EMPTY, kIdentity, kPFix_izeSP },
      { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
      { kPFix_NBSP, kIdentity, kPFix_EMPTY },
      { kPFix_SP, kIdentity, kPFix_COMMA },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
      { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
      { kPFix_EMPTY, kIdentity, kPFix_ousSP },
      { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
      { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
      { kPFix_SP, kUppercaseFirst, kPFix_COMMA },
      { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
      { kPFix_SP, kUppercaseAll, kPFix_COMMASP },
      { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
      { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
      { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
      { kPFix_SP, kUppercaseAll, kPFix_DOT },
      { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
      { kPFix_SP, kUppercaseAll, kPFix_DOTSP },
      { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
      { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
      { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
    };
    
    /* Number of rows in kTransforms, derived from the array's size so it cannot
       drift from the table. */
    static const int kNumTransforms =
        (int)(sizeof(kTransforms) / sizeof(*kTransforms));
    
    /* Upper-cases, in place, the single character starting at |p| using a
       deliberately simplified model of UTF-8, and returns how many bytes that
       character is taken to occupy (1, 2 or 3).

       The sequence length is decided by the lead byte alone:
         - lead >= 0xe0: treated as 3 bytes; the third byte is XOR-ed with 5
           (an arbitrary transform, not real case mapping);
         - lead >= 0xc0: treated as 2 bytes; the second byte is XOR-ed with 32;
         - otherwise:    1 byte; only ASCII 'a'..'z' is changed (bit 5 flipped).
       No bounds are checked: up to p[2] may be written, so the caller must
       ensure those bytes are writable. */
    static int ToUpperCase(uint8_t* p) {
      const uint8_t lead = p[0];
      if (lead >= 0xe0) {
        p[2] ^= 5;
        return 3;
      }
      if (lead >= 0xc0) {
        p[1] ^= 32;
        return 2;
      }
      if (lead >= 'a' && lead <= 'z') {
        p[0] = (uint8_t)(lead ^ 32);
      }
      return 1;
    }
    
    /* Applies row |transform| of kTransforms to a dictionary word and writes the
       result to |dst|: the row's prefix string, then the (possibly trimmed and/or
       upper-cased) word, then the row's suffix string.

         dst        output buffer; no size is passed and no bounds are checked, so
                    the caller must provide enough room for prefix + word + suffix
         word       the |len| source bytes of the dictionary word
         len        length of |word| in bytes
         transform  index into kTransforms; not range-checked here

       Returns the number of bytes written to |dst|. */
    static BROTLI_NOINLINE int TransformDictionaryWord(
        uint8_t* dst, const uint8_t* word, int len, int transform) {
      const int kind = kTransforms[transform].transform;
      /* Positive only for kOmitFirst1..kOmitFirst9, where it equals N. */
      const int drop_front = kind - (kOmitFirst1 - 1);
      const char* affix;
      int out = 0;
      int k;

      /* Prefix: copy up to, but not including, its terminating NUL. */
      for (affix = &kPrefixSuffix[kTransforms[transform].prefix_id];
           *affix != 0; ++affix) {
        dst[out++] = (uint8_t)*affix;
      }

      /* Trim the word. For kOmitLastN the enumerator value is N itself;
         kIdentity (0) passes through this branch and subtracts nothing. */
      if (drop_front > 0) {
        word += drop_front;
        len -= drop_front;
      } else if (kind <= kOmitLast9) {
        len -= kind;
      }

      /* Copy whatever is left of the word (nothing if trimming made len <= 0). */
      for (k = 0; k < len; ++k) {
        dst[out++] = word[k];
      }

      /* Upper-casing works in place on the copy of the word inside dst. */
      switch (kind) {
        case kUppercaseFirst:
          ToUpperCase(&dst[out - len]);
          break;
        case kUppercaseAll: {
          uint8_t* cursor = &dst[out - len];
          int remaining = len;
          /* ToUpperCase reports how many bytes the character consumed. */
          while (remaining > 0) {
            const int step = ToUpperCase(cursor);
            cursor += step;
            remaining -= step;
          }
          break;
        }
        default:
          break;
      }

      /* Suffix: same NUL-terminated copy as the prefix. */
      for (affix = &kPrefixSuffix[kTransforms[transform].suffix_id];
           *affix != 0; ++affix) {
        dst[out++] = (uint8_t)*affix;
      }
      return out;
    }
    
    #if defined(__cplusplus) || defined(c_plusplus)
    }  /* extern "C" */
    #endif
    
    #endif  /* BROTLI_DEC_TRANSFORM_H_ */