Edit

kc3-lang/brotli/enc/context.h

Branch :

  • Show log

    Commit

  • Author : Eugene Kliuchnikov
    Date : 2016-09-21 17:20:36
    Hash : 0a63f99d
    Message : Update encoder * move `common/port.h` to `includes/port.h` * replace magic more magic numbers with constants * artificially limit window size to 2^18 for quality 0 and 1 * use fixed shifts for quality 0 and 1 hashes * removed `BrotliEncoderWriteMetadata` * added `BROTLI_OPERATION_EMIT_METADATA` instead * deprecated low-level API * fixed MSVC warnings

  • enc/context.h
  • /* Copyright 2013 Google Inc. All Rights Reserved.
    
       Distributed under MIT license.
       See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
    */
    
    /* Functions to map previous bytes into a context id. */
    
    #ifndef BROTLI_ENC_CONTEXT_H_
    #define BROTLI_ENC_CONTEXT_H_
    
    #include <brotli/port.h>
    #include <brotli/types.h>
    
    #if defined(__cplusplus) || defined(c_plusplus)
    extern "C" {
    #endif
    
    /* Second-order context lookup table for UTF8 byte streams.
    
       If p1 and p2 are the previous two bytes, we calculate the context as
    
         context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
    
       If the previous two bytes are ASCII characters (i.e. < 128), this will be
       equivalent to
    
         context = 4 * context1(p1) + context2(p2),
    
       where context1 is based on the previous byte in the following way:
    
         0  : non-ASCII control
         1  : \t, \n, \r
         2  : space
         3  : other punctuation
         4  : " '
         5  : %
         6  : ( < [ {
         7  : ) > ] }
         8  : , ; :
         9  : .
         10 : =
         11 : number
         12 : upper-case vowel
         13 : upper-case consonant
         14 : lower-case vowel
         15 : lower-case consonant
    
       and context2 is based on the second last byte:
    
         0 : control, space
         1 : punctuation
         2 : upper-case letter, number
         3 : lower-case letter
    
       If the last byte is ASCII, and the second last byte is not (in a valid UTF8
       stream it will be a continuation byte, value between 128 and 191), the
       context is the same as if the second last byte was an ASCII control or space.
    
       If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
       be a continuation byte and the context id is 2 or 3 depending on the LSB of
       the last byte and to a lesser extent on the second last byte if it is ASCII.
    
       If the last byte is a UTF8 continuation byte, the second last byte can be:
         - continuation byte: the next byte is probably ASCII or lead byte (assuming
           4-byte UTF8 characters are rare) and the context id is 0 or 1.
         - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
         - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
    
       The possible value combinations of the previous two bytes, the range of
       context ids and the type of the next byte is summarized in the table below:
    
       |--------\-----------------------------------------------------------------|
       |         \                         Last byte                              |
       | Second   \---------------------------------------------------------------|
       | last byte \    ASCII            |   cont. byte        |   lead byte      |
       |            \   (0-127)          |   (128-191)         |   (192-)         |
       |=============|===================|=====================|==================|
       |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
       |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
       |-------------|-------------------|---------------------|------------------|
       |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
       |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
       |-------------|-------------------|---------------------|------------------|
       |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
       |  (192-207)  |                   |  context: 0 - 1     |                  |
       |-------------|-------------------|---------------------|------------------|
       |  lead byte  | not valid         |  next: cont.        |  not valid       |
       |  (208-)     |                   |  context: 2 - 3     |                  |
       |-------------|-------------------|---------------------|------------------|
    */
    static const uint8_t kUTF8ContextLookup[512] = {
      /* Last byte. */
      /* */
      /* ASCII range. */
       0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  0,  0,  4,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
      44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
      12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
      52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
      12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
      60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12,  0,
      /* UTF8 continuation byte range. */
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
      /* UTF8 lead byte range. */
      2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
      2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
      2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
      2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
      /* Second last byte. */
      /* */
      /* ASCII range. */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
      1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
      /* UTF8 continuation byte range. */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* UTF8 lead byte range. */
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    
    /* Context lookup table for small signed integers. */
    static const uint8_t kSigned3BitContextLookup[] = {
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
    };
    
    typedef enum ContextType {
      CONTEXT_LSB6         = 0,
      CONTEXT_MSB6         = 1,
      CONTEXT_UTF8         = 2,
      CONTEXT_SIGNED       = 3
    } ContextType;
    
    static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
      switch (mode) {
        case CONTEXT_LSB6:
          return p1 & 0x3f;
        case CONTEXT_MSB6:
          return (uint8_t)(p1 >> 2);
        case CONTEXT_UTF8:
          return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
        case CONTEXT_SIGNED:
          return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
                           kSigned3BitContextLookup[p2]);
        default:
          return 0;
      }
    }
    
    #if defined(__cplusplus) || defined(c_plusplus)
    }  /* extern "C" */
    #endif
    
    #endif  /* BROTLI_ENC_CONTEXT_H_ */