Hash :
1428d541
Author :
Date :
2015-04-01T16:35:52
Proof-of-concept encoder for parallel compression. Add a version of the brotli encoder that compresses each meta-block independently, only using the original input data from previous meta-blocks and nothing from the compressor state. This is a proof-of-concept to show that the current format is flexible enough to support parallel multi-threaded compression.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of parallel Brotli compressor.
#include "./encode_parallel.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./literal_cost.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
namespace {
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
// ASCII
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
// 2-byte UTF8
if (size > 1 &&
(input[0] & 0xe0) == 0xc0 &&
(input[1] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x1f) << 6) |
(input[1] & 0x3f));
if (*symbol > 0x7f) {
return 2;
}
}
// 3-byte UFT8
if (size > 2 &&
(input[0] & 0xf0) == 0xe0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x0f) << 12) |
((input[1] & 0x3f) << 6) |
(input[2] & 0x3f));
if (*symbol > 0x7ff) {
return 3;
}
}
// 4-byte UFT8
if (size > 3 &&
(input[0] & 0xf8) == 0xf0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80 &&
(input[3] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
((input[1] & 0x3f) << 12) |
((input[2] & 0x3f) << 6) |
(input[3] & 0x3f));
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
return 4;
}
}
// Not UTF8, emit a special symbol above the UTF8-code space
*symbol = 0x110000 | input[0];
return 1;
}
// Returns true if at least min_fraction of the data is UTF8-encoded.
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
size_t size_utf8 = 0;
for (size_t pos = 0; pos < length; ) {
int symbol;
int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
pos += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return size_utf8 > min_fraction * length;
}
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
int num_direct_distance_codes,
int distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
&cmd->dist_extra_);
}
}
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const size_t block_size,
const uint8_t* input_buffer,
const size_t prefix_size,
const uint8_t* prefix_buffer,
const StaticDictionary* static_dict,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (block_size == 0 || (!is_last && block_size == 1)) {
return false;
}
const size_t input_size = is_last ? block_size : block_size - 1;
// Copy prefix + next input block into a continuous area.
size_t input_pos = prefix_size;
std::vector<uint8_t> input(prefix_size + input_size);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
// Decide about UTF8 mode.
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[input_pos], input_size, kMinUTF8Ratio);
// Compute literal costs.
std::vector<float> literal_cost(prefix_size + input_size);
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos, input_size, mask, mask,
&input[0], &literal_cost[0]);
} else {
EstimateBitCostsForLiterals(input_pos, input_size, mask, mask,
&input[0], &literal_cost[0]);
}
// Initialize hashers.
int hash_type = 9;
switch (params.mode) {
case BrotliParams::MODE_TEXT: hash_type = 8; break;
case BrotliParams::MODE_FONT: hash_type = 9; break;
default: break;
}
std::unique_ptr<Hashers> hashers(new Hashers());
hashers->Init(hash_type);
hashers->SetStaticDictionary(static_dict);
// Compute backward references.
int last_insert_len = 0;
int num_commands = 0;
double base_min_score = 8.115;
int max_backward_distance = (1 << params.lgwin) - 16;
int dist_cache[4] = { -4, -4, -4, -4 };
std::vector<Command> commands((input_size + 1) >> 1);
CreateBackwardReferences(
input_size, input_pos,
&input[0], mask,
&literal_cost[0], mask,
max_backward_distance,
base_min_score,
params.quality,
hashers.get(),
hash_type,
dist_cache,
&last_insert_len,
&commands[0],
&num_commands);
commands.resize(num_commands);
if (last_insert_len > 0) {
commands.push_back(Command(last_insert_len));
}
// Build the meta-block.
MetaBlockSplit mb;
int num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
int distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
if (params.greedy_block_split) {
BuildMetaBlockGreedy(&input[0], input_pos, mask,
commands.data(), commands.size(), params.quality,
&mb);
} else {
RecomputeDistancePrefixes(&commands,
num_direct_distance_codes,
distance_postfix_bits);
BuildMetaBlock(&input[0], input_pos, mask,
commands,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
&mb);
}
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
int first_byte = 0;
int first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
first_byte_bits = 1;
} else {
first_byte = ((params.lgwin - 17) << 1) | 1;
first_byte_bits = 4;
}
}
storage[0] = first_byte;
int storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
if (!StoreMetaBlock(&input[0], input_pos, input_size, mask,
is_last, params.quality,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands.data(), commands.size(),
mb,
&storage_ix, &storage[0])) {
return false;
}
// If this is not the last meta-block, store a one-byte uncompressed
// meta-block so that the meta-block will end at a byte boundary.
if (!is_last &&
!StoreUncompressedMetaBlock(is_last, &input_buffer[input_size],
0, mask, 1,
&storage_ix, &storage[0])) {
return false;
}
// If the compressed data is too large, fall back to an uncompressed
// meta-block.
size_t output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = first_byte;
storage_ix = first_byte_bits;
if (!StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0])) {
return false;
}
output_size = storage_ix >> 3;
}
// Copy the temporary output with size-check to the output.
if (output_size > *encoded_size) {
return false;
}
memcpy(encoded_buffer, &storage[0], output_size);
*encoded_size = output_size;
return true;
}
} // namespace
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (*encoded_size == 0) {
// Output buffer needs at least one byte.
return 0;
} else if (input_size == 0) {
encoded_buffer[0] = 6;
*encoded_size = 1;
return 1;
}
// Sanitize params.
if (params.lgwin < kMinWindowBits) {
params.lgwin = kMinWindowBits;
} else if (params.lgwin > kMaxWindowBits) {
params.lgwin = kMaxWindowBits;
}
if (params.lgblock == 0) {
params.lgblock = 16;
if (params.quality >= 9 && params.lgwin > params.lgblock) {
params.lgblock = std::min(21, params.lgwin);
}
} else if (params.lgblock < kMinInputBlockBits) {
params.lgblock = kMinInputBlockBits;
} else if (params.lgblock > kMaxInputBlockBits) {
params.lgblock = kMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
std::vector<std::vector<uint8_t> > compressed_pieces;
StaticDictionary dict;
dict.Fill(params.enable_transforms);
// Compress block-by-block independently.
for (size_t pos = 0; pos < input_size; ) {
size_t input_block_size = std::min(max_input_block_size, input_size - pos);
size_t out_size = 1.2 * input_block_size + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
input_block_size,
&input_buffer[pos],
pos,
input_buffer,
&dict,
pos == 0,
pos + input_block_size == input_size,
&out_size,
&out[0])) {
return false;
}
out.resize(out_size);
compressed_pieces.push_back(out);
pos += input_block_size;
}
// Piece together the output.
size_t out_pos = 0;
for (int i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
if (out_pos + out.size() > *encoded_size) {
return false;
}
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
out_pos += out.size();
}
*encoded_size = out_pos;
return true;
}
} // namespace brotli