blob: daf85bc44d5010c4b7d720eac3855d9e2fc2de5d [file] [log] [blame]
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "http2/hpack/huffman/hpack_huffman_encoder.h"
#include "http2/hpack/huffman/huffman_spec_tables.h"
#include "http2/platform/api/http2_logging.h"
namespace http2 {
size_t HuffmanSize(absl::string_view plain) {
size_t bits = 0;
for (const uint8_t c : plain) {
bits += HuffmanSpecTables::kCodeLengths[c];
}
return (bits + 7) / 8;
}
void HuffmanEncode(absl::string_view plain,
size_t encoded_size,
std::string* huffman) {
QUICHE_DCHECK(huffman != nullptr);
huffman->reserve(huffman->size() + encoded_size);
uint64_t bit_buffer = 0; // High-bit is next bit to output. Not clear if that
// is more performant than having the low-bit be the
// last to be output.
size_t bits_unused = 64; // Number of bits available for the next code.
for (uint8_t c : plain) {
size_t code_length = HuffmanSpecTables::kCodeLengths[c];
if (bits_unused < code_length) {
// There isn't enough room in bit_buffer for the code of c.
// Flush until bits_unused > 56 (i.e. 64 - 8).
do {
char h = static_cast<char>(bit_buffer >> 56);
bit_buffer <<= 8;
bits_unused += 8;
// Perhaps would be more efficient if we populated an array of chars,
// so we don't have to call push_back each time. Reconsider if used
// for production.
huffman->push_back(h);
} while (bits_unused <= 56);
}
uint64_t code = HuffmanSpecTables::kRightCodes[c];
size_t shift_by = bits_unused - code_length;
bit_buffer |= (code << shift_by);
bits_unused -= code_length;
}
// bit_buffer contains (64-bits_unused) bits that still need to be flushed.
// Output whole bytes until we don't have any whole bytes left.
size_t bits_used = 64 - bits_unused;
while (bits_used >= 8) {
char h = static_cast<char>(bit_buffer >> 56);
bit_buffer <<= 8;
bits_used -= 8;
huffman->push_back(h);
}
if (bits_used > 0) {
// We have less than a byte left to output. The spec calls for padding out
// the final byte with the leading bits of the EOS symbol (30 1-bits).
constexpr uint64_t leading_eos_bits = 0b11111111;
bit_buffer |= (leading_eos_bits << (56 - bits_used));
char h = static_cast<char>(bit_buffer >> 56);
huffman->push_back(h);
}
}
void HuffmanEncodeFast(absl::string_view input,
size_t encoded_size,
std::string* output) {
const size_t original_size = output->size();
const size_t final_size = original_size + encoded_size;
// Reserve an extra four bytes to avoid accessing unallocated memory (even
// though it would only be OR'd with zeros and thus not modified).
output->resize(final_size + 4, 0);
// Pointer to first appended byte.
char* const first = &*output->begin() + original_size;
size_t bit_counter = 0;
for (uint8_t c : input) {
// Align the Huffman code to byte boundaries as it needs to be written.
// The longest Huffman code is 30 bits long, and it can be shifted by up to
// 7 bits, requiring 37 bits in total. The most significant 25 bits and
// least significant 2 bits of |code| are always zero.
uint64_t code = static_cast<uint64_t>(HuffmanSpecTables::kLeftCodes[c])
<< (8 - (bit_counter % 8));
// The byte where the first bit of |code| needs to be written.
char* const current = first + (bit_counter / 8);
bit_counter += HuffmanSpecTables::kCodeLengths[c];
*current |= code >> 32;
// Do not check if this write is zero before executing it, because with
// uniformly random shifts and an ideal random input distribution
// corresponding to the Huffman tree it would only be zero in 29% of the
// cases.
*(current + 1) |= (code >> 24) & 0xff;
// Continue to next input character if there is nothing else to write.
// (If next byte is zero, then rest must also be zero.)
if ((code & 0xff0000) == 0) {
continue;
}
*(current + 2) |= (code >> 16) & 0xff;
// Continue to next input character if there is nothing else to write.
// (If next byte is zero, then rest must also be zero.)
if ((code & 0xff00) == 0) {
continue;
}
*(current + 3) |= (code >> 8) & 0xff;
// Do not check if this write is zero, because the check would probably be
// as expensive as the write.
*(current + 4) |= code & 0xff;
}
QUICHE_DCHECK_EQ(encoded_size, (bit_counter + 7) / 8);
// EOF
if (bit_counter % 8 != 0) {
*(first + encoded_size - 1) |= 0xff >> (bit_counter & 7);
}
output->resize(final_size);
}
} // namespace http2