Add faster Huffman encoding implementation, not yet used in production.
PiperOrigin-RevId: 335094006
Change-Id: I93187fb8a22cfb58dd919fb8deb1bcb906589adf
diff --git a/http2/hpack/huffman/hpack_huffman_encoder.cc b/http2/hpack/huffman/hpack_huffman_encoder.cc
index 305bb5a..5cfe2c9 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder.cc
@@ -110,4 +110,44 @@
}
}
+void HuffmanEncodeFast(quiche::QuicheStringPiece input,
+ size_t encoded_size,
+ std::string* output) {
+ const size_t original_size = output->size();
+ const size_t final_size = original_size + encoded_size;
+ // Reserve an extra four bytes to avoid accessing unallocated memory (even
+ // though it would only be OR'd with zeros and thus not modified).
+ output->resize(final_size + 4, 0);
+
+ // Pointer to first appended byte.
+ char* const first = output->data() + original_size;
+ size_t bit_counter = 0;
+ for (uint8_t c : input) {
+ // Align the Huffman code to byte boundaries as it needs to be written.
+ // The longest Huffman code is 30 bits long, and it can be shifted by up to
+ // 7 bits, requiring 37 bits in total. The most significant 24 bits and
+ // least significant 3 bits of |code| are always zero.
+ uint64_t code = static_cast<uint64_t>(HuffmanSpecTables::kLeftCodes[c])
+ << (8 - (bit_counter % 8));
+ // The byte where the first bit of |code| needs to be written.
+ char* const current = first + (bit_counter / 8);
+ *current |= code >> 32;
+ *(current + 1) |= (code >> 24) & 0xff;
+ *(current + 2) |= (code >> 16) & 0xff;
+ *(current + 3) |= (code >> 8) & 0xff;
+ *(current + 4) |= code & 0xff;
+
+ bit_counter += HuffmanSpecTables::kCodeLengths[c];
+ }
+
+ DCHECK_EQ(encoded_size, (bit_counter + 7) / 8);
+
+ // EOF
+ if (bit_counter % 8 != 0) {
+ *(first + encoded_size - 1) |= 0xff >> (bit_counter & 7);
+ }
+
+ output->resize(final_size);
+}
+
} // namespace http2
diff --git a/http2/hpack/huffman/hpack_huffman_encoder.h b/http2/hpack/huffman/hpack_huffman_encoder.h
index b7012b4..fd9912c 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder.h
+++ b/http2/hpack/huffman/hpack_huffman_encoder.h
@@ -38,6 +38,13 @@
size_t encoded_size,
std::string* huffman);
+// Encode |input| with the Huffman encoding defined RFC7541, used in HPACK and
+// QPACK. |encoded_size| must be the value returned by ExactHuffmanSize().
+// Appends the result to the end of |*output|.
+QUICHE_EXPORT_PRIVATE void HuffmanEncodeFast(quiche::QuicheStringPiece input,
+ size_t encoded_size,
+ std::string* output);
+
} // namespace http2
#endif // QUICHE_HTTP2_HPACK_HUFFMAN_HPACK_HUFFMAN_ENCODER_H_
diff --git a/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc b/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
index 9e4163f..2870bec 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
@@ -9,14 +9,36 @@
//
// Benchmark Time(ns) CPU(ns) Allocs Iterations
// -----------------------------------------------------------------------------
-// BM_EncodeSmallStrings 256 256 0 2614132 0.000B peak-mem
-// BM_EncodeLargeString/1k 4295 4296 1 164072 656.000B peak-mem
-// BM_EncodeLargeString/4k 17077 17078 1 40336 2.516kB peak-mem
-// BM_EncodeLargeString/32k 136586 136584 1 5126 20.016kB peak-mem
-// BM_EncodeLargeString/256k 1094913 1094867 1 632 160.016kB peak-mem
-// BM_EncodeLargeString/2M 8771916 8773555 1 80 1.250MB peak-mem
-// BM_EncodeLargeString/16M 72575563 72590767 1 10 10.000MB peak-mem
-// BM_EncodeLargeString/128M 602461676 602487805 1 1 80.000MB peak-mem
+// BM_EncodeSmallStrings 255 255 0 2557738 0.000B peak-mem
+// BM_E.SmallStringsFast 247 247 0 2849201 0.000B peak-mem
+// BM_EncodeLargeString/1k 4270 4271 1 166430 656.000B peak-mem
+// BM_EncodeLargeString/4k 16819 16820 1 40483 2.516kB peak-mem
+// BM_EncodeLargeString/32k 135426 135416 1 5155 20.016kB peak-mem
+// BM_EncodeLargeString/256k 1080893 1080922 1 647 160.016kB peak-mem
+// BM_EncodeLargeString/2M 8698878 8699261 1 77 1.250MB peak-mem
+// BM_EncodeLargeString/16M 70013626 70009631 1 10 10.000MB peak-mem
+// BM_EncodeLargeString/128M 581697663 581739687 1 1 80.000MB peak-mem
+// BM_E.LargeStringFast/1k 3820 3820 1 184203 656.000B peak-mem
+// BM_E.LargeStringFast/4k 15148 15146 1 46341 2.516kB peak-mem
+// BM_E.LargeStringFast/32k 120409 120426 1 5803 20.016kB peak-mem
+// BM_E.LargeStringFast/256k 968802 968841 1 725 160.016kB peak-mem
+// BM_E.LargeStringFast/2M 7769441 7767875 1 90 1.250MB peak-mem
+// BM_E.LargeStringFast/16M 62571561 62581958 1 10 10.000MB peak-mem
+// BM_E.LargeStringFast/128M 527393576 527376986 1 1 80.000MB peak-mem
+// BM_EncodeLongCode/1k 15197 15200 1 45281 3.766kB peak-mem
+// BM_EncodeLongCode/4k 60782 60775 1 10000 15.016kB peak-mem
+// BM_EncodeLongCode/32k 489516 489692 1 1441 120.016kB peak-mem
+// BM_EncodeLongCode/256k 3902949 3905536 1 179 960.016kB peak-mem
+// BM_EncodeLongCode/2M 31275026 31281987 1 23 7.500MB peak-mem
+// BM_EncodeLongCode/16M 258391322 258361112 1 3 60.000MB peak-mem
+// BM_EncodeLongCode/128M 2098369854 2098398258 1 1 480.000MB peak-mem
+// BM_E.LongCodeFast/1k 3915 3915 1 179602 3.766kB peak-mem
+// BM_E.LongCodeFast/4k 15433 15435 1 45491 15.016kB peak-mem
+// BM_E.LongCodeFast/32k 124756 124750 1 5689 120.016kB peak-mem
+// BM_E.LongCodeFast/256k 1007523 1008291 1 700 960.016kB peak-mem
+// BM_E.LongCodeFast/2M 8129166 8132764 1 87 7.500MB peak-mem
+// BM_E.LongCodeFast/16M 72076964 72079949 1 9 60.000MB peak-mem
+// BM_E.LongCodeFast/128M 631963502 631948474 1 1 480.000MB peak-mem
//
#include <string>
@@ -44,6 +66,18 @@
}
}
+void BM_EncodeSmallStringsFast(benchmark::State& state) {
+ const std::vector<const std::string> inputs{
+ ":method", ":path", "cookie", "set-cookie", "vary", "accept-encoding"};
+ for (auto s : state) {
+ for (const auto& input : inputs) {
+ size_t encoded_size = ExactHuffmanSize(input);
+ std::string result;
+ HuffmanEncodeFast(input, encoded_size, &result);
+ }
+ }
+}
+
void BM_EncodeLargeString(benchmark::State& state) {
const std::string input(state.range(0), 'a');
for (auto s : state) {
@@ -53,8 +87,43 @@
}
}
+void BM_EncodeLargeStringFast(benchmark::State& state) {
+ const std::string input(state.range(0), 'a');
+ for (auto s : state) {
+ size_t encoded_size = ExactHuffmanSize(input);
+ std::string result;
+ HuffmanEncodeFast(input, encoded_size, &result);
+ }
+}
+
+// 13 is one of the characters with the longest encoding: 30 bits.
+// This will never be run in production, because HuffmanEncode is only called on
+// strings that become shorter when encoded, but it gives an idea of compression
+// speed when many characters in the input are encoded with long codes.
+void BM_EncodeLongCode(benchmark::State& state) {
+ const std::string input(state.range(0), 13);
+ for (auto s : state) {
+ size_t encoded_size = ExactHuffmanSize(input);
+ std::string result;
+ HuffmanEncode(input, encoded_size, &result);
+ }
+}
+
+void BM_EncodeLongCodeFast(benchmark::State& state) {
+ const std::string input(state.range(0), 13);
+ for (auto s : state) {
+ size_t encoded_size = ExactHuffmanSize(input);
+ std::string result;
+ HuffmanEncodeFast(input, encoded_size, &result);
+ }
+}
+
BENCHMARK(BM_EncodeSmallStrings);
BENCHMARK(BM_EncodeLargeString)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeLongCode)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeSmallStringsFast);
+BENCHMARK(BM_EncodeLargeStringFast)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeLongCodeFast)->Range(1024, 128 * 1024 * 1024);
} // namespace
} // namespace http2
diff --git a/http2/hpack/huffman/hpack_huffman_encoder_test.cc b/http2/hpack/huffman/hpack_huffman_encoder_test.cc
index 5528408..d73179c 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder_test.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder_test.cc
@@ -11,7 +11,34 @@
namespace http2 {
namespace {
-TEST(HuffmanEncoderTest, SpecRequestExamples) {
+class HuffmanEncoderTest : public ::testing::TestWithParam<bool> {
+ protected:
+ HuffmanEncoderTest() : use_fast_encoder_(GetParam()) {}
+ virtual ~HuffmanEncoderTest() = default;
+
+ void Encode(quiche::QuicheStringPiece input,
+ size_t encoded_size,
+ std::string* output) {
+ use_fast_encoder_ ? HuffmanEncodeFast(input, encoded_size, output)
+ : HuffmanEncode(input, encoded_size, output);
+ }
+
+ const bool use_fast_encoder_;
+};
+
+INSTANTIATE_TEST_SUITE_P(TwoEncoders, HuffmanEncoderTest, ::testing::Bool());
+
+TEST_P(HuffmanEncoderTest, Empty) {
+ std::string empty("");
+ size_t encoded_size = ExactHuffmanSize(empty);
+ EXPECT_EQ(0u, encoded_size);
+
+ std::string buffer;
+ Encode(empty, encoded_size, &buffer);
+ EXPECT_EQ("", buffer);
+}
+
+TEST_P(HuffmanEncoderTest, SpecRequestExamples) {
std::string test_table[] = {
Http2HexDecode("f1e3c2e5f23a6ba0ab90f4ff"),
"www.example.com",
@@ -30,12 +57,12 @@
EXPECT_EQ(BoundedHuffmanSize(plain_string), encoded_size);
std::string buffer;
buffer.reserve();
- HuffmanEncode(plain_string, encoded_size, &buffer);
+ Encode(plain_string, encoded_size, &buffer);
EXPECT_EQ(buffer, huffman_encoded) << "Error encoding " << plain_string;
}
}
-TEST(HuffmanEncoderTest, SpecResponseExamples) {
+TEST_P(HuffmanEncoderTest, SpecResponseExamples) {
// clang-format off
std::string test_table[] = {
Http2HexDecode("6402"),
@@ -61,12 +88,12 @@
EXPECT_EQ(huffman_encoded.size(), encoded_size);
EXPECT_EQ(BoundedHuffmanSize(plain_string), encoded_size);
std::string buffer;
- HuffmanEncode(plain_string, encoded_size, &buffer);
+ Encode(plain_string, encoded_size, &buffer);
EXPECT_EQ(buffer, huffman_encoded) << "Error encoding " << plain_string;
}
}
-TEST(HuffmanEncoderTest, EncodedSizeAgreesWithEncodeString) {
+TEST_P(HuffmanEncoderTest, EncodedSizeAgreesWithEncodeString) {
std::string test_table[] = {
"",
"Mon, 21 Oct 2013 20:13:21 GMT",
@@ -85,12 +112,30 @@
const std::string& plain_string = test_table[i];
size_t encoded_size = ExactHuffmanSize(plain_string);
std::string huffman_encoded;
- HuffmanEncode(plain_string, encoded_size, &huffman_encoded);
+ Encode(plain_string, encoded_size, &huffman_encoded);
EXPECT_EQ(encoded_size, huffman_encoded.size());
EXPECT_LE(BoundedHuffmanSize(plain_string), plain_string.size());
EXPECT_LE(BoundedHuffmanSize(plain_string), ExactHuffmanSize(plain_string));
}
}
+TEST_P(HuffmanEncoderTest, AppendToOutput) {
+ size_t encoded_size = ExactHuffmanSize("foo");
+ std::string buffer;
+ Encode("foo", encoded_size, &buffer);
+ EXPECT_EQ(Http2HexDecode("94e7"), buffer);
+
+ encoded_size = ExactHuffmanSize("bar");
+ Encode("bar", encoded_size, &buffer);
+
+ if (use_fast_encoder_) {
+ // HuffmanEncodeFast() appends to output allowing callers to eliminate copy.
+ EXPECT_EQ(Http2HexDecode("94e78c767f"), buffer);
+ } else {
+ // HuffmanEncode() clears output before encoding.
+ EXPECT_EQ(Http2HexDecode("8c767f"), buffer);
+ }
+}
+
} // namespace
} // namespace http2
diff --git a/http2/hpack/huffman/huffman_spec_tables.cc b/http2/hpack/huffman/huffman_spec_tables.cc
index 27707b9..2b1d8bd 100644
--- a/http2/hpack/huffman/huffman_spec_tables.cc
+++ b/http2/hpack/huffman/huffman_spec_tables.cc
@@ -46,9 +46,6 @@
// TODO(jamessynge): Remove use of binary literals, that is a C++ 14 feature.
-// Uncomment these codes if needed for generating Huffman output, as opposed
-// to decoding Huffman input.
-/*
// The encoding of each symbol, left justified (as printed), which means that
// the first bit of the encoding is the high-order bit of the uint32.
// static
@@ -311,7 +308,6 @@
0b11111111111111111111101110000000, // 0xff
0b11111111111111111111111111111100, // 0x100
};
-*/
// static
const uint32_t HuffmanSpecTables::kRightCodes[] = {
diff --git a/http2/hpack/huffman/huffman_spec_tables.h b/http2/hpack/huffman/huffman_spec_tables.h
index 6cd8726..d1b144b 100644
--- a/http2/hpack/huffman/huffman_spec_tables.h
+++ b/http2/hpack/huffman/huffman_spec_tables.h
@@ -18,6 +18,10 @@
// The encoding of each symbol, right justified (as printed), which means that
// the last bit of the encoding is the low-order bit of the uint32.
static const uint32_t kRightCodes[257];
+
+ // The encoding of each symbol, left justified (as printed), which means that
+ // the first bit of the encoding is the high-order bit of the uint32.
+ static const uint32_t kLeftCodes[257];
};
} // namespace http2