Add faster Huffman encoding implementation, not yet used in production.

PiperOrigin-RevId: 335094006
Change-Id: I93187fb8a22cfb58dd919fb8deb1bcb906589adf
diff --git a/http2/hpack/huffman/hpack_huffman_encoder.cc b/http2/hpack/huffman/hpack_huffman_encoder.cc
index 305bb5a..5cfe2c9 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder.cc
@@ -110,4 +110,44 @@
   }
 }
 
+void HuffmanEncodeFast(quiche::QuicheStringPiece input,
+                       size_t encoded_size,
+                       std::string* output) {
+  const size_t original_size = output->size();
+  const size_t final_size = original_size + encoded_size;
+  // Reserve an extra four bytes to avoid accessing unallocated memory (even
+  // though it would only be OR'd with zeros and thus not modified).
+  output->resize(final_size + 4, 0);
+
+  // Pointer to first appended byte.
+  char* const first = output->data() + original_size;
+  size_t bit_counter = 0;
+  for (uint8_t c : input) {
+    // Align the Huffman code to byte boundaries as it needs to be written.
+    // The longest Huffman code is 30 bits long, and it can be shifted by up to
+    // 7 bits, requiring 37 bits in total.  The most significant 24 bits and
+    // least significant 3 bits of |code| are always zero.
+    uint64_t code = static_cast<uint64_t>(HuffmanSpecTables::kLeftCodes[c])
+                    << (8 - (bit_counter % 8));
+    // The byte where the first bit of |code| needs to be written.
+    char* const current = first + (bit_counter / 8);
+    *current |= code >> 32;
+    *(current + 1) |= (code >> 24) & 0xff;
+    *(current + 2) |= (code >> 16) & 0xff;
+    *(current + 3) |= (code >> 8) & 0xff;
+    *(current + 4) |= code & 0xff;
+
+    bit_counter += HuffmanSpecTables::kCodeLengths[c];
+  }
+
+  DCHECK_EQ(encoded_size, (bit_counter + 7) / 8);
+
+  // EOF
+  if (bit_counter % 8 != 0) {
+    *(first + encoded_size - 1) |= 0xff >> (bit_counter & 7);
+  }
+
+  output->resize(final_size);
+}
+
 }  // namespace http2
diff --git a/http2/hpack/huffman/hpack_huffman_encoder.h b/http2/hpack/huffman/hpack_huffman_encoder.h
index b7012b4..fd9912c 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder.h
+++ b/http2/hpack/huffman/hpack_huffman_encoder.h
@@ -38,6 +38,13 @@
                                          size_t encoded_size,
                                          std::string* huffman);
 
+// Encode |input| with the Huffman encoding defined RFC7541, used in HPACK and
+// QPACK.  |encoded_size| must be the value returned by ExactHuffmanSize().
+// Appends the result to the end of |*output|.
+QUICHE_EXPORT_PRIVATE void HuffmanEncodeFast(quiche::QuicheStringPiece input,
+                                             size_t encoded_size,
+                                             std::string* output);
+
 }  // namespace http2
 
 #endif  // QUICHE_HTTP2_HPACK_HUFFMAN_HPACK_HUFFMAN_ENCODER_H_
diff --git a/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc b/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
index 9e4163f..2870bec 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder_benchmark.cc
@@ -9,14 +9,36 @@
 //
 // Benchmark                  Time(ns)  CPU(ns) Allocs Iterations
 // -----------------------------------------------------------------------------
-// BM_EncodeSmallStrings           256       256   0  2614132   0.000B  peak-mem
-// BM_EncodeLargeString/1k        4295      4296   1   164072 656.000B  peak-mem
-// BM_EncodeLargeString/4k       17077     17078   1    40336   2.516kB peak-mem
-// BM_EncodeLargeString/32k     136586    136584   1     5126  20.016kB peak-mem
-// BM_EncodeLargeString/256k   1094913   1094867   1      632 160.016kB peak-mem
-// BM_EncodeLargeString/2M     8771916   8773555   1       80   1.250MB peak-mem
-// BM_EncodeLargeString/16M   72575563  72590767   1       10  10.000MB peak-mem
-// BM_EncodeLargeString/128M 602461676 602487805   1        1  80.000MB peak-mem
+// BM_EncodeSmallStrings           255        255  0  2557738    0.000B peak-mem
+// BM_E.SmallStringsFast           247        247  0  2849201    0.000B peak-mem
+// BM_EncodeLargeString/1k        4270       4271  1   166430  656.000B peak-mem
+// BM_EncodeLargeString/4k       16819      16820  1    40483   2.516kB peak-mem
+// BM_EncodeLargeString/32k     135426     135416  1     5155  20.016kB peak-mem
+// BM_EncodeLargeString/256k   1080893    1080922  1      647 160.016kB peak-mem
+// BM_EncodeLargeString/2M     8698878    8699261  1       77   1.250MB peak-mem
+// BM_EncodeLargeString/16M   70013626   70009631  1       10  10.000MB peak-mem
+// BM_EncodeLargeString/128M 581697663  581739687  1        1  80.000MB peak-mem
+// BM_E.LargeStringFast/1k        3820       3820  1   184203  656.000B peak-mem
+// BM_E.LargeStringFast/4k       15148      15146  1    46341   2.516kB peak-mem
+// BM_E.LargeStringFast/32k     120409     120426  1     5803  20.016kB peak-mem
+// BM_E.LargeStringFast/256k    968802     968841  1      725 160.016kB peak-mem
+// BM_E.LargeStringFast/2M     7769441    7767875  1       90   1.250MB peak-mem
+// BM_E.LargeStringFast/16M   62571561   62581958  1       10  10.000MB peak-mem
+// BM_E.LargeStringFast/128M 527393576  527376986  1        1  80.000MB peak-mem
+// BM_EncodeLongCode/1k          15197      15200  1    45281   3.766kB peak-mem
+// BM_EncodeLongCode/4k          60782      60775  1    10000  15.016kB peak-mem
+// BM_EncodeLongCode/32k        489516     489692  1     1441 120.016kB peak-mem
+// BM_EncodeLongCode/256k      3902949    3905536  1      179 960.016kB peak-mem
+// BM_EncodeLongCode/2M       31275026   31281987  1       23   7.500MB peak-mem
+// BM_EncodeLongCode/16M     258391322  258361112  1        3  60.000MB peak-mem
+// BM_EncodeLongCode/128M   2098369854 2098398258  1        1 480.000MB peak-mem
+// BM_E.LongCodeFast/1k           3915       3915  1   179602   3.766kB peak-mem
+// BM_E.LongCodeFast/4k          15433      15435  1    45491  15.016kB peak-mem
+// BM_E.LongCodeFast/32k        124756     124750  1     5689 120.016kB peak-mem
+// BM_E.LongCodeFast/256k      1007523    1008291  1      700 960.016kB peak-mem
+// BM_E.LongCodeFast/2M        8129166    8132764  1       87   7.500MB peak-mem
+// BM_E.LongCodeFast/16M      72076964   72079949  1        9  60.000MB peak-mem
+// BM_E.LongCodeFast/128M    631963502  631948474  1        1 480.000MB peak-mem
 //
 
 #include <string>
@@ -44,6 +66,18 @@
   }
 }
 
+void BM_EncodeSmallStringsFast(benchmark::State& state) {
+  const std::vector<const std::string> inputs{
+      ":method", ":path", "cookie", "set-cookie", "vary", "accept-encoding"};
+  for (auto s : state) {
+    for (const auto& input : inputs) {
+      size_t encoded_size = ExactHuffmanSize(input);
+      std::string result;
+      HuffmanEncodeFast(input, encoded_size, &result);
+    }
+  }
+}
+
 void BM_EncodeLargeString(benchmark::State& state) {
   const std::string input(state.range(0), 'a');
   for (auto s : state) {
@@ -53,8 +87,43 @@
   }
 }
 
+void BM_EncodeLargeStringFast(benchmark::State& state) {
+  const std::string input(state.range(0), 'a');
+  for (auto s : state) {
+    size_t encoded_size = ExactHuffmanSize(input);
+    std::string result;
+    HuffmanEncodeFast(input, encoded_size, &result);
+  }
+}
+
+// 13 is one of the characters with the longest encoding: 30 bits.
+// This will never be run in production, because HuffmanEncode is only called on
+// strings that become shorter when encoded, but it gives an idea of compression
+// speed when many characters in the input are encoded with long codes.
+void BM_EncodeLongCode(benchmark::State& state) {
+  const std::string input(state.range(0), 13);
+  for (auto s : state) {
+    size_t encoded_size = ExactHuffmanSize(input);
+    std::string result;
+    HuffmanEncode(input, encoded_size, &result);
+  }
+}
+
+void BM_EncodeLongCodeFast(benchmark::State& state) {
+  const std::string input(state.range(0), 13);
+  for (auto s : state) {
+    size_t encoded_size = ExactHuffmanSize(input);
+    std::string result;
+    HuffmanEncodeFast(input, encoded_size, &result);
+  }
+}
+
 BENCHMARK(BM_EncodeSmallStrings);
 BENCHMARK(BM_EncodeLargeString)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeLongCode)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeSmallStringsFast);
+BENCHMARK(BM_EncodeLargeStringFast)->Range(1024, 128 * 1024 * 1024);
+BENCHMARK(BM_EncodeLongCodeFast)->Range(1024, 128 * 1024 * 1024);
 
 }  // namespace
 }  // namespace http2
diff --git a/http2/hpack/huffman/hpack_huffman_encoder_test.cc b/http2/hpack/huffman/hpack_huffman_encoder_test.cc
index 5528408..d73179c 100644
--- a/http2/hpack/huffman/hpack_huffman_encoder_test.cc
+++ b/http2/hpack/huffman/hpack_huffman_encoder_test.cc
@@ -11,7 +11,34 @@
 namespace http2 {
 namespace {
 
-TEST(HuffmanEncoderTest, SpecRequestExamples) {
+class HuffmanEncoderTest : public ::testing::TestWithParam<bool> {
+ protected:
+  HuffmanEncoderTest() : use_fast_encoder_(GetParam()) {}
+  virtual ~HuffmanEncoderTest() = default;
+
+  void Encode(quiche::QuicheStringPiece input,
+              size_t encoded_size,
+              std::string* output) {
+    use_fast_encoder_ ? HuffmanEncodeFast(input, encoded_size, output)
+                      : HuffmanEncode(input, encoded_size, output);
+  }
+
+  const bool use_fast_encoder_;
+};
+
+INSTANTIATE_TEST_SUITE_P(TwoEncoders, HuffmanEncoderTest, ::testing::Bool());
+
+TEST_P(HuffmanEncoderTest, Empty) {
+  std::string empty("");
+  size_t encoded_size = ExactHuffmanSize(empty);
+  EXPECT_EQ(0u, encoded_size);
+
+  std::string buffer;
+  Encode(empty, encoded_size, &buffer);
+  EXPECT_EQ("", buffer);
+}
+
+TEST_P(HuffmanEncoderTest, SpecRequestExamples) {
   std::string test_table[] = {
       Http2HexDecode("f1e3c2e5f23a6ba0ab90f4ff"),
       "www.example.com",
@@ -30,12 +57,12 @@
     EXPECT_EQ(BoundedHuffmanSize(plain_string), encoded_size);
     std::string buffer;
     buffer.reserve();
-    HuffmanEncode(plain_string, encoded_size, &buffer);
+    Encode(plain_string, encoded_size, &buffer);
     EXPECT_EQ(buffer, huffman_encoded) << "Error encoding " << plain_string;
   }
 }
 
-TEST(HuffmanEncoderTest, SpecResponseExamples) {
+TEST_P(HuffmanEncoderTest, SpecResponseExamples) {
   // clang-format off
   std::string test_table[] = {
     Http2HexDecode("6402"),
@@ -61,12 +88,12 @@
     EXPECT_EQ(huffman_encoded.size(), encoded_size);
     EXPECT_EQ(BoundedHuffmanSize(plain_string), encoded_size);
     std::string buffer;
-    HuffmanEncode(plain_string, encoded_size, &buffer);
+    Encode(plain_string, encoded_size, &buffer);
     EXPECT_EQ(buffer, huffman_encoded) << "Error encoding " << plain_string;
   }
 }
 
-TEST(HuffmanEncoderTest, EncodedSizeAgreesWithEncodeString) {
+TEST_P(HuffmanEncoderTest, EncodedSizeAgreesWithEncodeString) {
   std::string test_table[] = {
       "",
       "Mon, 21 Oct 2013 20:13:21 GMT",
@@ -85,12 +112,30 @@
     const std::string& plain_string = test_table[i];
     size_t encoded_size = ExactHuffmanSize(plain_string);
     std::string huffman_encoded;
-    HuffmanEncode(plain_string, encoded_size, &huffman_encoded);
+    Encode(plain_string, encoded_size, &huffman_encoded);
     EXPECT_EQ(encoded_size, huffman_encoded.size());
     EXPECT_LE(BoundedHuffmanSize(plain_string), plain_string.size());
     EXPECT_LE(BoundedHuffmanSize(plain_string), ExactHuffmanSize(plain_string));
   }
 }
 
+TEST_P(HuffmanEncoderTest, AppendToOutput) {
+  size_t encoded_size = ExactHuffmanSize("foo");
+  std::string buffer;
+  Encode("foo", encoded_size, &buffer);
+  EXPECT_EQ(Http2HexDecode("94e7"), buffer);
+
+  encoded_size = ExactHuffmanSize("bar");
+  Encode("bar", encoded_size, &buffer);
+
+  if (use_fast_encoder_) {
+    // HuffmanEncodeFast() appends to output allowing callers to eliminate copy.
+    EXPECT_EQ(Http2HexDecode("94e78c767f"), buffer);
+  } else {
+    // HuffmanEncode() clears output before encoding.
+    EXPECT_EQ(Http2HexDecode("8c767f"), buffer);
+  }
+}
+
 }  // namespace
 }  // namespace http2
diff --git a/http2/hpack/huffman/huffman_spec_tables.cc b/http2/hpack/huffman/huffman_spec_tables.cc
index 27707b9..2b1d8bd 100644
--- a/http2/hpack/huffman/huffman_spec_tables.cc
+++ b/http2/hpack/huffman/huffman_spec_tables.cc
@@ -46,9 +46,6 @@
 
 // TODO(jamessynge): Remove use of binary literals, that is a C++ 14 feature.
 
-// Uncomment these codes if needed for generating Huffman output, as opposed
-// to decoding Huffman input.
-/*
 // The encoding of each symbol, left justified (as printed), which means that
 // the first bit of the encoding is the high-order bit of the uint32.
 // static
@@ -311,7 +308,6 @@
     0b11111111111111111111101110000000,  // 0xff
     0b11111111111111111111111111111100,  // 0x100
 };
-*/
 
 // static
 const uint32_t HuffmanSpecTables::kRightCodes[] = {
diff --git a/http2/hpack/huffman/huffman_spec_tables.h b/http2/hpack/huffman/huffman_spec_tables.h
index 6cd8726..d1b144b 100644
--- a/http2/hpack/huffman/huffman_spec_tables.h
+++ b/http2/hpack/huffman/huffman_spec_tables.h
@@ -18,6 +18,10 @@
   // The encoding of each symbol, right justified (as printed), which means that
   // the last bit of the encoding is the low-order bit of the uint32.
   static const uint32_t kRightCodes[257];
+
+  // The encoding of each symbol, left justified (as printed), which means that
+  // the first bit of the encoding is the high-order bit of the uint32.
+  static const uint32_t kLeftCodes[257];
 };
 
 }  // namespace http2