QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 1 | // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "net/third_party/quiche/src/http2/hpack/huffman/hpack_huffman_decoder.h" |
| 6 | |
| 7 | #include <bitset> |
| 8 | #include <limits> |
| 9 | |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 10 | #include "net/third_party/quiche/src/http2/platform/api/http2_logging.h" |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 11 | |
| 12 | // Terminology: |
| 13 | // |
| 14 | // Symbol - a plain text (unencoded) character (uint8), or the End-of-String |
| 15 | // (EOS) symbol, 256. |
| 16 | // |
| 17 | // Code - the sequence of bits used to encode a symbol, varying in length from |
| 18 | // 5 bits for the most common symbols (e.g. '0', '1', and 'a'), to |
| 19 | // 30 bits for the least common (e.g. the EOS symbol). |
| 20 | // For those symbols whose codes have the same length, their code values |
| 21 | // are sorted such that the lower symbol value has a lower code value. |
| 22 | // |
| 23 | // Canonical - a symbol's cardinal value when sorted first by code length, and |
| 24 | // then by symbol value. For example, canonical 0 is for ASCII '0' |
| 25 | // (uint8 value 0x30), which is the first of the symbols whose code |
| 26 | // is 5 bits long, and the last canonical is EOS, which is the last |
| 27 | // of the symbols whose code is 30 bits long. |
| 28 | |
| 29 | // TODO(jamessynge): Remove use of binary literals, that is a C++ 14 feature. |
| 30 | |
| 31 | namespace http2 { |
| 32 | namespace { |
| 33 | |
| 34 | // HuffmanCode is used to store the codes associated with symbols (a pattern of |
| 35 | // from 5 to 30 bits). |
| 36 | typedef uint32_t HuffmanCode; |
| 37 | |
| 38 | // HuffmanCodeBitCount is used to store a count of bits in a code. |
| 39 | typedef uint16_t HuffmanCodeBitCount; |
| 40 | |
| 41 | // HuffmanCodeBitSet is used for producing a string version of a code because |
| 42 | // std::bitset logs nicely. |
| 43 | typedef std::bitset<32> HuffmanCodeBitSet; |
| 44 | typedef std::bitset<64> HuffmanAccumulatorBitSet; |
| 45 | |
| 46 | static constexpr HuffmanCodeBitCount kMinCodeBitCount = 5; |
| 47 | static constexpr HuffmanCodeBitCount kMaxCodeBitCount = 30; |
| 48 | static constexpr HuffmanCodeBitCount kHuffmanCodeBitCount = |
| 49 | std::numeric_limits<HuffmanCode>::digits; |
| 50 | |
| 51 | static_assert(std::numeric_limits<HuffmanCode>::digits >= kMaxCodeBitCount, |
| 52 | "HuffmanCode isn't big enough."); |
| 53 | |
| 54 | static_assert(std::numeric_limits<HuffmanAccumulator>::digits >= |
| 55 | kMaxCodeBitCount, |
| 56 | "HuffmanAccumulator isn't big enough."); |
| 57 | |
| 58 | static constexpr HuffmanAccumulatorBitCount kHuffmanAccumulatorBitCount = |
| 59 | std::numeric_limits<HuffmanAccumulator>::digits; |
| 60 | static constexpr HuffmanAccumulatorBitCount kExtraAccumulatorBitCount = |
| 61 | kHuffmanAccumulatorBitCount - kHuffmanCodeBitCount; |
| 62 | |
| 63 | // PrefixInfo holds info about a group of codes that are all of the same length. |
| 64 | struct PrefixInfo { |
| 65 | // Given the leading bits (32 in this case) of the encoded string, and that |
| 66 | // they start with a code of length |code_length|, return the corresponding |
| 67 | // canonical for that leading code. |
| 68 | uint32_t DecodeToCanonical(HuffmanCode bits) const { |
| 69 | // What is the position of the canonical symbol being decoded within |
| 70 | // the canonical symbols of |length|? |
| 71 | HuffmanCode ordinal_in_length = |
| 72 | ((bits - first_code) >> (kHuffmanCodeBitCount - code_length)); |
| 73 | |
| 74 | // Combined with |canonical| to produce the position of the canonical symbol |
| 75 | // being decoded within all of the canonical symbols. |
| 76 | return first_canonical + ordinal_in_length; |
| 77 | } |
| 78 | |
| 79 | const HuffmanCode first_code; // First code of this length, left justified in |
| 80 | // the field (i.e. the first bit of the code is |
| 81 | // the high-order bit). |
| 82 | const uint16_t code_length; // Length of the prefix code |base|. |
| 83 | const uint16_t first_canonical; // First canonical symbol of this length. |
| 84 | }; |
| 85 | |
| 86 | inline std::ostream& operator<<(std::ostream& out, const PrefixInfo& v) { |
| 87 | return out << "{first_code: " << HuffmanCodeBitSet(v.first_code) |
| 88 | << ", code_length: " << v.code_length |
| 89 | << ", first_canonical: " << v.first_canonical << "}"; |
| 90 | } |
| 91 | |
| 92 | // Given |value|, a sequence of the leading bits remaining to be decoded, |
| 93 | // figure out which group of canonicals (by code length) that value starts |
| 94 | // with. This function was generated. |
| 95 | PrefixInfo PrefixToInfo(HuffmanCode value) { |
| 96 | if (value < 0b10111000000000000000000000000000) { |
| 97 | if (value < 0b01010000000000000000000000000000) { |
| 98 | return {0b00000000000000000000000000000000, 5, 0}; |
| 99 | } else { |
| 100 | return {0b01010000000000000000000000000000, 6, 10}; |
| 101 | } |
| 102 | } else { |
| 103 | if (value < 0b11111110000000000000000000000000) { |
| 104 | if (value < 0b11111000000000000000000000000000) { |
| 105 | return {0b10111000000000000000000000000000, 7, 36}; |
| 106 | } else { |
| 107 | return {0b11111000000000000000000000000000, 8, 68}; |
| 108 | } |
| 109 | } else { |
| 110 | if (value < 0b11111111110000000000000000000000) { |
| 111 | if (value < 0b11111111101000000000000000000000) { |
| 112 | if (value < 0b11111111010000000000000000000000) { |
| 113 | return {0b11111110000000000000000000000000, 10, 74}; |
| 114 | } else { |
| 115 | return {0b11111111010000000000000000000000, 11, 79}; |
| 116 | } |
| 117 | } else { |
| 118 | return {0b11111111101000000000000000000000, 12, 82}; |
| 119 | } |
| 120 | } else { |
| 121 | if (value < 0b11111111111111100000000000000000) { |
| 122 | if (value < 0b11111111111110000000000000000000) { |
| 123 | if (value < 0b11111111111100000000000000000000) { |
| 124 | return {0b11111111110000000000000000000000, 13, 84}; |
| 125 | } else { |
| 126 | return {0b11111111111100000000000000000000, 14, 90}; |
| 127 | } |
| 128 | } else { |
| 129 | return {0b11111111111110000000000000000000, 15, 92}; |
| 130 | } |
| 131 | } else { |
| 132 | if (value < 0b11111111111111110100100000000000) { |
| 133 | if (value < 0b11111111111111101110000000000000) { |
| 134 | if (value < 0b11111111111111100110000000000000) { |
| 135 | return {0b11111111111111100000000000000000, 19, 95}; |
| 136 | } else { |
| 137 | return {0b11111111111111100110000000000000, 20, 98}; |
| 138 | } |
| 139 | } else { |
| 140 | return {0b11111111111111101110000000000000, 21, 106}; |
| 141 | } |
| 142 | } else { |
| 143 | if (value < 0b11111111111111111110101000000000) { |
| 144 | if (value < 0b11111111111111111011000000000000) { |
| 145 | return {0b11111111111111110100100000000000, 22, 119}; |
| 146 | } else { |
| 147 | return {0b11111111111111111011000000000000, 23, 145}; |
| 148 | } |
| 149 | } else { |
| 150 | if (value < 0b11111111111111111111101111000000) { |
| 151 | if (value < 0b11111111111111111111100000000000) { |
| 152 | if (value < 0b11111111111111111111011000000000) { |
| 153 | return {0b11111111111111111110101000000000, 24, 174}; |
| 154 | } else { |
| 155 | return {0b11111111111111111111011000000000, 25, 186}; |
| 156 | } |
| 157 | } else { |
| 158 | return {0b11111111111111111111100000000000, 26, 190}; |
| 159 | } |
| 160 | } else { |
| 161 | if (value < 0b11111111111111111111111111110000) { |
| 162 | if (value < 0b11111111111111111111111000100000) { |
| 163 | return {0b11111111111111111111101111000000, 27, 205}; |
| 164 | } else { |
| 165 | return {0b11111111111111111111111000100000, 28, 224}; |
| 166 | } |
| 167 | } else { |
| 168 | return {0b11111111111111111111111111110000, 30, 253}; |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | } |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | // Mapping from canonical symbol (0 to 255) to actual symbol. |
| 180 | // clang-format off |
| 181 | constexpr unsigned char kCanonicalToSymbol[] = { |
| 182 | '0', '1', '2', 'a', 'c', 'e', 'i', 'o', |
| 183 | 's', 't', 0x20, '%', '-', '.', '/', '3', |
| 184 | '4', '5', '6', '7', '8', '9', '=', 'A', |
| 185 | '_', 'b', 'd', 'f', 'g', 'h', 'l', 'm', |
| 186 | 'n', 'p', 'r', 'u', ':', 'B', 'C', 'D', |
| 187 | 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', |
| 188 | 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', |
| 189 | 'U', 'V', 'W', 'Y', 'j', 'k', 'q', 'v', |
| 190 | 'w', 'x', 'y', 'z', '&', '*', ',', ';', |
| 191 | 'X', 'Z', '!', '\"', '(', ')', '?', '\'', |
| 192 | '+', '|', '#', '>', 0x00, '$', '@', '[', |
| 193 | ']', '~', '^', '}', '<', '`', '{', '\\', |
| 194 | 0xc3, 0xd0, 0x80, 0x82, 0x83, 0xa2, 0xb8, 0xc2, |
| 195 | 0xe0, 0xe2, 0x99, 0xa1, 0xa7, 0xac, 0xb0, 0xb1, |
| 196 | 0xb3, 0xd1, 0xd8, 0xd9, 0xe3, 0xe5, 0xe6, 0x81, |
| 197 | 0x84, 0x85, 0x86, 0x88, 0x92, 0x9a, 0x9c, 0xa0, |
| 198 | 0xa3, 0xa4, 0xa9, 0xaa, 0xad, 0xb2, 0xb5, 0xb9, |
| 199 | 0xba, 0xbb, 0xbd, 0xbe, 0xc4, 0xc6, 0xe4, 0xe8, |
| 200 | 0xe9, 0x01, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, |
| 201 | 0x8f, 0x93, 0x95, 0x96, 0x97, 0x98, 0x9b, 0x9d, |
| 202 | 0x9e, 0xa5, 0xa6, 0xa8, 0xae, 0xaf, 0xb4, 0xb6, |
| 203 | 0xb7, 0xbc, 0xbf, 0xc5, 0xe7, 0xef, 0x09, 0x8e, |
| 204 | 0x90, 0x91, 0x94, 0x9f, 0xab, 0xce, 0xd7, 0xe1, |
| 205 | 0xec, 0xed, 0xc7, 0xcf, 0xea, 0xeb, 0xc0, 0xc1, |
| 206 | 0xc8, 0xc9, 0xca, 0xcd, 0xd2, 0xd5, 0xda, 0xdb, |
| 207 | 0xee, 0xf0, 0xf2, 0xf3, 0xff, 0xcb, 0xcc, 0xd3, |
| 208 | 0xd4, 0xd6, 0xdd, 0xde, 0xdf, 0xf1, 0xf4, 0xf5, |
| 209 | 0xf6, 0xf7, 0xf8, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, |
| 210 | 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0b, |
| 211 | 0x0c, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, |
| 212 | 0x15, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, |
| 213 | 0x1e, 0x1f, 0x7f, 0xdc, 0xf9, 0x0a, 0x0d, 0x16, |
| 214 | }; |
| 215 | // clang-format on |
| 216 | |
| 217 | constexpr size_t kShortCodeTableSize = 124; |
| 218 | struct ShortCodeInfo { |
| 219 | uint8_t symbol; |
| 220 | uint8_t length; |
| 221 | } kShortCodeTable[kShortCodeTableSize] = { |
| 222 | {0x30, 5}, // Match: 0b0000000, Symbol: 0 |
| 223 | {0x30, 5}, // Match: 0b0000001, Symbol: 0 |
| 224 | {0x30, 5}, // Match: 0b0000010, Symbol: 0 |
| 225 | {0x30, 5}, // Match: 0b0000011, Symbol: 0 |
| 226 | {0x31, 5}, // Match: 0b0000100, Symbol: 1 |
| 227 | {0x31, 5}, // Match: 0b0000101, Symbol: 1 |
| 228 | {0x31, 5}, // Match: 0b0000110, Symbol: 1 |
| 229 | {0x31, 5}, // Match: 0b0000111, Symbol: 1 |
| 230 | {0x32, 5}, // Match: 0b0001000, Symbol: 2 |
| 231 | {0x32, 5}, // Match: 0b0001001, Symbol: 2 |
| 232 | {0x32, 5}, // Match: 0b0001010, Symbol: 2 |
| 233 | {0x32, 5}, // Match: 0b0001011, Symbol: 2 |
| 234 | {0x61, 5}, // Match: 0b0001100, Symbol: a |
| 235 | {0x61, 5}, // Match: 0b0001101, Symbol: a |
| 236 | {0x61, 5}, // Match: 0b0001110, Symbol: a |
| 237 | {0x61, 5}, // Match: 0b0001111, Symbol: a |
| 238 | {0x63, 5}, // Match: 0b0010000, Symbol: c |
| 239 | {0x63, 5}, // Match: 0b0010001, Symbol: c |
| 240 | {0x63, 5}, // Match: 0b0010010, Symbol: c |
| 241 | {0x63, 5}, // Match: 0b0010011, Symbol: c |
| 242 | {0x65, 5}, // Match: 0b0010100, Symbol: e |
| 243 | {0x65, 5}, // Match: 0b0010101, Symbol: e |
| 244 | {0x65, 5}, // Match: 0b0010110, Symbol: e |
| 245 | {0x65, 5}, // Match: 0b0010111, Symbol: e |
| 246 | {0x69, 5}, // Match: 0b0011000, Symbol: i |
| 247 | {0x69, 5}, // Match: 0b0011001, Symbol: i |
| 248 | {0x69, 5}, // Match: 0b0011010, Symbol: i |
| 249 | {0x69, 5}, // Match: 0b0011011, Symbol: i |
| 250 | {0x6f, 5}, // Match: 0b0011100, Symbol: o |
| 251 | {0x6f, 5}, // Match: 0b0011101, Symbol: o |
| 252 | {0x6f, 5}, // Match: 0b0011110, Symbol: o |
| 253 | {0x6f, 5}, // Match: 0b0011111, Symbol: o |
| 254 | {0x73, 5}, // Match: 0b0100000, Symbol: s |
| 255 | {0x73, 5}, // Match: 0b0100001, Symbol: s |
| 256 | {0x73, 5}, // Match: 0b0100010, Symbol: s |
| 257 | {0x73, 5}, // Match: 0b0100011, Symbol: s |
| 258 | {0x74, 5}, // Match: 0b0100100, Symbol: t |
| 259 | {0x74, 5}, // Match: 0b0100101, Symbol: t |
| 260 | {0x74, 5}, // Match: 0b0100110, Symbol: t |
| 261 | {0x74, 5}, // Match: 0b0100111, Symbol: t |
| 262 | {0x20, 6}, // Match: 0b0101000, Symbol: (space) |
| 263 | {0x20, 6}, // Match: 0b0101001, Symbol: (space) |
| 264 | {0x25, 6}, // Match: 0b0101010, Symbol: % |
| 265 | {0x25, 6}, // Match: 0b0101011, Symbol: % |
| 266 | {0x2d, 6}, // Match: 0b0101100, Symbol: - |
| 267 | {0x2d, 6}, // Match: 0b0101101, Symbol: - |
| 268 | {0x2e, 6}, // Match: 0b0101110, Symbol: . |
| 269 | {0x2e, 6}, // Match: 0b0101111, Symbol: . |
| 270 | {0x2f, 6}, // Match: 0b0110000, Symbol: / |
| 271 | {0x2f, 6}, // Match: 0b0110001, Symbol: / |
| 272 | {0x33, 6}, // Match: 0b0110010, Symbol: 3 |
| 273 | {0x33, 6}, // Match: 0b0110011, Symbol: 3 |
| 274 | {0x34, 6}, // Match: 0b0110100, Symbol: 4 |
| 275 | {0x34, 6}, // Match: 0b0110101, Symbol: 4 |
| 276 | {0x35, 6}, // Match: 0b0110110, Symbol: 5 |
| 277 | {0x35, 6}, // Match: 0b0110111, Symbol: 5 |
| 278 | {0x36, 6}, // Match: 0b0111000, Symbol: 6 |
| 279 | {0x36, 6}, // Match: 0b0111001, Symbol: 6 |
| 280 | {0x37, 6}, // Match: 0b0111010, Symbol: 7 |
| 281 | {0x37, 6}, // Match: 0b0111011, Symbol: 7 |
| 282 | {0x38, 6}, // Match: 0b0111100, Symbol: 8 |
| 283 | {0x38, 6}, // Match: 0b0111101, Symbol: 8 |
| 284 | {0x39, 6}, // Match: 0b0111110, Symbol: 9 |
| 285 | {0x39, 6}, // Match: 0b0111111, Symbol: 9 |
| 286 | {0x3d, 6}, // Match: 0b1000000, Symbol: = |
| 287 | {0x3d, 6}, // Match: 0b1000001, Symbol: = |
| 288 | {0x41, 6}, // Match: 0b1000010, Symbol: A |
| 289 | {0x41, 6}, // Match: 0b1000011, Symbol: A |
| 290 | {0x5f, 6}, // Match: 0b1000100, Symbol: _ |
| 291 | {0x5f, 6}, // Match: 0b1000101, Symbol: _ |
| 292 | {0x62, 6}, // Match: 0b1000110, Symbol: b |
| 293 | {0x62, 6}, // Match: 0b1000111, Symbol: b |
| 294 | {0x64, 6}, // Match: 0b1001000, Symbol: d |
| 295 | {0x64, 6}, // Match: 0b1001001, Symbol: d |
| 296 | {0x66, 6}, // Match: 0b1001010, Symbol: f |
| 297 | {0x66, 6}, // Match: 0b1001011, Symbol: f |
| 298 | {0x67, 6}, // Match: 0b1001100, Symbol: g |
| 299 | {0x67, 6}, // Match: 0b1001101, Symbol: g |
| 300 | {0x68, 6}, // Match: 0b1001110, Symbol: h |
| 301 | {0x68, 6}, // Match: 0b1001111, Symbol: h |
| 302 | {0x6c, 6}, // Match: 0b1010000, Symbol: l |
| 303 | {0x6c, 6}, // Match: 0b1010001, Symbol: l |
| 304 | {0x6d, 6}, // Match: 0b1010010, Symbol: m |
| 305 | {0x6d, 6}, // Match: 0b1010011, Symbol: m |
| 306 | {0x6e, 6}, // Match: 0b1010100, Symbol: n |
| 307 | {0x6e, 6}, // Match: 0b1010101, Symbol: n |
| 308 | {0x70, 6}, // Match: 0b1010110, Symbol: p |
| 309 | {0x70, 6}, // Match: 0b1010111, Symbol: p |
| 310 | {0x72, 6}, // Match: 0b1011000, Symbol: r |
| 311 | {0x72, 6}, // Match: 0b1011001, Symbol: r |
| 312 | {0x75, 6}, // Match: 0b1011010, Symbol: u |
| 313 | {0x75, 6}, // Match: 0b1011011, Symbol: u |
| 314 | {0x3a, 7}, // Match: 0b1011100, Symbol: : |
| 315 | {0x42, 7}, // Match: 0b1011101, Symbol: B |
| 316 | {0x43, 7}, // Match: 0b1011110, Symbol: C |
| 317 | {0x44, 7}, // Match: 0b1011111, Symbol: D |
| 318 | {0x45, 7}, // Match: 0b1100000, Symbol: E |
| 319 | {0x46, 7}, // Match: 0b1100001, Symbol: F |
| 320 | {0x47, 7}, // Match: 0b1100010, Symbol: G |
| 321 | {0x48, 7}, // Match: 0b1100011, Symbol: H |
| 322 | {0x49, 7}, // Match: 0b1100100, Symbol: I |
| 323 | {0x4a, 7}, // Match: 0b1100101, Symbol: J |
| 324 | {0x4b, 7}, // Match: 0b1100110, Symbol: K |
| 325 | {0x4c, 7}, // Match: 0b1100111, Symbol: L |
| 326 | {0x4d, 7}, // Match: 0b1101000, Symbol: M |
| 327 | {0x4e, 7}, // Match: 0b1101001, Symbol: N |
| 328 | {0x4f, 7}, // Match: 0b1101010, Symbol: O |
| 329 | {0x50, 7}, // Match: 0b1101011, Symbol: P |
| 330 | {0x51, 7}, // Match: 0b1101100, Symbol: Q |
| 331 | {0x52, 7}, // Match: 0b1101101, Symbol: R |
| 332 | {0x53, 7}, // Match: 0b1101110, Symbol: S |
| 333 | {0x54, 7}, // Match: 0b1101111, Symbol: T |
| 334 | {0x55, 7}, // Match: 0b1110000, Symbol: U |
| 335 | {0x56, 7}, // Match: 0b1110001, Symbol: V |
| 336 | {0x57, 7}, // Match: 0b1110010, Symbol: W |
| 337 | {0x59, 7}, // Match: 0b1110011, Symbol: Y |
| 338 | {0x6a, 7}, // Match: 0b1110100, Symbol: j |
| 339 | {0x6b, 7}, // Match: 0b1110101, Symbol: k |
| 340 | {0x71, 7}, // Match: 0b1110110, Symbol: q |
| 341 | {0x76, 7}, // Match: 0b1110111, Symbol: v |
| 342 | {0x77, 7}, // Match: 0b1111000, Symbol: w |
| 343 | {0x78, 7}, // Match: 0b1111001, Symbol: x |
| 344 | {0x79, 7}, // Match: 0b1111010, Symbol: y |
| 345 | {0x7a, 7}, // Match: 0b1111011, Symbol: z |
| 346 | }; |
| 347 | |
| 348 | } // namespace |
| 349 | |
| 350 | HuffmanBitBuffer::HuffmanBitBuffer() { |
| 351 | Reset(); |
| 352 | } |
| 353 | |
| 354 | void HuffmanBitBuffer::Reset() { |
| 355 | accumulator_ = 0; |
| 356 | count_ = 0; |
| 357 | } |
| 358 | |
| 359 | size_t HuffmanBitBuffer::AppendBytes(Http2StringPiece input) { |
| 360 | HuffmanAccumulatorBitCount free_cnt = free_count(); |
| 361 | size_t bytes_available = input.size(); |
| 362 | if (free_cnt < 8 || bytes_available == 0) { |
| 363 | return 0; |
| 364 | } |
| 365 | |
| 366 | // Top up |accumulator_| until there isn't room for a whole byte. |
| 367 | size_t bytes_used = 0; |
| 368 | auto* ptr = reinterpret_cast<const uint8_t*>(input.data()); |
| 369 | do { |
| 370 | auto b = static_cast<HuffmanAccumulator>(*ptr++); |
| 371 | free_cnt -= 8; |
| 372 | accumulator_ |= (b << free_cnt); |
| 373 | ++bytes_used; |
| 374 | } while (free_cnt >= 8 && bytes_used < bytes_available); |
| 375 | count_ += (bytes_used * 8); |
| 376 | return bytes_used; |
| 377 | } |
| 378 | |
| 379 | HuffmanAccumulatorBitCount HuffmanBitBuffer::free_count() const { |
| 380 | return kHuffmanAccumulatorBitCount - count_; |
| 381 | } |
| 382 | |
| 383 | void HuffmanBitBuffer::ConsumeBits(HuffmanAccumulatorBitCount code_length) { |
| 384 | DCHECK_LE(code_length, count_); |
| 385 | accumulator_ <<= code_length; |
| 386 | count_ -= code_length; |
| 387 | } |
| 388 | |
| 389 | bool HuffmanBitBuffer::InputProperlyTerminated() const { |
| 390 | auto cnt = count(); |
| 391 | if (cnt < 8) { |
| 392 | if (cnt == 0) { |
| 393 | return true; |
| 394 | } |
| 395 | HuffmanAccumulator expected = ~(~HuffmanAccumulator() >> cnt); |
| 396 | // We expect all the bits below the high order |cnt| bits of accumulator_ |
| 397 | // to be cleared as we perform left shift operations while decoding. |
| 398 | DCHECK_EQ(accumulator_ & ~expected, 0u) |
| 399 | << "\n expected: " << HuffmanAccumulatorBitSet(expected) << "\n " |
| 400 | << *this; |
| 401 | return accumulator_ == expected; |
| 402 | } |
| 403 | return false; |
| 404 | } |
| 405 | |
| 406 | Http2String HuffmanBitBuffer::DebugString() const { |
| 407 | std::stringstream ss; |
| 408 | ss << "{accumulator: " << HuffmanAccumulatorBitSet(accumulator_) |
| 409 | << "; count: " << count_ << "}"; |
| 410 | return ss.str(); |
| 411 | } |
| 412 | |
| 413 | HpackHuffmanDecoder::HpackHuffmanDecoder() = default; |
| 414 | |
| 415 | HpackHuffmanDecoder::~HpackHuffmanDecoder() = default; |
| 416 | |
| 417 | bool HpackHuffmanDecoder::Decode(Http2StringPiece input, Http2String* output) { |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 418 | HTTP2_DVLOG(1) << "HpackHuffmanDecoder::Decode"; |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 419 | |
| 420 | // Fill bit_buffer_ from input. |
| 421 | input.remove_prefix(bit_buffer_.AppendBytes(input)); |
| 422 | |
| 423 | while (true) { |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 424 | HTTP2_DVLOG(3) << "Enter Decode Loop, bit_buffer_: " << bit_buffer_; |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 425 | if (bit_buffer_.count() >= 7) { |
| 426 | // Get high 7 bits of the bit buffer, see if that contains a complete |
| 427 | // code of 5, 6 or 7 bits. |
| 428 | uint8_t short_code = |
| 429 | bit_buffer_.value() >> (kHuffmanAccumulatorBitCount - 7); |
| 430 | DCHECK_LT(short_code, 128); |
| 431 | if (short_code < kShortCodeTableSize) { |
| 432 | ShortCodeInfo info = kShortCodeTable[short_code]; |
| 433 | bit_buffer_.ConsumeBits(info.length); |
| 434 | output->push_back(static_cast<char>(info.symbol)); |
| 435 | continue; |
| 436 | } |
| 437 | // The code is more than 7 bits long. Use PrefixToInfo, etc. to decode |
| 438 | // longer codes. |
| 439 | } else { |
| 440 | // We may have (mostly) drained bit_buffer_. If we can top it up, try |
| 441 | // using the table decoder above. |
| 442 | size_t byte_count = bit_buffer_.AppendBytes(input); |
| 443 | if (byte_count > 0) { |
| 444 | input.remove_prefix(byte_count); |
| 445 | continue; |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | HuffmanCode code_prefix = bit_buffer_.value() >> kExtraAccumulatorBitCount; |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 450 | HTTP2_DVLOG(3) << "code_prefix: " << HuffmanCodeBitSet(code_prefix); |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 451 | |
| 452 | PrefixInfo prefix_info = PrefixToInfo(code_prefix); |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 453 | HTTP2_DVLOG(3) << "prefix_info: " << prefix_info; |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 454 | DCHECK_LE(kMinCodeBitCount, prefix_info.code_length); |
| 455 | DCHECK_LE(prefix_info.code_length, kMaxCodeBitCount); |
| 456 | |
| 457 | if (prefix_info.code_length <= bit_buffer_.count()) { |
| 458 | // We have enough bits for one code. |
| 459 | uint32_t canonical = prefix_info.DecodeToCanonical(code_prefix); |
| 460 | if (canonical < 256) { |
| 461 | // Valid code. |
| 462 | char c = kCanonicalToSymbol[canonical]; |
| 463 | output->push_back(c); |
| 464 | bit_buffer_.ConsumeBits(prefix_info.code_length); |
| 465 | continue; |
| 466 | } |
| 467 | // Encoder is not supposed to explicity encode the EOS symbol. |
QUICHE team | 61940b4 | 2019-03-07 23:32:27 -0500 | [diff] [blame] | 468 | HTTP2_DLOG(ERROR) << "EOS explicitly encoded!\n " << bit_buffer_ << "\n " |
| 469 | << prefix_info; |
QUICHE team | fd50a40 | 2018-12-07 22:54:05 -0500 | [diff] [blame] | 470 | return false; |
| 471 | } |
| 472 | // bit_buffer_ doesn't have enough bits in it to decode the next symbol. |
| 473 | // Append to it as many bytes as are available AND fit. |
| 474 | size_t byte_count = bit_buffer_.AppendBytes(input); |
| 475 | if (byte_count == 0) { |
| 476 | DCHECK_EQ(input.size(), 0u); |
| 477 | return true; |
| 478 | } |
| 479 | input.remove_prefix(byte_count); |
| 480 | } |
| 481 | } |
| 482 | |
| 483 | Http2String HpackHuffmanDecoder::DebugString() const { |
| 484 | return bit_buffer_.DebugString(); |
| 485 | } |
| 486 | |
| 487 | } // namespace http2 |