Internal change PiperOrigin-RevId: 442098384
diff --git a/quiche/common/balsa/balsa_enums.cc b/quiche/common/balsa/balsa_enums.cc new file mode 100644 index 0000000..1bafa62 --- /dev/null +++ b/quiche/common/balsa/balsa_enums.cc
@@ -0,0 +1,115 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "quiche/common/balsa/balsa_enums.h"
+
+namespace quiche {
+
+// Name of a ParseState enumerator; "UNKNOWN_STATE" for any other value.
+const char* BalsaFrameEnums::ParseStateToString(ParseState error_code) {
+  switch (error_code) {
+    case ERROR:
+      return "ERROR";
+    case READING_HEADER_AND_FIRSTLINE:
+      return "READING_HEADER_AND_FIRSTLINE";
+    case READING_CHUNK_LENGTH:
+      return "READING_CHUNK_LENGTH";
+    case READING_CHUNK_EXTENSION:
+      return "READING_CHUNK_EXTENSION";
+    case READING_CHUNK_DATA:
+      return "READING_CHUNK_DATA";
+    case READING_CHUNK_TERM:
+      return "READING_CHUNK_TERM";
+    case READING_LAST_CHUNK_TERM:
+      return "READING_LAST_CHUNK_TERM";
+    case READING_TRAILER:
+      return "READING_TRAILER";
+    case READING_UNTIL_CLOSE:
+      return "READING_UNTIL_CLOSE";
+    case READING_CONTENT:
+      return "READING_CONTENT";
+    case MESSAGE_FULLY_READ:
+      return "MESSAGE_FULLY_READ";
+    case NUM_STATES:
+      break;  // sentinel: uses the shared fallback below
+  }
+  return "UNKNOWN_STATE";
+}
+
+// Name of an ErrorCode enumerator; "UNKNOWN_ERROR" for any other value.
+const char* BalsaFrameEnums::ErrorCodeToString(ErrorCode error_code) {
+  switch (error_code) {
+    case NO_ERROR:
+      return "NO_ERROR";
+    case NO_STATUS_LINE_IN_RESPONSE:
+      return "NO_STATUS_LINE_IN_RESPONSE";
+    case NO_REQUEST_LINE_IN_REQUEST:
+      return "NO_REQUEST_LINE_IN_REQUEST";
+    case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:
+      return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";
+    case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:
+      return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";
+    case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:
+      return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";
+    case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:
+      return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";
+    case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:
+      return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";
+    case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:
+      return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";
+    case FAILED_CONVERTING_STATUS_CODE_TO_INT:
+      return "FAILED_CONVERTING_STATUS_CODE_TO_INT";
+    case HEADERS_TOO_LONG:
+      return "HEADERS_TOO_LONG";
+    case UNPARSABLE_CONTENT_LENGTH:
+      return "UNPARSABLE_CONTENT_LENGTH";
+    case HTTP2_CONTENT_LENGTH_ERROR:
+      return "HTTP2_CONTENT_LENGTH_ERROR";
+    case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:
+      return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";
+    case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:
+      return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";
+    case HEADER_MISSING_COLON:
+      return "HEADER_MISSING_COLON";
+    case INVALID_CHUNK_LENGTH:
+      return "INVALID_CHUNK_LENGTH";
+    case CHUNK_LENGTH_OVERFLOW:
+      return "CHUNK_LENGTH_OVERFLOW";
+    case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:
+      return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";
+    case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:
+      return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";
+    case MULTIPLE_CONTENT_LENGTH_KEYS:
+      return "MULTIPLE_CONTENT_LENGTH_KEYS";
+    case MULTIPLE_TRANSFER_ENCODING_KEYS:
+      return "MULTIPLE_TRANSFER_ENCODING_KEYS";
+    case UNKNOWN_TRANSFER_ENCODING:
+      return "UNKNOWN_TRANSFER_ENCODING";
+    case BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH:
+      return "BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH";
+    case INVALID_HEADER_FORMAT:
+      return "INVALID_HEADER_FORMAT";
+    case HTTP2_INVALID_HEADER_FORMAT:
+      return "HTTP2_INVALID_HEADER_FORMAT";
+    case INVALID_TRAILER_FORMAT:
+      return "INVALID_TRAILER_FORMAT";
+    case TRAILER_TOO_LONG:
+      return "TRAILER_TOO_LONG";
+    case TRAILER_MISSING_COLON:
+      return "TRAILER_MISSING_COLON";
+    case INTERNAL_LOGIC_ERROR:
+      return "INTERNAL_LOGIC_ERROR";
+    case INVALID_HEADER_CHARACTER:
+      return "INVALID_HEADER_CHARACTER";
+    case INVALID_HEADER_NAME_CHARACTER:
+      return "INVALID_HEADER_NAME_CHARACTER";
+    case INVALID_TRAILER_NAME_CHARACTER:
+      return "INVALID_TRAILER_NAME_CHARACTER";
+    case NUM_ERROR_CODES:
+      break;  // sentinel: uses the shared fallback below
+  }
+  return "UNKNOWN_ERROR";
+}
+
+}  // namespace quiche
diff --git a/quiche/common/balsa/balsa_enums.h b/quiche/common/balsa/balsa_enums.h new file mode 100644 index 0000000..0136305 --- /dev/null +++ b/quiche/common/balsa/balsa_enums.h
@@ -0,0 +1,126 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef QUICHE_COMMON_BALSA_BALSA_ENUMS_H_
+#define QUICHE_COMMON_BALSA_BALSA_ENUMS_H_
+
+#include "quiche/common/platform/api/quiche_export.h"
+
+namespace quiche {
+
+// Parse states and error codes used by the Balsa HTTP framer (BalsaFrame).
+struct QUICHE_EXPORT_PRIVATE BalsaFrameEnums {
+  enum ParseState {
+    ERROR,
+    READING_HEADER_AND_FIRSTLINE,
+    READING_CHUNK_LENGTH,
+    READING_CHUNK_EXTENSION,
+    READING_CHUNK_DATA,
+    READING_CHUNK_TERM,
+    READING_LAST_CHUNK_TERM,
+    READING_TRAILER,
+    READING_UNTIL_CLOSE,
+    READING_CONTENT,
+    MESSAGE_FULLY_READ,
+    NUM_STATES,
+  };
+
+  enum ErrorCode {
+    // A sentinel value for convenience, none of the callbacks should ever see
+    // this error code.
+    NO_ERROR = 0,
+
+    // Header parsing errors.
+    // The first-line errors below come in RESPONSE/REQUEST pairs in which the
+    // RESPONSE code immediately precedes its REQUEST counterpart, so adding
+    // one to a RESPONSE error yields the matching REQUEST error.
+    // When parsing the first line of a request or response, there are three
+    // sequences of non-whitespace regardless of whether it is a request or
+    // a response. These are listed below, in order.
+    //
+    //        firstline_a     firstline_b    firstline_c
+    //    REQ: method         request_uri    version
+    //   RESP: version        statuscode     reason
+    //
+    // The first token is the 'method' field for a request and the 'version'
+    // field for a response. We call the first non-whitespace token
+    // firstline_a, the second firstline_b, and the third token (followed by
+    // [^\r\n]*) firstline_c.
+    //
+    // This ordering lets the parser choose the error code without branching
+    // on the message type: it adds is_request (0 or 1) to the RESPONSE error
+    // code, which yields the REQUEST error code when is_request is true.
+    //                                                  | Triggered
+    //                                                  | while processing
+    //                                                  | this NONWS
+    //                                                  | sequence...
+    NO_STATUS_LINE_IN_RESPONSE,                      // |
+    NO_REQUEST_LINE_IN_REQUEST,                      // |
+    FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION,        // |  firstline_a
+    FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD,          // |  firstline_a
+    FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE,     // |  firstline_b
+    FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI,     // |  firstline_b
+    FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE,  // |  firstline_c
+    FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION,    // |  firstline_c
+
+    FAILED_CONVERTING_STATUS_CODE_TO_INT,
+
+    HEADERS_TOO_LONG,
+    UNPARSABLE_CONTENT_LENGTH,
+    // Warning: there may be a body but there was no content-length/chunked
+    // encoding
+    MAYBE_BODY_BUT_NO_CONTENT_LENGTH,
+
+    // This is used if a body is required for a request.
+    REQUIRED_BODY_BUT_NO_CONTENT_LENGTH,
+
+    HEADER_MISSING_COLON,
+
+    // Chunking errors
+    INVALID_CHUNK_LENGTH,
+    CHUNK_LENGTH_OVERFLOW,
+
+    // Other errors.
+    CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO,
+    CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT,
+    MULTIPLE_CONTENT_LENGTH_KEYS,
+    MULTIPLE_TRANSFER_ENCODING_KEYS,
+    UNKNOWN_TRANSFER_ENCODING,
+    BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH,
+    INVALID_HEADER_FORMAT,
+    HTTP2_INVALID_HEADER_FORMAT,
+    HTTP2_CONTENT_LENGTH_ERROR,
+
+    // Trailer errors.
+    INVALID_TRAILER_FORMAT,
+    TRAILER_TOO_LONG,
+    TRAILER_MISSING_COLON,
+
+    // A detected internal inconsistency was found.
+    INTERNAL_LOGIC_ERROR,
+
+    // A control character was found in a header key or value
+    INVALID_HEADER_CHARACTER,
+    INVALID_HEADER_NAME_CHARACTER,
+    INVALID_TRAILER_NAME_CHARACTER,
+
+    NUM_ERROR_CODES
+  };
+  static const char* ParseStateToString(ParseState error_code);
+  static const char* ErrorCodeToString(ErrorCode error_code);
+};
+
+// Result of parsing a content-length header value (see BalsaFrame).
+struct QUICHE_EXPORT_PRIVATE BalsaHeadersEnums {
+  enum ContentLengthStatus {
+    INVALID_CONTENT_LENGTH,
+    CONTENT_LENGTH_OVERFLOW,
+    NO_CONTENT_LENGTH,
+    VALID_CONTENT_LENGTH,
+  };
+};
+
+}  // namespace quiche
+
+#endif  // QUICHE_COMMON_BALSA_BALSA_ENUMS_H_
diff --git a/quiche/common/balsa/balsa_frame.cc b/quiche/common/balsa/balsa_frame.cc new file mode 100644 index 0000000..39853e3 --- /dev/null +++ b/quiche/common/balsa/balsa_frame.cc
@@ -0,0 +1,1301 @@
+// Copyright 2022 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "quiche/common/balsa/balsa_frame.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+#include "quiche/common/balsa/balsa_enums.h"
+#include "quiche/common/balsa/balsa_headers.h"
+#include "quiche/common/balsa/balsa_visitor_interface.h"
+#include "quiche/common/balsa/header_properties.h"
+#include "quiche/common/platform/api/quiche_logging.h"
+
+namespace quiche {
+
+namespace {
+
+const size_t kContinueStatusCode = 100;
+
+constexpr absl::string_view kChunked = "chunked";
+constexpr absl::string_view kContentLength = "content-length";
+constexpr absl::string_view kIdentity = "identity";
+constexpr absl::string_view kTransferEncoding = "transfer-encoding";
+
+// Builds a byte-indexed table flagging BalsaFrame::kInvalidHeaderKeyCharList.
+std::array<bool, 256> buildInvalidHeaderKeyCharLookupTable() {
+  std::array<bool, 256> invalidCharTable = {};  // every entry starts false
+  for (char c : BalsaFrame::kInvalidHeaderKeyCharList) {
+    invalidCharTable[static_cast<unsigned char>(c)] = true;
+  }
+  return invalidCharTable;
+}
+
+// Returns true if `c` appears in BalsaFrame::kInvalidHeaderKeyCharList.
+inline bool IsInvalidHeaderKeyChar(char c) {
+  static const auto kInvalidKeyChars = buildInvalidHeaderKeyCharLookupTable();
+  // Index by unsigned byte value: plain char may be signed (bytes >= 0x80).
+  return kInvalidKeyChars[static_cast<unsigned char>(c)];
+}
+
+}  // namespace
+
+// Clears per-message parsing state; fields noted as "not reset" persist.
+void BalsaFrame::Reset() {
+  last_char_was_slash_r_ = false;
+  saw_non_newline_char_ = false;
+  start_was_space_ = true;
+  chunk_length_character_extracted_ = false;
+  // is_request_ = true;               // not reset between messages.
+  allow_reading_until_close_for_request_ = false;
+  // request_was_head_ = false;        // not reset between messages.
+  // max_header_length_ = 16 * 1024;   // not reset between messages.
+  // visitor_ = &do_nothing_visitor_;  // not reset between messages.
+  chunk_length_remaining_ = 0;
+  content_length_remaining_ = 0;
+  last_slash_n_loc_ = nullptr;
+  last_recorded_slash_n_loc_ = nullptr;
+  last_slash_n_idx_ = 0;
+  term_chars_ = 0;
+  parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
+  last_error_ = BalsaFrameEnums::NO_ERROR;
+  invalid_chars_.clear();
+  lines_.clear();
+  if (continue_headers_ != nullptr) {
+    continue_headers_->Clear();
+  }
+  if (headers_ != nullptr) {
+    headers_->Clear();
+  }
+  trailer_lines_.clear();
+  start_of_trailer_line_ = 0;
+  trailer_length_ = 0;
+  if (trailer_ != nullptr) {
+    trailer_->Clear();
+  }
+}
+
+namespace {
+
+// Within the line bounded by [current, end), parses a single "island",
+// comprising a (possibly empty) span of whitespace followed by a (possibly
+// empty) span of non-whitespace.
+//
+// Returns a pointer to the first whitespace character beyond this island, or
+// returns end if no additional whitespace characters are present after this
+// island.  (I.e., returnvalue == end || *returnvalue <= ' ')
+//
+// Upon return, the whitespace span is the characters whose indices fall in
+// [*first_whitespace, *first_nonwhite), and the non-whitespace span is those
+// whose indices fall in [*first_nonwhite, returnvalue - begin).
+inline const char* ParseOneIsland(const char* current, const char* begin,
+                                  const char* end, size_t* first_whitespace,
+                                  size_t* first_nonwhite) {
+  *first_whitespace = current - begin;
+  while (current < end && *current <= ' ') {
+    ++current;
+  }
+  *first_nonwhite = current - begin;
+  while (current < end && *current > ' ') {
+    ++current;
+  }
+  return current;
+}
+
+}  // namespace
+
+// Summary:
+//     Parses the first line of either a request or response.
+//     Note that in the case of a detected warning, error_code will be set
+//   but the function will not return false.
+//     Exactly zero or one warning or error (but not both) may be detected
+//   by this function.
+//     Note that this function will not write the data of the first-line
+//   into the header's buffer (that should already have been done elsewhere).
+//
+// Pre-conditions:
+//     begin != end
+//     *begin should be a character which is > ' '. This implies that there
+//   is at least one non-whitespace character between [begin, end).
+//     headers is a valid pointer to a BalsaHeaders class.
+//     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
+//     Entire first line must exist between [begin, end)
+//     Exactly zero or one newlines -may- exist between [begin, end)
+//     [begin, end) should exist in the header's buffer.
+//
+// Side-effects:
+//   headers will be modified
+//   error_code may be modified if either a warning or error is detected
+//
+// Returns:
+//   True if no error (as opposed to warning) is detected.
+//   False if an error (as opposed to warning) is detected.
+
+//
+// If there is indeed non-whitespace in the line, then the following
+// will take care of this for you:
+//  while (*begin <= ' ') ++begin;
+//  ParseHTTPFirstLine(begin, end, is_request, &headers, &error_code);
+//
+
+bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
+                        BalsaHeaders* headers,
+                        BalsaFrameEnums::ErrorCode* error_code) {
+  while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
+    --end;
+  }
+
+  const char* current =
+      ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
+                     &headers->non_whitespace_1_idx_);
+  current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
+                           &headers->non_whitespace_2_idx_);
+  current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
+                           &headers->non_whitespace_3_idx_);
+
+  // Clean up any trailing whitespace that comes after the third island
+  const char* last = end;
+  while (current <= last && *last <= ' ') {
+    --last;
+  }
+  headers->whitespace_4_idx_ = last - begin + 1;
+
+  // Either the passed-in line is empty, or it starts with a non-whitespace
+  // character.
+  QUICHE_DCHECK(begin == end || *begin > ' ');
+
+  QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
+  QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
+
+  // If the line isn't empty, it has at least one non-whitespace character (see
+  // first QUICHE_DCHECK), which will have been identified as a non-empty
+  // [non_whitespace_1_idx_, whitespace_2_idx_).
+  QUICHE_DCHECK(begin == end ||
+                headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);
+
+  if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
+    // This error may be triggered if the second token is empty, OR there's no
+    // WS after the first token; we don't bother to distinguish exactly which.
+    // (I'm not sure why we distinguish different kinds of parse error at all,
+    // actually.)
+    // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
+    // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
+    *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
+        BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
+        static_cast<int>(is_request));
+    if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
+      return false;
+    }
+  }
+  if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
+    if (*error_code == BalsaFrameEnums::NO_ERROR) {
+      // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI for request
+      // FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response
+      *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
+          BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
+          static_cast<int>(is_request));
+    }
+  }
+
+  if (!is_request) {
+    headers->parsed_response_code_ = 0;
+    // If the response code is non-empty:
+    if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
+      if (!absl::SimpleAtoi(
+              absl::string_view(begin + headers->non_whitespace_2_idx_,
+                                headers->non_whitespace_3_idx_ -
+                                    headers->non_whitespace_2_idx_),
+              &headers->parsed_response_code_)) {
+        *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// begin - beginning of the firstline
+// end - end of the firstline
+//
+// A precondition for this function is that there is non-whitespace between
+// [begin, end). If this precondition is not met, the function will not perform
+// as expected (and bad things may happen, and it will eat your first, second,
+// and third unborn children!).
+//
+// Another precondition for this function is that [begin, end) includes
+// at most one newline, which must be at the end of the line.
+void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
+  BalsaFrameEnums::ErrorCode previous_error = last_error_;
+  if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
+    parse_state_ = BalsaFrameEnums::ERROR;
+    HandleError(last_error_);
+    return;
+  }
+  if (previous_error != last_error_) {
+    HandleWarning(last_error_);
+  }
+
+  if (is_request_) {
+    size_t version_length =
+        headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;
+    visitor_->OnRequestFirstLineInput(
+        begin + headers_->non_whitespace_1_idx_,
+        headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
+        begin + headers_->non_whitespace_1_idx_,
+        headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
+        begin + headers_->non_whitespace_2_idx_,
+        headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
+        begin + headers_->non_whitespace_3_idx_, version_length);
+    if (version_length == 0) {
+      parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
+    }
+  } else {
+    visitor_->OnResponseFirstLineInput(
+        begin + headers_->non_whitespace_1_idx_,
+        headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
+        begin + headers_->non_whitespace_1_idx_,
+        headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
+        begin + headers_->non_whitespace_2_idx_,
+        headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
+        begin + headers_->non_whitespace_3_idx_,
+        headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
+  }
+}
+
+// 
'stream_begin' points to the first character of the headers buffer.
+// 'line_begin' points to the first character of the line.
+// 'current' points to a char which is ':'.
+// 'line_end' points to the position of '\n' + 1.
+// 'current_header_line' receives the resulting key_end_idx/value_begin_idx.
+void BalsaFrame::CleanUpKeyValueWhitespace(
+    const char* stream_begin, const char* line_begin, const char* current,
+    const char* line_end, HeaderLineDescription* current_header_line) {
+  const char* colon_loc = current;
+  QUICHE_DCHECK_LT(colon_loc, line_end);
+  QUICHE_DCHECK_EQ(':', *colon_loc);
+  QUICHE_DCHECK_EQ(':', *current);
+  QUICHE_DCHECK_GE(' ', *line_end)
+      << "\"" << std::string(line_begin, line_end) << "\"";
+
+  // TODO(fenix): Investigate whether or not the bounds tests in the
+  // while loops here are redundant, and if so, remove them.
+  --current;
+  while (current > line_begin && *current <= ' ') {
+    --current;
+  }
+  current += static_cast<int>(current != colon_loc);
+  current_header_line->key_end_idx = current - stream_begin;
+
+  current = colon_loc;
+  QUICHE_DCHECK_EQ(':', *current);
+  ++current;
+  while (current < line_end && *current <= ' ') {
+    ++current;
+  }
+  current_header_line->value_begin_idx = current - stream_begin;
+
+  QUICHE_DCHECK_GE(current_header_line->key_end_idx,
+                   current_header_line->first_char_idx);
+  QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
+                   current_header_line->key_end_idx);
+  QUICHE_DCHECK_GE(current_header_line->last_char_idx,
+                   current_header_line->value_begin_idx);
+}
+
+// Records key/value offsets for each line in `headers`; false on error.
+bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
+                                                bool is_trailer,
+                                                BalsaHeaders* headers) {
+  QUICHE_DCHECK(!lines.empty());
+  const char* stream_begin = headers->OriginalHeaderStreamBegin();
+  // The last line is always just a newline (and is uninteresting).
+  const Lines::size_type lines_size_m1 = lines.size() - 1;
+  // For a trailer, there is no first line, so lines[0] is the first
+  // header. For real headers, the first line takes lines[0], so the real
+  // headers start at index 1.
+  int first_header_idx = (is_trailer ? 0 : 1);
+  const char* current = stream_begin + lines[first_header_idx].first;
+  // This code is a bit more subtle than it may appear at first glance.
+  // This code looks for a colon in the current line... but it also looks
+  // beyond the current line. If there is no colon in the current line, then
+  // for each subsequent line (until the colon which -has- been found is
+  // associated with a line), no searching for a colon will be performed. In
+  // this way, we minimize the amount of bytes we have scanned for a colon.
+  for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
+    const char* line_begin = stream_begin + lines[i].first;
+
+    // Here we handle possible continuations.  Note that we do not replace
+    // the '\n' in the line before a continuation (at least, as of now),
+    // which implies that any code which looks for a value must deal with
+    // "\r\n", etc -within- the line (and not just at the end of it).
+    for (++i; i < lines_size_m1; ++i) {
+      const char c = *(stream_begin + lines[i].first);
+      if (c > ' ') {
+        // Not a continuation, so stop.  Note that if the 'original' i = 1,
+        // and the next line is not a continuation, we'll end up with i = 2
+        // when we break. This handles the incrementing of i for the outer
+        // loop.
+        break;
+      }
+
+      // Space and tab are valid starts to continuation lines.
+      // https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
+      // can choose to reject or normalize continuation lines.
+      if ((c != ' ' && c != '\t') ||
+          http_validation_policy().disallow_header_continuation_lines()) {
+        HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
+                               : BalsaFrameEnums::INVALID_HEADER_FORMAT);
+        return false;
+      }
+
+      // If disallow_header_continuation_lines() is false, we neither reject nor
+      // normalize continuation lines, in violation of RFC7230.
+    }
+    const char* line_end = stream_begin + lines[i - 1].second;
+    QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
+
+    // We cleanup the whitespace at the end of the line before doing anything
+    // else of interest as it allows us to do nothing when irregularly formatted
+    // headers are parsed (e.g. those with only keys, only values, or no colon).
+    //
+    // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
+    --line_end;
+    QUICHE_DCHECK_EQ('\n', *line_end)
+        << "\"" << std::string(line_begin, line_end) << "\"";
+    while (*line_end <= ' ' && line_end > line_begin) {
+      --line_end;
+    }
+    ++line_end;
+    QUICHE_DCHECK_GE(' ', *line_end);
+    QUICHE_DCHECK_LT(line_begin, line_end);
+
+    // We use '0' for the block idx, because we're always writing to the first
+    // block from the framer (we do this because the framer requires that the
+    // entire header sequence be in a contiguous buffer).
+    headers->header_lines_.push_back(HeaderLineDescription(
+        line_begin - stream_begin, line_end - stream_begin,
+        line_end - stream_begin, line_end - stream_begin, 0));
+    if (current >= line_end) {
+      if (http_validation_policy().require_header_colon()) {
+        HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
+                               : BalsaFrameEnums::HEADER_MISSING_COLON);
+        return false;
+      }
+      HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
+                               : BalsaFrameEnums::HEADER_MISSING_COLON);
+      // Then the next colon will not be found within this header line-- time
+      // to try again with another header-line.
+      continue;
+    } else if (current < line_begin) {
+      // When this condition is true, the last detected colon was part of a
+      // previous line.  We reset to the beginning of the line as we don't care
+      // about the presence of any colon before the beginning of the current
+      // line.
+      current = line_begin;
+    }
+    for (; current < line_end; ++current) {
+      if (*current == ':') {
+        break;
+      }
+
+      if (http_validation_policy().enforce_header_characters() &&
+          IsInvalidHeaderKeyChar(*current)) {
+        // Generally invalid characters were found earlier.
+        HandleError(is_trailer
+                        ? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
+                        : BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
+        return false;
+      }
+    }
+
+    if (current == line_end) {
+      // There was no colon in the line. The arguments we passed into the
+      // construction for the HeaderLineDescription object should be OK-- it
+      // assumes that the entire content is 'key' by default (which is true, as
+      // there was no colon, there can be no value). Note that this is a
+      // construct which is technically not allowed by the spec.
+      // In strict mode, we do treat this invalid value-less key as an error.
+      if (http_validation_policy().require_header_colon()) {
+        HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
+                               : BalsaFrameEnums::HEADER_MISSING_COLON);
+        return false;
+      }
+      HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
+                               : BalsaFrameEnums::HEADER_MISSING_COLON);
+      continue;
+    }
+
+    QUICHE_DCHECK_EQ(*current, ':');
+    QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
+    QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
+
+    HeaderLineDescription& current_header_line = headers->header_lines_.back();
+    current_header_line.key_end_idx = current - stream_begin;
+    current_header_line.value_begin_idx = current_header_line.key_end_idx;
+    if (current < line_end) {
+      ++current_header_line.key_end_idx;
+
+      CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
+                                &current_header_line);
+    }
+  }
+
+  return true;
+}
+
+// Records `error_code` as the last error and reports it as a warning.
+void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
+  last_error_ = error_code;
+  visitor_->HandleWarning(last_error_);
+}
+
+// Records `error_code`, enters the ERROR state, and notifies the visitor.
+void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
+  last_error_ = error_code;
+  parse_state_ = BalsaFrameEnums::ERROR;
+  visitor_->HandleError(last_error_);
+}
+
+// Parses header line `line_idx` as a decimal content-length into `*length`.
+BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
+    HeaderLines::size_type line_idx, size_t* length) {
+  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
+  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
+  const char* line_end = stream_begin + header_line.last_char_idx;
+  const char* value_begin = (stream_begin + header_line.value_begin_idx);
+
+  if (value_begin >= line_end) {
+    // There is no non-whitespace value data.
+    DVLOG(1) << "invalid content-length -- no non-whitespace value data";
+    return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
+  }
+
+  *length = 0;
+  while (value_begin < line_end) {
+    if (*value_begin < '0' || *value_begin > '9') {
+      // bad! content-length found, and couldn't parse all of it!
+      DVLOG(1) << "invalid content-length - non numeric character detected";
+      return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
+    }
+    const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
+    size_t length_x_10 = *length * 10;
+    const char c = *value_begin - '0';
+    if (*length > kMaxDiv10 ||
+        (std::numeric_limits<size_t>::max() - length_x_10) < c) {
+      DVLOG(1) << "content-length overflow";
+      return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
+    }
+    *length = length_x_10 + c;
+    ++value_begin;
+  }
+  DVLOG(1) << "content_length parsed: " << *length;
+  return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
+}
+
+// Sets transfer_encoding_is_chunked_ from line `line_idx`; errors if unknown.
+void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
+  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
+  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
+  const char* line_end = stream_begin + header_line.last_char_idx;
+  const char* value_begin = stream_begin + header_line.value_begin_idx;
+  size_t value_length = line_end - value_begin;
+
+  if (absl::EqualsIgnoreCase(absl::string_view(value_begin, value_length),
+                             kChunked)) {
+    headers_->transfer_encoding_is_chunked_ = true;
+  } else if (absl::EqualsIgnoreCase(
+                 absl::string_view(value_begin, value_length), kIdentity)) {
+    headers_->transfer_encoding_is_chunked_ = false;
+  } else {
+    HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
+  }
+}
+
+// Tallies invalid header characters in invalid_chars_; true if any found.
+bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
+                                                 const BalsaHeaders* headers) {
+  // Scan from the start of the first line to the end of the last line; the
+  // first line's offset is added because it is non-zero for a trailer.
+  const char* stream_begin =
+      headers->OriginalHeaderStreamBegin() + lines.front().first;
+  const char* stream_end =
+      headers->OriginalHeaderStreamBegin() + lines.back().second;
+  bool found_invalid = false;
+
+  for (const char* c = stream_begin; c < stream_end; c++) {
+    if (header_properties::IsInvalidHeaderChar(*c)) {
+      found_invalid = true;
+      invalid_chars_[*c]++;
+    }
+  }
+
+  return found_invalid;
+}
+
+// Parses the lines into `headers` and handles the framing-related headers.
+void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
+                                    BalsaHeaders* headers) {
+  QUICHE_DCHECK(!lines.empty());
+  DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
+
+  if (is_request() && track_invalid_chars()) {
+    if (CheckHeaderLinesForInvalidChars(lines, headers)) {
+      if (invalid_chars_error_enabled()) {
+        HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
+        return;
+      }
+      HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
+    }
+  }
+
+  // There is no need to attempt to process headers (resp. trailers)
+  // if no header (resp. trailer) lines exist.
+  //
+  // The last line of the message, which is an empty line, is never a header
+  // (resp. trailer) line.  Furthermore, the first line of the message is not
+  // a header line.  Therefore there are at least two (resp. one) lines in the
+  // message which are not header (resp. trailer) lines.
+  //
+  // Thus, we test to see if we have more than two (resp. one) lines total
+  // before attempting to parse any header (resp. trailer) lines.
+  if (lines.size() <= (is_trailer ? 1u : 2u)) {
+    return;
+  }
+
+  HeaderLines::size_type content_length_idx = 0;
+  HeaderLines::size_type transfer_encoding_idx = 0;
+  const char* stream_begin = headers->OriginalHeaderStreamBegin();
+  // Parse the rest of the header or trailer data into key-value pairs.
+  if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
+    return;
+  }
+  // At this point, we've parsed all of the headers/trailers. Time to look
+  // for those headers which we require for framing or for format errors.
+  const HeaderLines::size_type lines_size = headers->header_lines_.size();
+  for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
+    const HeaderLineDescription& line = headers->header_lines_[i];
+    const char* key_begin = stream_begin + line.first_char_idx;
+    const size_t key_len = line.key_end_idx - line.first_char_idx;
+    const char c = key_len != 0u ? *key_begin : ' ';
+    DVLOG(2) << "[" << i << "]: " << std::string(key_begin, key_len) << " c: '"
+             << c << "' key_len: " << key_len;
+
+    // content-length and transfer-encoding change the way that the message
+    // is framed, so the framer is required to search for them. However,
+    // first check for a formatting error, and skip special header treatment
+    // on trailer lines (when is_trailer is true).
+    if (c == ' ') {
+      parse_state_ = BalsaFrameEnums::ERROR;
+      HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
+                             : BalsaFrameEnums::INVALID_HEADER_FORMAT);
+      return;
+    } else if (is_trailer) {
+      continue;
+    } else if (absl::EqualsIgnoreCase(absl::string_view(key_begin, key_len),
+                                      kContentLength)) {
+      size_t length = 0;
+      BalsaHeadersEnums::ContentLengthStatus content_length_status =
+          ProcessContentLengthLine(i, &length);
+      if (content_length_idx != 0) {  // then we've already seen one!
+        if ((headers->content_length_status_ != content_length_status) ||
+            ((headers->content_length_status_ ==
+              BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
+             (http_validation_policy().disallow_multiple_content_length() ||
+              length != headers->content_length_))) {
+          HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
+          return;
+        }
+        continue;
+      } else {
+        content_length_idx = i + 1;
+        headers->content_length_status_ = content_length_status;
+        headers->content_length_ = length;
+        content_length_remaining_ = length;
+      }
+    } else if (absl::EqualsIgnoreCase(absl::string_view(key_begin, key_len),
+                                      kTransferEncoding)) {
+      if (transfer_encoding_idx != 0) {
+        HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
+        return;
+      }
+      transfer_encoding_idx = i + 1;
+    }
+  }
+
+  if (!is_trailer) {
+    if (http_validation_policy()
+            .disallow_transfer_encoding_with_content_length() &&
+        content_length_idx != 0 && transfer_encoding_idx != 0) {
+      HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
+      return;
+    }
+    if (headers->transfer_encoding_is_chunked_) {
+      headers->content_length_ = 0;
+      headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
+      content_length_remaining_ = 0;
+    }
+    if (transfer_encoding_idx != 0) {
+      ProcessTransferEncodingLine(transfer_encoding_idx - 1);
+    }
+  }
+}
+
+void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
+  // For responses, can't have a body if the request was a HEAD, or if it is
+  // one of these response-codes.  rfc2616 section 4.3
+  parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
+  int response_code = headers_->parsed_response_code_;
+  if (is_request_ || (!request_was_head_ &&
+                      BalsaHeaders::ResponseCanHaveBody(response_code))) {
+    // Then we can have a body.
+    if (headers_->transfer_encoding_is_chunked_) {
+      // Note that
+      //   if ( Transfer-Encoding: chunked &&  Content-length: )
+      // then Transfer-Encoding: chunked trumps.
+      // This is as specified in the spec.
+ // rfc2616 section 4.4.3 + parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; + } else { + // Errors parsing content-length definitely can cause + // protocol errors/warnings + switch (headers_->content_length_status_) { + // If we have a content-length, and it is parsed + // properly, there are two options. + // 1) zero content, in which case the message is done, and + // 2) nonzero content, in which case we have to + // consume the body. + case BalsaHeadersEnums::VALID_CONTENT_LENGTH: + if (headers_->content_length_ == 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + } else { + parse_state_ = BalsaFrameEnums::READING_CONTENT; + } + break; + case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: + case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: + // If there were characters left-over after parsing the + // content length, we should flag an error and stop. + HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH); + break; + // We can have: no transfer-encoding, no content length, and no + // connection: close... + // Unfortunately, this case doesn't seem to be covered in the spec. + // We'll assume that the safest thing to do here is what the google + // binaries before 2008 already do, which is to assume that + // everything until the connection is closed is body. + case BalsaHeadersEnums::NO_CONTENT_LENGTH: + if (is_request_) { + absl::string_view method = headers_->request_method(); + // POSTs and PUTs should have a detectable body length. If they + // do not we consider it an error. + if (method != "POST" && method != "PUT") { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + break; + } else if (!allow_reading_until_close_for_request_) { + HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH); + break; + } + } + parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; + HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH); + break; + // The COV_NF_... 
statements here provide hints to the apparatus + // which computes coverage reports/ratios that this code is never + // intended to be executed, and should technically be impossible. + // COV_NF_START + default: + LOG(FATAL) << "Saw a content_length_status: " + << headers_->content_length_status_ + << " which is unknown."; + // COV_NF_END + } + } + } +} + +size_t BalsaFrame::ProcessHeaders(const char* message_start, + size_t message_length) { + const char* const original_message_start = message_start; + const char* const message_end = message_start + message_length; + const char* message_current = message_start; + const char* checkpoint = message_start; + + if (message_length == 0) { + return message_current - original_message_start; + } + + while (message_current < message_end) { + size_t base_idx = headers_->GetReadableBytesFromHeaderStream(); + + // Yes, we could use strchr (assuming null termination), or + // memchr, but as it turns out that is slower than this tight loop + // for the input that we see. 
+ if (!saw_non_newline_char_) { + do { + const char c = *message_current; + if (c != '\r' && c != '\n') { + if (c <= ' ') { + HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST); + return message_current - original_message_start; + } + break; + } + ++message_current; + if (message_current == message_end) { + return message_current - original_message_start; + } + } while (true); + saw_non_newline_char_ = true; + message_start = message_current; + checkpoint = message_current; + } + while (message_current < message_end) { + if (*message_current != '\n') { + ++message_current; + continue; + } + const size_t relative_idx = message_current - message_start; + const size_t message_current_idx = 1 + base_idx + relative_idx; + lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx)); + if (lines_.size() == 1) { + headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint); + checkpoint = message_current + 1; + const char* begin = headers_->OriginalHeaderStreamBegin(); + + DVLOG(1) << "First line " << std::string(begin, lines_[0].second); + DVLOG(1) << "is_request_: " << is_request_; + ProcessFirstLine(begin, begin + lines_[0].second); + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) { + break; + } else if (parse_state_ == BalsaFrameEnums::ERROR) { + return message_current - original_message_start; + } + } + const size_t chars_since_last_slash_n = + (message_current_idx - last_slash_n_idx_); + last_slash_n_idx_ = message_current_idx; + if (chars_since_last_slash_n > 2) { + // false positive. 
+ ++message_current; + continue; + } + if ((chars_since_last_slash_n == 1) || + (((message_current > message_start) && + (*(message_current - 1) == '\r')) || + (last_char_was_slash_r_))) { + break; + } + ++message_current; + } + + if (message_current == message_end) { + continue; + } + + ++message_current; + QUICHE_DCHECK(message_current >= message_start); + if (message_current > message_start) { + headers_->WriteFromFramer(checkpoint, message_current - checkpoint); + } + + // Check if we have exceeded maximum headers length + // Although we check for this limit before and after we call this function + // we check it here as well to make sure that in case the visitor changed + // the max_header_length_ (for example after processing the first line) + // we handle it gracefully. + if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) { + HandleError(BalsaFrameEnums::HEADERS_TOO_LONG); + return message_current - original_message_start; + } + + // Since we know that we won't be writing any more bytes of the header, + // we tell that to the headers object. The headers object may make + // more efficient allocation decisions when this is signaled. + headers_->DoneWritingFromFramer(); + { + const char* readable_ptr = nullptr; + size_t readable_size = 0; + headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size); + visitor_->OnHeaderInput(readable_ptr, readable_size); + } + + // Ok, now that we've written everything into our header buffer, it is + // time to process the header lines (extract proper values for headers + // which are important for framing). + ProcessHeaderLines(lines_, false /*is_trailer*/, headers_); + if (parse_state_ == BalsaFrameEnums::ERROR) { + return message_current - original_message_start; + } + + if (continue_headers_ != nullptr && + headers_->parsed_response_code_ == kContinueStatusCode) { + // Save the headers from this 100 Continue response but reset everything + // else to prepare for the next set of headers. 
+ BalsaHeaders saved_continue_headers = std::move(*headers_); + Reset(); + *continue_headers_ = std::move(saved_continue_headers); + visitor_->ContinueHeaderDone(); + checkpoint = message_start = message_current; + continue; + } else { + AssignParseStateAfterHeadersHaveBeenParsed(); + if (parse_state_ == BalsaFrameEnums::ERROR) { + return message_current - original_message_start; + } + visitor_->ProcessHeaders(*headers_); + visitor_->HeaderDone(); + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) { + visitor_->MessageDone(); + } + } + return message_current - original_message_start; + } + // If we've gotten to here, it means that we've consumed all of the + // available input. We need to record whether or not the last character we + // saw was a '\r' so that a subsequent call to ProcessInput correctly finds + // a header framing that is split across the two calls. + last_char_was_slash_r_ = (*(message_end - 1) == '\r'); + QUICHE_DCHECK(message_current >= message_start); + if (message_current > message_start) { + headers_->WriteFromFramer(checkpoint, message_current - checkpoint); + } + return message_current - original_message_start; +} + +size_t BalsaFrame::BytesSafeToSplice() const { + switch (parse_state_) { + case BalsaFrameEnums::READING_CHUNK_DATA: + return chunk_length_remaining_; + case BalsaFrameEnums::READING_UNTIL_CLOSE: + return std::numeric_limits<size_t>::max(); + case BalsaFrameEnums::READING_CONTENT: + return content_length_remaining_; + default: + return 0; + } +} + +void BalsaFrame::BytesSpliced(size_t bytes_spliced) { + switch (parse_state_) { + case BalsaFrameEnums::READING_CHUNK_DATA: + if (chunk_length_remaining_ >= bytes_spliced) { + chunk_length_remaining_ -= bytes_spliced; + if (chunk_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; + } + return; + } else { + HandleError(BalsaFrameEnums:: + CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT); + return; + } + + case 
BalsaFrameEnums::READING_UNTIL_CLOSE: + return; + + case BalsaFrameEnums::READING_CONTENT: + if (content_length_remaining_ >= bytes_spliced) { + content_length_remaining_ -= bytes_spliced; + if (content_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->MessageDone(); + } + return; + } else { + HandleError(BalsaFrameEnums:: + CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT); + return; + } + + default: + HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO); + return; + } +} + +size_t BalsaFrame::ProcessInput(const char* input, size_t size) { + const char* current = input; + const char* on_entry = current; + const char* end = current + size; + + QUICHE_DCHECK(headers_ != nullptr); + if (headers_ == nullptr) { + return 0; + } + + if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { + const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); + // Yes, we still have to check this here as the user can change the + // max_header_length amount! + // Also it is possible that we have reached the maximum allowed header size, + // and we have more to consume (remember we are still inside + // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. + if (header_length > max_header_length_ || + (header_length == max_header_length_ && size > 0)) { + HandleError(BalsaFrameEnums::HEADERS_TOO_LONG); + return current - input; + } + const size_t bytes_to_process = + std::min(max_header_length_ - header_length, size); + current += ProcessHeaders(input, bytes_to_process); + // If we are still reading headers check if we have crossed the headers + // limit. Note that we check for >= as opposed to >. 
This is because if + // header_length_after equals max_header_length_ and we are still in the + // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for + // sure that the headers limit will be crossed later on + if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { + // Note that headers_ is valid only if we are still reading headers. + const size_t header_length_after = + headers_->GetReadableBytesFromHeaderStream(); + if (header_length_after >= max_header_length_) { + HandleError(BalsaFrameEnums::HEADERS_TOO_LONG); + } + } + return current - input; + } + + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || + parse_state_ == BalsaFrameEnums::ERROR) { + // Can do nothing more 'till we're reset. + return current - input; + } + + QUICHE_DCHECK_LE(current, end); + if (current == end) { + return current - input; + } + + while (true) { + switch (parse_state_) { + case BalsaFrameEnums::READING_CHUNK_LENGTH: + // In this state we read the chunk length. + // Note that once we hit a character which is not in: + // [0-9;A-Fa-f\n], we transition to a different state. 
+ // + QUICHE_DCHECK_LE(current, end); + while (true) { + if (current == end) { + visitor_->OnRawBodyInput(on_entry, current - on_entry); + return current - input; + } + + const char c = *current; + ++current; + + static const signed char kBad = -1; + static const signed char kDelimiter = -2; + + // valid cases: + // "09123\n" // -> 09123 + // "09123\r\n" // -> 09123 + // "09123 \n" // -> 09123 + // "09123 \r\n" // -> 09123 + // "09123 12312\n" // -> 09123 + // "09123 12312\r\n" // -> 09123 + // "09123; foo=bar\n" // -> 09123 + // "09123; foo=bar\r\n" // -> 09123 + // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF + // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF + // invalid cases: + // "[ \t]+[^\n]*\n" + // "FFFFFFFFFFFFFFFFF\r\n" (would overflow) + // "\r\n" + // "\n" + signed char addition = kBad; + // clang-format off + switch (c) { + case '0': addition = 0; break; + case '1': addition = 1; break; + case '2': addition = 2; break; + case '3': addition = 3; break; + case '4': addition = 4; break; + case '5': addition = 5; break; + case '6': addition = 6; break; + case '7': addition = 7; break; + case '8': addition = 8; break; + case '9': addition = 9; break; + case 'a': addition = 0xA; break; + case 'b': addition = 0xB; break; + case 'c': addition = 0xC; break; + case 'd': addition = 0xD; break; + case 'e': addition = 0xE; break; + case 'f': addition = 0xF; break; + case 'A': addition = 0xA; break; + case 'B': addition = 0xB; break; + case 'C': addition = 0xC; break; + case 'D': addition = 0xD; break; + case 'E': addition = 0xE; break; + case 'F': addition = 0xF; break; + case '\t': + case '\n': + case '\r': + case ' ': + case ';': + addition = kDelimiter; + break; + default: + // Leave addition == kBad + break; + } + // clang-format on + if (addition >= 0) { + chunk_length_character_extracted_ = true; + size_t length_x_16 = chunk_length_remaining_ * 16; + const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16; + if ((chunk_length_remaining_ > 
kMaxDiv16) || + (std::numeric_limits<size_t>::max() - length_x_16) < + static_cast<size_t>(addition)) { + // overflow -- asked for a chunk-length greater than 2^64 - 1!! + visitor_->OnRawBodyInput(on_entry, current - on_entry); + HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW); + return current - input; + } + chunk_length_remaining_ = length_x_16 + addition; + continue; + } + + if (!chunk_length_character_extracted_ || addition == kBad) { + // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no + // characters were converted, or an unexpected character was + // seen. + visitor_->OnRawBodyInput(on_entry, current - on_entry); + HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH); + return current - input; + } + + break; + } + + --current; + parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; + visitor_->OnChunkLength(chunk_length_remaining_); + continue; + + case BalsaFrameEnums::READING_CHUNK_EXTENSION: { + // TODO(phython): Convert this scanning to be 16 bytes at a time if + // there is data to be read. + const char* extensions_start = current; + size_t extensions_length = 0; + QUICHE_DCHECK_LE(current, end); + while (true) { + if (current == end) { + visitor_->OnChunkExtensionInput(extensions_start, + extensions_length); + visitor_->OnRawBodyInput(on_entry, current - on_entry); + return current - input; + } + const char c = *current; + if (c == '\r' || c == '\n') { + extensions_length = (extensions_start == current) + ? 
0 + : current - extensions_start - 1; + } + + ++current; + if (c == '\n') { + break; + } + } + + chunk_length_character_extracted_ = false; + visitor_->OnChunkExtensionInput(extensions_start, extensions_length); + + if (chunk_length_remaining_ != 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; + continue; + } + + HeaderFramingFound('\n'); + parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; + continue; + } + + case BalsaFrameEnums::READING_CHUNK_DATA: + while (current < end) { + if (chunk_length_remaining_ == 0) { + break; + } + // read in the chunk + size_t bytes_remaining = end - current; + size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) + ? chunk_length_remaining_ + : bytes_remaining; + const char* tmp_current = current + consumed_bytes; + visitor_->OnRawBodyInput(on_entry, tmp_current - on_entry); + visitor_->OnBodyChunkInput(current, consumed_bytes); + on_entry = current = tmp_current; + chunk_length_remaining_ -= consumed_bytes; + } + + if (chunk_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; + continue; + } + + visitor_->OnRawBodyInput(on_entry, current - on_entry); + return current - input; + + case BalsaFrameEnums::READING_CHUNK_TERM: + QUICHE_DCHECK_LE(current, end); + while (true) { + if (current == end) { + visitor_->OnRawBodyInput(on_entry, current - on_entry); + return current - input; + } + + const char c = *current; + ++current; + + if (c == '\n') { + break; + } + } + parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; + continue; + + case BalsaFrameEnums::READING_LAST_CHUNK_TERM: + QUICHE_DCHECK_LE(current, end); + while (true) { + if (current == end) { + visitor_->OnRawBodyInput(on_entry, current - on_entry); + return current - input; + } + + const char c = *current; + if (HeaderFramingFound(c) != 0) { + // If we've found a "\r\n\r\n", then the message + // is done. 
+ ++current; + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->OnRawBodyInput(on_entry, current - on_entry); + visitor_->MessageDone(); + return current - input; + } + + // If not, however, since the spec only suggests that the + // client SHOULD indicate the presence of trailers, we get to + // *test* that they did or didn't. + // If all of the bytes we've seen since: + // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF + // are either '\r', or '\n', then we can assume that we don't yet + // know if we need to parse headers, or if the next byte will make + // the HeaderFramingFound condition (above) true. + if (!HeaderFramingMayBeFound()) { + break; + } + + // If HeaderFramingMayBeFound(), then we have seen only characters + // '\r' or '\n'. + ++current; + + // Lets try again! There is no state change here. + } + + // If (!HeaderFramingMayBeFound()), then we know that we must be + // reading the first non CRLF character of a trailer. + parse_state_ = BalsaFrameEnums::READING_TRAILER; + visitor_->OnRawBodyInput(on_entry, current - on_entry); + on_entry = current; + continue; + + // TODO(yongfa): No leading whitespace is allowed before field-name per + // RFC2616. Leading whitespace will cause header parsing error too. + case BalsaFrameEnums::READING_TRAILER: + while (current < end) { + const char c = *current; + ++current; + ++trailer_length_; + if (trailer_ != nullptr) { + // Reuse the header length limit for trailer, which is just a bunch + // of headers. 
+ if (trailer_length_ > max_header_length_) { + --current; + HandleError(BalsaFrameEnums::TRAILER_TOO_LONG); + return current - input; + } + if (LineFramingFound(c)) { + trailer_lines_.push_back( + std::make_pair(start_of_trailer_line_, trailer_length_)); + start_of_trailer_line_ = trailer_length_; + } + } + if (HeaderFramingFound(c) != 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + if (trailer_ != nullptr) { + trailer_->WriteFromFramer(on_entry, current - on_entry); + trailer_->DoneWritingFromFramer(); + ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailer_); + if (parse_state_ == BalsaFrameEnums::ERROR) { + return current - input; + } + visitor_->ProcessTrailers(*trailer_); + } + visitor_->OnTrailerInput(on_entry, current - on_entry); + visitor_->MessageDone(); + return current - input; + } + } + if (trailer_ != nullptr) { + trailer_->WriteFromFramer(on_entry, current - on_entry); + } + visitor_->OnTrailerInput(on_entry, current - on_entry); + return current - input; + + case BalsaFrameEnums::READING_UNTIL_CLOSE: { + const size_t bytes_remaining = end - current; + if (bytes_remaining > 0) { + visitor_->OnRawBodyInput(current, bytes_remaining); + visitor_->OnBodyChunkInput(current, bytes_remaining); + current += bytes_remaining; + } + return current - input; + } + + case BalsaFrameEnums::READING_CONTENT: + while ((content_length_remaining_ != 0u) && current < end) { + // read in the content + const size_t bytes_remaining = end - current; + const size_t consumed_bytes = + (content_length_remaining_ < bytes_remaining) + ? 
content_length_remaining_ + : bytes_remaining; + visitor_->OnRawBodyInput(current, consumed_bytes); + visitor_->OnBodyChunkInput(current, consumed_bytes); + current += consumed_bytes; + content_length_remaining_ -= consumed_bytes; + } + if (content_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->MessageDone(); + } + return current - input; + + default: + // The state-machine should never be in a state that isn't handled + // above. This is a glaring logic error, and we should do something + // drastic to ensure that this gets looked-at and fixed. + LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE + << " memory corruption?!"; // COV_NF_LINE + } + } +} + +const int32_t BalsaFrame::kValidTerm1; +const int32_t BalsaFrame::kValidTerm1Mask; +const int32_t BalsaFrame::kValidTerm2; +const int32_t BalsaFrame::kValidTerm2Mask; + +} // namespace quiche
diff --git a/quiche/common/balsa/balsa_frame.h b/quiche/common/balsa/balsa_frame.h new file mode 100644 index 0000000..5cc00c7 --- /dev/null +++ b/quiche/common/balsa/balsa_frame.h
@@ -0,0 +1,324 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_BALSA_FRAME_H_ +#define QUICHE_COMMON_BALSA_BALSA_FRAME_H_ + +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +#include "quiche/common/balsa/balsa_enums.h" +#include "quiche/common/balsa/balsa_headers.h" +#include "quiche/common/balsa/balsa_visitor_interface.h" +#include "quiche/common/balsa/framer_interface.h" +#include "quiche/common/balsa/http_validation_policy.h" +#include "quiche/common/balsa/noop_balsa_visitor.h" +#include "quiche/common/platform/api/quiche_export.h" + +namespace quiche { + +namespace test { +class BalsaFrameTestPeer; +} // namespace test + +// BalsaFrame is a lightweight HTTP framer. +class QUICHE_EXPORT_PRIVATE BalsaFrame : public FramerInterface { + public: + typedef std::vector<std::pair<size_t, size_t> > Lines; + + typedef BalsaHeaders::HeaderLineDescription HeaderLineDescription; + typedef BalsaHeaders::HeaderLines HeaderLines; + typedef BalsaHeaders::HeaderTokenList HeaderTokenList; + + // Only applied in strict mode. + // Control characters, including \t, \n, \r, as well as space and + // (),/;<=>?@[\]{} and \x7f (see + // https://tools.ietf.org/html/rfc7230#section-3.2.6). + static constexpr char kInvalidHeaderKeyCharList[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, + 0x1E, 0x1F, ' ', '(', ')', ',', '/', ';', '<', '=', + '>', '?', '@', '[', '\\', ']', '{', '}', 0x7f}; + + enum class InvalidCharsLevel { kOff, kWarning, kError }; + + // TODO(fenix): get rid of the 'kValidTerm*' stuff by using the 'since last + // index' strategy. 
Note that this implies getting rid of the HeaderFramed() + + static constexpr int32_t kValidTerm1 = '\n' << 16 | '\r' << 8 | '\n'; + static constexpr int32_t kValidTerm1Mask = 0xFF << 16 | 0xFF << 8 | 0xFF; + static constexpr int32_t kValidTerm2 = '\n' << 8 | '\n'; + static constexpr int32_t kValidTerm2Mask = 0xFF << 8 | 0xFF; + BalsaFrame() + : last_char_was_slash_r_(false), + saw_non_newline_char_(false), + start_was_space_(true), + chunk_length_character_extracted_(false), + is_request_(true), + allow_reading_until_close_for_request_(false), + request_was_head_(false), + max_header_length_(16 * 1024), + visitor_(&do_nothing_visitor_), + chunk_length_remaining_(0), + content_length_remaining_(0), + last_slash_n_loc_(nullptr), + last_recorded_slash_n_loc_(nullptr), + last_slash_n_idx_(0), + term_chars_(0), + parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), + last_error_(BalsaFrameEnums::NO_ERROR), + continue_headers_(nullptr), + headers_(nullptr), + start_of_trailer_line_(0), + trailer_length_(0), + trailer_(nullptr), + invalid_chars_level_(InvalidCharsLevel::kOff), + http_validation_policy_(HttpValidationPolicy::CreateDefault()) {} + + ~BalsaFrame() override {} + + // Reset reinitializes all the member variables of the framer and clears the + // attached header object (but doesn't change the pointer value headers_). + void Reset(); + + // The method set_balsa_headers clears the headers provided and attaches them + // to the framer. This is a required step before the framer will process any + // input message data. + // To detach the header object from the framer, use + // set_balsa_headers(nullptr). + void set_balsa_headers(BalsaHeaders* headers) { + if (headers_ != headers) { + headers_ = headers; + } + if (headers_ != nullptr) { + // Clear the headers if they are non-null, even if the new headers are + // the same as the old. + headers_->Clear(); + } + } + + // If set to non-null, allow 100 Continue headers before the main headers. 
+ void set_continue_headers(BalsaHeaders* continue_headers) { + if (continue_headers_ != continue_headers) { + continue_headers_ = continue_headers; + } + if (continue_headers_ != nullptr) { + // Clear the headers if they are non-null, even if the new headers are + // the same as the old. + continue_headers_->Clear(); + } + } + + // The method set_balsa_trailer clears the trailer provided and attaches it + // to the framer. This is a required step before the framer will process any + // input message data. + // To detach the trailer object from the framer, use + // set_balsa_trailer(nullptr). + void set_balsa_trailer(BalsaHeaders* trailer) { + if (trailer != nullptr && is_request()) { + GFE_BUG(bug_1317_1) << "Trailer in request is not allowed."; + return; + } + + if (trailer_ != trailer) { + trailer_ = trailer; + } + if (trailer_ != nullptr) { + // Clear the trailer if it is non-null, even if the new trailer is + // the same as the old. + trailer_->Clear(); + } + } + + void set_balsa_visitor(BalsaVisitorInterface* visitor) { + visitor_ = visitor; + if (visitor_ == nullptr) { + visitor_ = &do_nothing_visitor_; + } + } + + void set_invalid_chars_level(InvalidCharsLevel v) { + invalid_chars_level_ = v; + } + + bool track_invalid_chars() { + return invalid_chars_level_ != InvalidCharsLevel::kOff; + } + + bool invalid_chars_error_enabled() { + return invalid_chars_level_ == InvalidCharsLevel::kError; + } + + void set_http_validation_policy(const quiche::HttpValidationPolicy& policy) { + http_validation_policy_ = policy; + } + const quiche::HttpValidationPolicy& http_validation_policy() const { + return http_validation_policy_; + } + + void set_is_request(bool is_request) { is_request_ = is_request; } + + bool is_request() const { return is_request_; } + + void set_request_was_head(bool request_was_head) { + request_was_head_ = request_was_head; + } + + void set_max_header_length(size_t max_header_length) { + max_header_length_ = max_header_length; + } + + size_t 
max_header_length() const { return max_header_length_; } + + bool MessageFullyRead() const { + return parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ; + } + + BalsaFrameEnums::ParseState ParseState() const { return parse_state_; } + + bool Error() const { return parse_state_ == BalsaFrameEnums::ERROR; } + + BalsaFrameEnums::ErrorCode ErrorCode() const { return last_error_; } + + const absl::flat_hash_map<char, int>& get_invalid_chars() const { + return invalid_chars_; + } + + const BalsaHeaders* headers() const { return headers_; } + BalsaHeaders* mutable_headers() { return headers_; } + + const BalsaHeaders* trailer() const { return trailer_; } + BalsaHeaders* mutable_trailer() { return trailer_; } + + size_t BytesSafeToSplice() const; + void BytesSpliced(size_t bytes_spliced); + + size_t ProcessInput(const char* input, size_t size) override; + + void set_allow_reading_until_close_for_request(bool set) { + allow_reading_until_close_for_request_ = set; + } + + // For websockets and possibly other uses, we suspend the usual expectations + // about when a message has a body and how long it should be. + void AllowArbitraryBody() { + parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; + } + + protected: + inline BalsaHeadersEnums::ContentLengthStatus ProcessContentLengthLine( + size_t line_idx, size_t* length); + + inline void ProcessTransferEncodingLine(size_t line_idx); + + void ProcessFirstLine(const char* begin, const char* end); + + void CleanUpKeyValueWhitespace(const char* stream_begin, + const char* line_begin, const char* current, + const char* line_end, + HeaderLineDescription* current_header_line); + + void ProcessHeaderLines(const Lines& lines, bool is_trailer, + BalsaHeaders* headers); + + // Returns true if there are invalid characters, false otherwise. + // Will also update counts per invalid character in invalid_chars_. 
+ bool CheckHeaderLinesForInvalidChars(const Lines& lines, + const BalsaHeaders* headers); + + inline size_t ProcessHeaders(const char* message_start, + size_t message_length); + + void AssignParseStateAfterHeadersHaveBeenParsed(); + + inline bool LineFramingFound(char current_char) { + return current_char == '\n'; + } + + // TODO(fenix): get rid of the following function and its uses (and + // replace with something more efficient). + // Return header framing pattern. Non-zero return value indicates found, + // which has two possible outcomes: kValidTerm1, which means \n\r\n + // or kValidTerm2, which means \n\n. Zero return value means not found. + inline int32_t HeaderFramingFound(char current_char) { + // Note that the 'if (current_char == '\n' ...)' test exists to ensure that + // the HeaderFramingMayBeFound test works properly. In benchmarking done on + // 2/13/2008, the 'if' actually speeds up performance of the function + // anyway.. + if (current_char == '\n' || current_char == '\r') { + term_chars_ <<= 8; + // This is necessary IFF architecture has > 8 bit char. Alas, I'm + // paranoid. + term_chars_ |= current_char & 0xFF; + + if ((term_chars_ & kValidTerm1Mask) == kValidTerm1) { + term_chars_ = 0; + return kValidTerm1; + } + if ((term_chars_ & kValidTerm2Mask) == kValidTerm2) { + term_chars_ = 0; + return kValidTerm2; + } + } else { + term_chars_ = 0; + } + return 0; + } + + inline bool HeaderFramingMayBeFound() const { return term_chars_ != 0; } + + private: + friend class test::BalsaFrameTestPeer; + + // Calls HandleError() and returns false on error. 
+ bool FindColonsAndParseIntoKeyValue(const Lines& lines, bool is_trailer, + BalsaHeaders* headers); + + void HandleError(BalsaFrameEnums::ErrorCode error_code); + void HandleWarning(BalsaFrameEnums::ErrorCode error_code); + + bool last_char_was_slash_r_; + bool saw_non_newline_char_; + bool start_was_space_; + bool chunk_length_character_extracted_; + bool is_request_; // This is not reset in Reset() + // Generally, requests are not allowed to frame with connection: close. For + // protocols which do their own protocol-specific chunking, such as streamed + // stubby, we allow connection close semantics for requests. + bool allow_reading_until_close_for_request_; + bool request_was_head_; // This is not reset in Reset() + size_t max_header_length_; // This is not reset in Reset() + BalsaVisitorInterface* visitor_; + size_t chunk_length_remaining_; + size_t content_length_remaining_; + const char* last_slash_n_loc_; + const char* last_recorded_slash_n_loc_; + size_t last_slash_n_idx_; + uint32_t term_chars_; + BalsaFrameEnums::ParseState parse_state_; + BalsaFrameEnums::ErrorCode last_error_; + absl::flat_hash_map<char, int> invalid_chars_; + + Lines lines_; + + BalsaHeaders* continue_headers_; // This is not reset to nullptr in Reset(). + BalsaHeaders* headers_; // This is not reset to nullptr in Reset(). + NoOpBalsaVisitor do_nothing_visitor_; + + Lines trailer_lines_; + size_t start_of_trailer_line_; + size_t trailer_length_; + BalsaHeaders* trailer_; // Does not own and is not reset to nullptr + // in Reset(). + InvalidCharsLevel invalid_chars_level_; // This is not reset in Reset() + + quiche::HttpValidationPolicy http_validation_policy_; +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_BALSA_FRAME_H_
diff --git a/quiche/common/balsa/balsa_headers.cc b/quiche/common/balsa/balsa_headers.cc new file mode 100644 index 0000000..4c57b58 --- /dev/null +++ b/quiche/common/balsa/balsa_headers.cc
@@ -0,0 +1,1111 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quiche/common/balsa/balsa_headers.h" + +#include <sys/types.h> + +#include <cstdint> +#include <functional> +#include <string> +#include <utility> +#include <vector> + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/strings/string_view_utils.h" +#include "quiche/common/balsa/balsa_enums.h" +#include "quiche/common/balsa/header_properties.h" +#include "quiche/common/platform/api/quiche_header_policy.h" +#include "quiche/common/platform/api/quiche_logging.h" + +namespace { + +constexpr absl::string_view kContentLength("Content-Length"); +constexpr absl::string_view kCookie("Cookie"); +constexpr absl::string_view kHost("Host"); +constexpr absl::string_view kTransferEncoding("Transfer-Encoding"); + +// The following list defines list of headers that Envoy considers multivalue. +// Headers on this list are coalesced by EFG in order to provide forward +// compatibility with Envoy behavior. See b/143490671 for details. +// Date, Last-Modified and Location are excluded because they're found on Chrome +// HttpUtil::IsNonCoalescingHeader() list. 
+#define ALL_ENVOY_HEADERS(HEADER_FUNC) \ + HEADER_FUNC("Accept") \ + HEADER_FUNC("Accept-Encoding") \ + HEADER_FUNC("Access-Control-Request-Headers") \ + HEADER_FUNC("Access-Control-Request-Method") \ + HEADER_FUNC("Access-Control-Allow-Origin") \ + HEADER_FUNC("Access-Control-Allow-Headers") \ + HEADER_FUNC("Access-Control-Allow-Methods") \ + HEADER_FUNC("Access-Control-Allow-Credentials") \ + HEADER_FUNC("Access-Control-Expose-Headers") \ + HEADER_FUNC("Access-Control-Max-Age") \ + HEADER_FUNC("Authorization") \ + HEADER_FUNC("Cache-Control") \ + HEADER_FUNC("X-Client-Trace-Id") \ + HEADER_FUNC("Connection") \ + HEADER_FUNC("Content-Encoding") \ + HEADER_FUNC("Content-Length") \ + HEADER_FUNC("Content-Type") \ + /* HEADER_FUNC("Date") */ \ + HEADER_FUNC("Envoy-Attempt-Count") \ + HEADER_FUNC("Envoy-Degraded") \ + HEADER_FUNC("Envoy-Decorator-Operation") \ + HEADER_FUNC("Envoy-Downstream-Service-Cluster") \ + HEADER_FUNC("Envoy-Downstream-Service-Node") \ + HEADER_FUNC("Envoy-Expected-Request-Timeout-Ms") \ + HEADER_FUNC("Envoy-External-Address") \ + HEADER_FUNC("Envoy-Force-Trace") \ + HEADER_FUNC("Envoy-Hedge-On-Per-Try-Timeout") \ + HEADER_FUNC("Envoy-Immediate-Health-Check-Fail") \ + HEADER_FUNC("Envoy-Internal-Request") \ + HEADER_FUNC("Envoy-Ip-Tags") \ + HEADER_FUNC("Envoy-Max-Retries") \ + HEADER_FUNC("Envoy-Original-Path") \ + HEADER_FUNC("Envoy-Original-Url") \ + HEADER_FUNC("Envoy-Overloaded") \ + HEADER_FUNC("Envoy-Rate-Limited") \ + HEADER_FUNC("Envoy-Retry-On") \ + HEADER_FUNC("Envoy-Retry-Grpc-On") \ + HEADER_FUNC("Envoy-Retriable-StatusCodes") \ + HEADER_FUNC("Envoy-Retriable-HeaderNames") \ + HEADER_FUNC("Envoy-Upstream-AltStatName") \ + HEADER_FUNC("Envoy-Upstream-Canary") \ + HEADER_FUNC("Envoy-Upstream-HealthCheckedCluster") \ + HEADER_FUNC("Envoy-Upstream-RequestPerTryTimeoutMs") \ + HEADER_FUNC("Envoy-Upstream-RequestTimeoutAltResponse") \ + HEADER_FUNC("Envoy-Upstream-RequestTimeoutMs") \ + HEADER_FUNC("Envoy-Upstream-ServiceTime") \ + 
HEADER_FUNC("Etag") \ + HEADER_FUNC("Expect") \ + HEADER_FUNC("X-Forwarded-Client-Cert") \ + HEADER_FUNC("X-Forwarded-For") \ + HEADER_FUNC("X-Forwarded-Proto") \ + HEADER_FUNC("Grpc-Accept-Encoding") \ + HEADER_FUNC("Grpc-Message") \ + HEADER_FUNC("Grpc-Status") \ + HEADER_FUNC("Grpc-Timeout") \ + HEADER_FUNC("Host") \ + HEADER_FUNC("Keep-Alive") \ + /* HEADER_FUNC("Last-Modified") */ \ + /* HEADER_FUNC("Location") */ \ + HEADER_FUNC("Method") \ + HEADER_FUNC("No-Chunks") \ + HEADER_FUNC("Origin") \ + HEADER_FUNC("X-Ot-Span-Context") \ + HEADER_FUNC("Path") \ + HEADER_FUNC("Protocol") \ + HEADER_FUNC("Proxy-Connection") \ + HEADER_FUNC("Referer") \ + HEADER_FUNC("X-Request-Id") \ + HEADER_FUNC("Scheme") \ + HEADER_FUNC("Server") \ + HEADER_FUNC("Status") \ + HEADER_FUNC("TE") \ + HEADER_FUNC("Transfer-Encoding") \ + HEADER_FUNC("Upgrade") \ + HEADER_FUNC("User-Agent") \ + HEADER_FUNC("Vary") \ + HEADER_FUNC("Via") + +// HEADER_FUNC to insert "name" into the MultivaluedHeadersSet of Envoy headers. +#define MULTIVALUE_ENVOY_HEADER(name) {name}, + +} // namespace + +namespace quiche { + +const size_t BalsaBuffer::kDefaultBlocksize; + +const BalsaHeaders::MultivaluedHeadersSet& +BalsaHeaders::multivalued_envoy_headers() { + static const MultivaluedHeadersSet* multivalued_envoy_headers = + new MultivaluedHeadersSet({ALL_ENVOY_HEADERS(MULTIVALUE_ENVOY_HEADER)}); + return *multivalued_envoy_headers; +} + +void BalsaHeaders::ParseTokenList(absl::string_view header_value, + HeaderTokenList* tokens) { + if (header_value.empty()) { + return; + } + const char* start = header_value.begin(); + const char* end = header_value.end(); + while (true) { + // search for first nonwhitespace, non separator char. + while (*start == ',' || *start <= ' ') { + ++start; + if (start == end) { + return; + } + } + // found. marked. + const char* nws = start; + + // search for next whitspace or separator char. 
+ while (*start != ',' && *start > ' ') { + ++start; + if (start == end) { + if (nws != start) { + tokens->push_back(absl::string_view(nws, start - nws)); + } + return; + } + } + tokens->push_back(absl::string_view(nws, start - nws)); + } +} + +// This can be called after a std::move() operation, so things might be +// in an unspecified state after the move. +void BalsaHeaders::Clear() { + balsa_buffer_.Clear(); + transfer_encoding_is_chunked_ = false; + content_length_ = 0; + content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; + parsed_response_code_ = 0; + firstline_buffer_base_idx_ = 0; + whitespace_1_idx_ = 0; + non_whitespace_1_idx_ = 0; + whitespace_2_idx_ = 0; + non_whitespace_2_idx_ = 0; + whitespace_3_idx_ = 0; + non_whitespace_3_idx_ = 0; + whitespace_4_idx_ = 0; + header_lines_.clear(); + header_lines_.shrink_to_fit(); +} + +void BalsaHeaders::CopyFrom(const BalsaHeaders& other) { + // Protect against copying with self. + if (this == &other) { + return; + } + + balsa_buffer_.CopyFrom(other.balsa_buffer_); + transfer_encoding_is_chunked_ = other.transfer_encoding_is_chunked_; + content_length_ = other.content_length_; + content_length_status_ = other.content_length_status_; + parsed_response_code_ = other.parsed_response_code_; + firstline_buffer_base_idx_ = other.firstline_buffer_base_idx_; + whitespace_1_idx_ = other.whitespace_1_idx_; + non_whitespace_1_idx_ = other.non_whitespace_1_idx_; + whitespace_2_idx_ = other.whitespace_2_idx_; + non_whitespace_2_idx_ = other.non_whitespace_2_idx_; + whitespace_3_idx_ = other.whitespace_3_idx_; + non_whitespace_3_idx_ = other.non_whitespace_3_idx_; + whitespace_4_idx_ = other.whitespace_4_idx_; + header_lines_ = other.header_lines_; +} + +void BalsaHeaders::AddAndMakeDescription(absl::string_view key, + absl::string_view value, + HeaderLineDescription* d) { + QUICHE_CHECK(d != nullptr); + + if (enforce_header_policy_) { + QuicheHandleHeaderPolicy(key); + } + + // + 2 to size for ": " + size_t 
line_size = key.size() + 2 + value.size();
+  BalsaBuffer::Blocks::size_type block_buffer_idx = 0;
+  char* storage = balsa_buffer_.Reserve(line_size, &block_buffer_idx);
+  // Offset of the reserved region from the start of its block.
+  size_t base_idx = storage - GetPtr(block_buffer_idx);
+
+  // Lay the line out as: key, ':', ' ', value.
+  char* cur_loc = storage;
+  memcpy(cur_loc, key.data(), key.size());
+  cur_loc += key.size();
+  *cur_loc = ':';
+  ++cur_loc;
+  *cur_loc = ' ';
+  ++cur_loc;
+  memcpy(cur_loc, value.data(), value.size());
+  // Arguments, in order: start of key, end of key, start of value (just past
+  // ": "), end of value, block index.
+  *d = HeaderLineDescription(
+      base_idx, base_idx + key.size(), base_idx + key.size() + 2,
+      base_idx + key.size() + 2 + value.size(), block_buffer_idx);
+}
+
+// Builds a new line "key: <old value>,<value>" in newly reserved buffer
+// space, where <old value> is the value currently described by `*d`, and
+// stores the new description in `*d`. If the old value is empty this
+// delegates to AddAndMakeDescription(), so no comma is written.
+void BalsaHeaders::AppendAndMakeDescription(absl::string_view key,
+                                            absl::string_view value,
+                                            HeaderLineDescription* d) {
+  // Figure out how much space we need to reserve for the new header size.
+  size_t old_value_size = d->last_char_idx - d->value_begin_idx;
+  if (old_value_size == 0) {
+    AddAndMakeDescription(key, value, d);
+    return;
+  }
+  absl::string_view old_value(GetPtr(d->buffer_base_idx) + d->value_begin_idx,
+                              old_value_size);
+
+  BalsaBuffer::Blocks::size_type block_buffer_idx = 0;
+  // + 3 because we potentially need to add ": ", and "," to the line.
+  size_t new_size = key.size() + 3 + old_value_size + value.size();
+  char* storage = balsa_buffer_.Reserve(new_size, &block_buffer_idx);
+  size_t base_idx = storage - GetPtr(block_buffer_idx);
+
+  absl::string_view first_value = old_value;
+  absl::string_view second_value = value;
+  // Lay the line out as: key, ':', ' ', old value, ',', new value.
+  char* cur_loc = storage;
+  memcpy(cur_loc, key.data(), key.size());
+  cur_loc += key.size();
+  *cur_loc = ':';
+  ++cur_loc;
+  *cur_loc = ' ';
+  ++cur_loc;
+  memcpy(cur_loc, first_value.data(), first_value.size());
+  cur_loc += first_value.size();
+  *cur_loc = ',';
+  ++cur_loc;
+  memcpy(cur_loc, second_value.data(), second_value.size());
+
+  *d = HeaderLineDescription(base_idx, base_idx + key.size(),
+                             base_idx + key.size() + 2, base_idx + new_size,
+                             block_buffer_idx);
+}
+
+// Reset internal flags for chunked transfer encoding or content length if a
+// header we're removing is one of those headers.
+// Note: for Content-Length the content-length state is left untouched while
+// transfer_encoding_is_chunked_ is set.
+void BalsaHeaders::MaybeClearSpecialHeaderValues(absl::string_view key) {
+  if (absl::EqualsIgnoreCase(key, kContentLength)) {
+    if (transfer_encoding_is_chunked_) {
+      return;
+    }
+
+    content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
+    content_length_ = 0;
+  } else if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
+    transfer_encoding_is_chunked_ = false;
+  }
+}
+
+// Removes all keys value pairs with key 'key' starting at 'start'.
+// The line at `start` itself is marked skipped without being compared with
+// `key`; only the following lines are looked up by key.
+void BalsaHeaders::RemoveAllOfHeaderStartingAt(absl::string_view key,
+                                               HeaderLines::iterator start) {
+  MaybeClearSpecialHeaderValues(key);
+  while (start != header_lines_.end()) {
+    start->skip = true;
+    ++start;
+    start = GetHeaderLinesIterator(key, start);
+  }
+}
+
+// Replaces the first non-skipped line for `key` with "key: value" and marks
+// every other line for `key` as skipped; appends a new line if none exists.
+void BalsaHeaders::ReplaceOrAppendHeader(absl::string_view key,
+                                         absl::string_view value) {
+  const HeaderLines::iterator end = header_lines_.end();
+  const HeaderLines::iterator begin = header_lines_.begin();
+  HeaderLines::iterator i = GetHeaderLinesIterator(key, begin);
+  if (i != end) {
+    // First, remove all of the header lines including this one.
+    // We want to
+    // remove before replacing, in case our replacement ends up being appended
+    // at the end (and thus would be removed by this call)
+    RemoveAllOfHeaderStartingAt(key, i);
+    // Now, take the first instance and replace it. This will remove the
+    // 'skipped' tag if the replacement is done in-place.
+    AddAndMakeDescription(key, value, &(*i));
+    return;
+  }
+  AppendHeader(key, value);
+}
+
+// Adds a brand-new "key: value" line at the end of the header line list.
+void BalsaHeaders::AppendHeader(absl::string_view key,
+                                absl::string_view value) {
+  HeaderLineDescription new_line;
+  AddAndMakeDescription(key, value, &new_line);
+  header_lines_.push_back(new_line);
+}
+
+// Appends `value`, comma separated, to the first non-skipped line for `key`.
+// The combined line is stored as a new entry and the old entry is marked
+// skipped. If no line for `key` exists, a plain "key: value" line is added.
+void BalsaHeaders::AppendToHeader(absl::string_view key,
+                                  absl::string_view value) {
+  const HeaderLines::iterator existing =
+      GetHeaderLinesIterator(key, header_lines_.begin());
+  if (existing != header_lines_.end()) {
+    HeaderLineDescription combined = *existing;
+    AppendAndMakeDescription(key, value, &combined);
+
+    // The old line must be flagged before push_back(), which may invalidate
+    // `existing`.
+    existing->skip = true;
+    header_lines_.push_back(combined);
+    return;
+  }
+  // Nothing to append to: just add the key/value pair as a new header.
+  AppendHeader(key, value);
+}
+
+// Same as AppendToHeader(), except that it targets the last non-skipped line
+// for `key` and inserts a space after the comma (yielding "old, value").
+void BalsaHeaders::AppendToHeaderWithCommaAndSpace(absl::string_view key,
+                                                   absl::string_view value) {
+  const HeaderLines::iterator existing =
+      GetHeaderLinesIteratorForLastMultivaluedHeader(key);
+  if (existing != header_lines_.end()) {
+    const std::string padded_value = absl::StrCat(" ", value);
+    HeaderLineDescription combined = *existing;
+    AppendAndMakeDescription(key, padded_value, &combined);
+
+    // Flag the old line before push_back(), which may invalidate `existing`.
+    existing->skip = true;
+    header_lines_.push_back(combined);
+    return;
+  }
+  // No such header yet: add the pair as-is, with no extra leading space in
+  // front of the value.
+  AppendHeader(key, value);
+}
+
+// Returns a view of the value bytes described by `line`.
+absl::string_view BalsaHeaders::GetValueFromHeaderLineDescription(
+    const HeaderLineDescription& line) const {
+  QUICHE_DCHECK_GE(line.last_char_idx, line.value_begin_idx);
+  const char* value_start = GetPtr(line.buffer_base_idx) + line.value_begin_idx;
+  const size_t value_len = line.last_char_idx - line.value_begin_idx;
+  return absl::string_view(value_start, value_len);
+}
+
+// Returns the value of the first non-skipped line for `key`, or an empty view
+// if there is none.
+absl::string_view BalsaHeaders::GetHeader(absl::string_view key) const {
+  QUICHE_DCHECK(!header_properties::IsMultivaluedHeader(key))
+      << "Header '" << key << "' may consist of multiple lines. Do not "
+      << "use BalsaHeaders::GetHeader() or you may be missing some of its "
+      << "values.";
+  const HeaderLines::const_iterator match = GetConstHeaderLinesIterator(key);
+  if (match != header_lines_.end()) {
+    return GetValueFromHeaderLineDescription(*match);
+  }
+  return absl::string_view();
+}
+
+BalsaHeaders::const_header_lines_iterator BalsaHeaders::GetHeaderPosition(
+    absl::string_view key) const {
+  const HeaderLines::const_iterator end = header_lines_.end();
+  HeaderLines::const_iterator i = GetConstHeaderLinesIterator(key);
+  if (i == end) {
+    // TODO(tgreer) Convert from HeaderLines::const_iterator to
+    // const_header_lines_iterator without calling lines().end(), which is
+    // nontrivial. Look for other needless calls to lines().end(), or make
+    // lines().end() trivial.
+    return lines().end();
+  }
+
+  return const_header_lines_iterator(this, (i - header_lines_.begin()));
+}
+
+// Returns a key iterator positioned at the first non-skipped line for `key`,
+// or header_lines_key_end() if there is none.
+BalsaHeaders::const_header_lines_key_iterator BalsaHeaders::GetIteratorForKey(
+    absl::string_view key) const {
+  HeaderLines::const_iterator i = GetConstHeaderLinesIterator(key);
+  if (i == header_lines_.end()) {
+    return header_lines_key_end();
+  }
+
+  return const_header_lines_key_iterator(this, (i - header_lines_.begin()),
+                                         key);
+}
+
+// Returns an iterator to the first non-skipped line whose key equals `key`
+// (ASCII case-insensitive), or header_lines_.end() if there is none.
+BalsaHeaders::HeaderLines::const_iterator
+BalsaHeaders::GetConstHeaderLinesIterator(absl::string_view key) const {
+  const HeaderLines::const_iterator end = header_lines_.end();
+  for (HeaderLines::const_iterator i = header_lines_.begin(); i != end; ++i) {
+    const HeaderLineDescription& line = *i;
+    if (line.skip) {
+      continue;
+    }
+    const absl::string_view current_key(
+        GetPtr(line.buffer_base_idx) + line.first_char_idx,
+        line.key_end_idx - line.first_char_idx);
+    if (absl::EqualsIgnoreCase(current_key, key)) {
+      QUICHE_DCHECK_GE(line.last_char_idx, line.value_begin_idx);
+      return i;
+    }
+  }
+  return end;
+}
+
+// Mutable counterpart of GetConstHeaderLinesIterator() that begins the search
+// at `start` instead of at the first line.
+BalsaHeaders::HeaderLines::iterator BalsaHeaders::GetHeaderLinesIterator(
+    absl::string_view key, BalsaHeaders::HeaderLines::iterator start) {
+  const HeaderLines::iterator end = header_lines_.end();
+  for (HeaderLines::iterator i = start; i != end; ++i) {
+    const HeaderLineDescription& line = *i;
+    if (line.skip) {
+      continue;
+    }
+    const absl::string_view current_key(
+        GetPtr(line.buffer_base_idx) + line.first_char_idx,
+        line.key_end_idx - line.first_char_idx);
+    if (absl::EqualsIgnoreCase(current_key, key)) {
+      QUICHE_DCHECK_GE(line.last_char_idx, line.value_begin_idx);
+      return i;
+    }
+  }
+  return end;
+}
+
+// Returns an iterator to the last non-skipped line whose key equals `key`
+// (ASCII case-insensitive), or header_lines_.end() if there is none.
+BalsaHeaders::HeaderLines::iterator
+BalsaHeaders::GetHeaderLinesIteratorForLastMultivaluedHeader(
+    absl::string_view key) {
+  const HeaderLines::iterator end = header_lines_.end();
+  // Starts out as `end`, so "no match" needs no separate flag and the
+  // iterator is never left uninitialized.
+  HeaderLines::iterator last_found_match = end;
+  for (HeaderLines::iterator i = header_lines_.begin(); i != end; ++i) {
+    const HeaderLineDescription& line = *i;
+    if (line.skip) {
+      continue;
+    }
+    const absl::string_view current_key(
+        GetPtr(line.buffer_base_idx) + line.first_char_idx,
+        line.key_end_idx - line.first_char_idx);
+    if (absl::EqualsIgnoreCase(current_key, key)) {
+      QUICHE_DCHECK_GE(line.last_char_idx, line.value_begin_idx);
+      last_found_match = i;
+    }
+  }
+  return last_found_match;
+}
+
+// Appends the value of every non-skipped line for `key` to `out`, in line
+// order.
+void BalsaHeaders::GetAllOfHeader(absl::string_view key,
+                                  std::vector<absl::string_view>* out) const {
+  // Terminate on header_lines_key_end(), as the other key-iterator loops in
+  // this file do, instead of building lines().end() on every iteration.
+  for (const_header_lines_key_iterator it = GetIteratorForKey(key);
+       it != header_lines_key_end(); ++it) {
+    out->push_back(it->second);
+  }
+}
+
+// Appends to `out` the values of all lines for `key`: first those of the
+// non-skipped lines, then those of the skipped (removed) lines.
+void BalsaHeaders::GetAllOfHeaderIncludeRemoved(
+    absl::string_view key, std::vector<absl::string_view>* out) const {
+  const HeaderLines::const_iterator begin = header_lines_.begin();
+  const HeaderLines::const_iterator end = header_lines_.end();
+  // Two passes: live lines first (add_removed == false), then removed ones.
+  for (bool add_removed : {false, true}) {
+    for (HeaderLines::const_iterator i = begin; i != end; ++i) {
+      const HeaderLineDescription& line = *i;
+      if ((!add_removed && line.skip) || (add_removed && !line.skip)) {
+        continue;
+      }
+      const absl::string_view current_key(
+          GetPtr(line.buffer_base_idx) + line.first_char_idx,
+          line.key_end_idx - line.first_char_idx);
+      if (absl::EqualsIgnoreCase(current_key, key)) {
+        QUICHE_DCHECK_GE(line.last_char_idx, line.value_begin_idx);
+        out->push_back(GetValueFromHeaderLineDescription(line));
+      }
+    }
+  }
+}
+
+namespace {
+
+// Helper function for HeaderHasValue that checks that the specified region
+// within line is preceded by whitespace and a comma or beginning of line,
+// and followed by whitespace and a comma or end of line.
+bool SurroundedOnlyBySpacesAndCommas(stringpiece_ssize_type idx,
+                                     stringpiece_ssize_type end_idx,
+                                     absl::string_view line) {
+  // Walk left from just before the region: only ' ' may appear before a ','
+  // or the start of the line is reached.
+  for (idx = idx - 1; idx >= 0; --idx) {
+    if (line[idx] == ',') {
+      break;
+    }
+    if (line[idx] != ' ') {
+      return false;
+    }
+  }
+
+  // Walk right from the end of the region under the same rule.
+  for (; end_idx < static_cast<int64_t>(line.size()); ++end_idx) {
+    if (line[end_idx] == ',') {
+      break;
+    }
+    if (line[end_idx] != ' ') {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace
+
+// Returns true if any non-skipped line for `key` contains `value` as a
+// complete comma-delimited element (surrounded only by spaces, commas, or
+// the ends of the line). `case_sensitive` selects how `value` is matched and
+// applies to every candidate position in a line, not only the first.
+bool BalsaHeaders::HeaderHasValueHelper(absl::string_view key,
+                                        absl::string_view value,
+                                        bool case_sensitive) const {
+  for (const_header_lines_key_iterator it = GetIteratorForKey(key);
+       it != header_lines_key_end(); ++it) {
+    absl::string_view line = it->second;
+    absl::string_view::size_type idx =
+        case_sensitive ? line.find(value, 0)
+                       : strings::FindIgnoreCase(line, value);
+    while (idx != absl::string_view::npos) {
+      stringpiece_ssize_type end_idx = idx + value.size();
+      if (SurroundedOnlyBySpacesAndCommas(idx, end_idx, line)) {
+        return true;
+      }
+      // Look for the next candidate one position further on.
+      const absl::string_view::size_type next_start = idx + 1;
+      if (next_start > line.size()) {
+        break;
+      }
+      if (case_sensitive) {
+        idx = line.find(value, next_start);
+      } else {
+        // Keep ignoring case on retries. The search runs on the remaining
+        // suffix, so the result is shifted back into `line` coordinates.
+        const absl::string_view::size_type offset =
+            strings::FindIgnoreCase(line.substr(next_start), value);
+        idx = (offset == absl::string_view::npos) ? absl::string_view::npos
+                                                  : next_start + offset;
+      }
+    }
+  }
+  return false;
+}
+
+// Returns true if at least one non-skipped line for `key` has a non-empty
+// value.
+bool BalsaHeaders::HasNonEmptyHeader(absl::string_view key) const {
+  for (const_header_lines_key_iterator it = GetIteratorForKey(key);
+       it != header_lines_key_end(); ++it) {
+    if (!it->second.empty()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+std::string BalsaHeaders::GetAllOfHeaderAsString(absl::string_view key) const {
+  // Use custom formatter to ignore header key and join only header values.
+  // absl::AlphaNumFormatter is the default formatter for absl::StrJoin().
+  auto formatter = [](std::string* out,
+                      std::pair<absl::string_view, absl::string_view> header) {
+    return absl::AlphaNumFormatter()(out, header.second);
+  };
+  return absl::StrJoin(GetIteratorForKey(key), header_lines_key_end(), ",",
+                       formatter);
+}
+
+// Marks as skipped every live line whose key matches, ignoring ASCII case,
+// one of `keys`. The content-length / transfer-encoding flags are reset for
+// every entry of `keys`.
+void BalsaHeaders::RemoveAllOfHeaderInList(const HeaderTokenList& keys) {
+  if (keys.empty()) {
+    return;
+  }
+
+  // The keys are lower-cased into a private set so that callers need not
+  // normalise them first. This costs a copy; a cheaper variant would require
+  // callers to pass lower-case keys and only do the lookup here.
+  absl::flat_hash_set<std::string> doomed_keys;
+  for (const auto& name : keys) {
+    MaybeClearSpecialHeaderValues(name);
+    doomed_keys.insert(absl::AsciiStrToLower(name));
+  }
+
+  for (HeaderLineDescription& desc : header_lines_) {
+    if (!desc.skip) {
+      // Drop the line when its lower-cased key is one of the doomed keys.
+      const size_t name_len = desc.key_end_idx - desc.first_char_idx;
+      const absl::string_view name(
+          GetPtr(desc.buffer_base_idx) + desc.first_char_idx, name_len);
+      const std::string lowered = absl::AsciiStrToLower(name);
+      if (doomed_keys.count(lowered) != 0) {
+        desc.skip = true;
+      }
+    }
+  }
+}
+
+// Marks every live line for `key` as skipped.
+void BalsaHeaders::RemoveAllOfHeader(absl::string_view key) {
+  RemoveAllOfHeaderStartingAt(
+      key, GetHeaderLinesIterator(key, header_lines_.begin()));
+}
+
+// Marks as skipped every live line whose key starts with `prefix` (ASCII
+// case-insensitive), resetting the special-header flags for each such key.
+void BalsaHeaders::RemoveAllHeadersWithPrefix(absl::string_view prefix) {
+  for (HeaderLineDescription& desc : header_lines_) {
+    if (desc.skip) {
+      continue;
+    }
+    const size_t name_len = desc.key_end_idx - desc.first_char_idx;
+    if (name_len < prefix.size()) {
+      continue;
+    }
+    const char* name_begin = GetPtr(desc.buffer_base_idx) + desc.first_char_idx;
+    const absl::string_view name_head(name_begin, prefix.size());
+    if (!absl::EqualsIgnoreCase(name_head, prefix)) {
+      continue;
+    }
+    const absl::string_view full_name(name_begin, name_len);
+    MaybeClearSpecialHeaderValues(full_name);
+    desc.skip = true;
+  }
+}
+
+// Returns true if some live line has a key starting with `prefix` (ASCII
+// case-insensitive).
+bool BalsaHeaders::HasHeadersWithPrefix(absl::string_view prefix) const {
+  for (const HeaderLineDescription& desc : header_lines_) {
+    if (desc.skip) {
+      continue;
+    }
+    if (desc.key_end_idx - desc.first_char_idx < prefix.size()) {
+      continue;
+    }
+    const absl::string_view name_head(
+        GetPtr(desc.buffer_base_idx) + desc.first_char_idx, prefix.size());
+    if (absl::EqualsIgnoreCase(name_head, prefix)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Appends a (key, value) pair to `out` for every live line whose key starts
+// with `prefix` (ASCII case-insensitive).
+void BalsaHeaders::GetAllOfHeaderWithPrefix(
+    absl::string_view prefix,
+    std::vector<std::pair<absl::string_view, absl::string_view>>* out) const {
+  for (const HeaderLineDescription& desc : header_lines_) {
+    if (desc.skip) {
+      continue;
+    }
+    const char* block = GetPtr(desc.buffer_base_idx);
+    const absl::string_view name(block + desc.first_char_idx,
+                                 desc.key_end_idx - desc.first_char_idx);
+    if (!absl::StartsWithIgnoreCase(name, prefix)) {
+      continue;
+    }
+    const absl::string_view contents(
+        block + desc.value_begin_idx,
+        desc.last_char_idx - desc.value_begin_idx);
+    out->push_back(std::make_pair(name, contents));
+  }
+}
+
+void BalsaHeaders::GetAllHeadersWithLimit(
+    std::vector<std::pair<absl::string_view, absl::string_view>>* out,
+    int limit) const {
+  for (HeaderLines::size_type i = 0; i < header_lines_.size(); ++i) {
+    if (limit >= 0 && out->size() >= static_cast<size_t>(limit)) {
+      return;
+    }
+    if (header_lines_[i].skip) {
+      continue;
+    }
+    const HeaderLineDescription& line = header_lines_[i];
+    absl::string_view key(GetPtr(line.buffer_base_idx) + line.first_char_idx,
+                          line.key_end_idx - line.first_char_idx);
+    out->push_back(std::make_pair(
+        key,
+        absl::string_view(GetPtr(line.buffer_base_idx) +
line.value_begin_idx,
+                              line.last_char_idx - line.value_begin_idx)));
+  }
+}
+
+// Removes occurrences of `search_value` from the comma-separated values of
+// every live line for `key` and returns the number of removals.
+// A line whose whitespace-trimmed value equals the needle is marked skipped
+// as a whole. Otherwise the surviving elements are compacted in place inside
+// the line's buffer and last_char_idx is moved back accordingly; a line left
+// with nothing is marked skipped. An empty needle removes nothing.
+size_t BalsaHeaders::RemoveValue(absl::string_view key,
+                                 absl::string_view search_value) {
+  // Remove whitespace around search value.
+  absl::string_view needle = search_value;
+  strings::RemoveWhitespaceContext(&needle);
+  GFE_BUG_IF(bug_22783_2, needle != search_value)
+      << "Search value should not be surrounded by spaces.";
+
+  // We have nothing to do for empty needle strings.
+  if (needle.empty()) {
+    return 0;
+  }
+
+  // The return value: number of removed values.
+  size_t removals = 0;
+
+  // Iterate over all header lines matching key with skip=false.
+  for (HeaderLines::iterator it =
+           GetHeaderLinesIterator(key, header_lines_.begin());
+       it != header_lines_.end(); it = GetHeaderLinesIterator(key, ++it)) {
+    HeaderLineDescription* line = &(*it);
+
+    // If needle given to us is longer than this header, don't consider it.
+    if (line->ValuesLength() < needle.size()) {
+      continue;
+    }
+
+    // If the values are equivalent, just remove the whole line.
+    char* buf = GetPtr(line->buffer_base_idx);  // The head of our buffer.
+    char* value_begin = buf + line->value_begin_idx;
+    // StringPiece containing values that have yet to be processed. The head of
+    // this stringpiece will continually move forward, and its tail
+    // (head+length) will always remain the same.
+    absl::string_view values(value_begin, line->ValuesLength());
+    strings::RemoveWhitespaceContext(&values);
+    if (values.size() == needle.size()) {
+      if (values == needle) {
+        line->skip = true;
+        removals++;
+      }
+      continue;
+    }
+
+    // Find all occurrences of the needle to be removed.
+    // `insertion` is the write cursor: everything before it is the compacted
+    // value kept so far.
+    char* insertion = value_begin;
+    while (values.size() >= needle.size()) {
+      // Strip leading whitespace.
+      ssize_t cur_leading_whitespace =
+          strings::RemoveLeadingWhitespace(&values);
+
+      // See if we've got a match (at least as a prefix).
+      bool found = absl::StartsWith(values, needle);
+
+      // Find the entirety of this value (including trailing comma if existent).
+      bool comma_found = false;
+      size_t cur_size = 0;
+
+      // On a prefix match the comma search starts after the needle.
+      const size_t next_comma =
+          values.find(',', /* pos = */ found ? needle.size() : 0);
+      comma_found = next_comma != absl::string_view::npos;
+      cur_size = (comma_found ? next_comma + 1 : values.size());
+
+      // Make sure that our prefix match is a full match.
+      if (found && cur_size != needle.size()) {
+        absl::string_view cur(values.data(), cur_size);
+        if (comma_found) {
+          cur.remove_suffix(1);
+        }
+        strings::RemoveTrailingWhitespace(&cur);
+        found = (cur.size() == needle.size());
+      }
+
+      // Move as necessary (avoid move just for the sake of leading whitespace).
+      if (found) {
+        removals++;
+        // Remove trailing comma if we happen to have found the last value.
+        if (!comma_found) {
+          // We modify insertion since it'll be used to update last_char_idx.
+          insertion--;
+        }
+      } else {
+        if (insertion + cur_leading_whitespace != values.data()) {
+          // Has the side-effect of also copying any trailing whitespace.
+          memmove(insertion, values.data(), cur_size);
+          insertion += cur_size;
+        } else {
+          // Already in place: just advance the cursor past it.
+          insertion += cur_leading_whitespace + cur_size;
+        }
+      }
+
+      // No longer consider the current value. (Increment.)
+      values.remove_prefix(cur_size);
+    }
+    // Move remaining data.
+    if (!values.empty()) {
+      if (insertion != values.data()) {
+        memmove(insertion, values.data(), values.size());
+      }
+      insertion += values.size();
+    }
+    // Set new line size.
+    if (insertion <= value_begin) {
+      // All values removed.
+      line->skip = true;
+    } else {
+      line->last_char_idx = insertion - buf;
+    }
+  }
+
+  return removals;
+}
+
+// Returns the number of bytes accounted for when writing these headers out:
+// the first line, each live "key: value" line, and a 2-byte line separator
+// after each of them plus one terminal separator.
+size_t BalsaHeaders::GetSizeForWriteBuffer() const {
+  // First add the space required for the first line + line separator.
+  size_t write_buf_size = whitespace_4_idx_ - non_whitespace_1_idx_ + 2;
+  // Then add the space needed for each header line to write out + line
+  // separator.
+ const HeaderLines::size_type end = header_lines_.size(); + for (HeaderLines::size_type i = 0; i < end; ++i) { + const HeaderLineDescription& line = header_lines_[i]; + if (!line.skip) { + // Add the key size and ": ". + write_buf_size += line.key_end_idx - line.first_char_idx + 2; + // Add the value size and the line separator. + write_buf_size += line.last_char_idx - line.value_begin_idx + 2; + } + } + // Finally tack on the terminal line separator. + return write_buf_size + 2; +} + +void BalsaHeaders::DumpToString(std::string* str) const { + DumpToPrefixedString(" ", str); +} + +std::string BalsaHeaders::DebugString() const { + std::string s; + DumpToString(&s); + return s; +} + +bool BalsaHeaders::ForEachHeader( + std::function<bool(const absl::string_view key, + const absl::string_view value)> + fn) const { + int s = header_lines_.size(); + for (int i = 0; i < s; ++i) { + const HeaderLineDescription& desc = header_lines_[i]; + if (!desc.skip && desc.KeyLength() > 0) { + const char* stream_begin = GetPtr(desc.buffer_base_idx); + if (!fn(absl::string_view(stream_begin + desc.first_char_idx, + desc.KeyLength()), + absl::string_view(stream_begin + desc.value_begin_idx, + desc.ValuesLength()))) { + return false; + } + } + } + return true; +} + +void BalsaHeaders::DumpToPrefixedString(const char* spaces, + std::string* str) const { + const absl::string_view firstline = first_line(); + const int buffer_length = GetReadableBytesFromHeaderStream(); + // First check whether the header object is empty. + if (firstline.empty() && buffer_length == 0) { + absl::StrAppend(str, "\n", spaces, "<empty header>\n"); + return; + } + + // Then check whether the header is in a partially parsed state. If so, just + // dump the raw data. 
+  if (!FramerIsDoneWriting()) {
+    // buffer_length is passed twice: once for %d and once as the %.*s
+    // precision that bounds how much of the raw stream is printed.
+    absl::StrAppendFormat(str, "\n%s<incomplete header len: %d>\n%s%.*s\n",
+                          spaces, buffer_length, spaces, buffer_length,
+                          OriginalHeaderStreamBegin());
+    return;
+  }
+
+  // If the header is complete, then just dump them with the logical key value
+  // pair.
+  str->reserve(str->size() + GetSizeForWriteBuffer());
+  absl::StrAppend(str, "\n", spaces, firstline, "\n");
+  for (const auto& line : lines()) {
+    absl::StrAppend(str, spaces, line.first, ": ", line.second, "\n");
+  }
+}
+
+// Sets the content length to `length`: removes the existing Content-Length
+// line (or, if there is none but the message is chunked, the
+// Transfer-Encoding line), records the new state and appends a fresh
+// Content-Length line. Does nothing if `length` is already the valid value.
+void BalsaHeaders::SetContentLength(size_t length) {
+  // If the content-length is already the one we want, don't do anything.
+  if (content_length_status_ == BalsaHeadersEnums::VALID_CONTENT_LENGTH &&
+      content_length_ == length) {
+    return;
+  }
+  // If header state indicates that there is either a content length or
+  // transfer encoding header, remove them before adding the new content
+  // length. There is always the possibility that client can manually add
+  // either header directly and cause content_length_status_ or
+  // transfer_encoding_is_chunked_ to be inconsistent with the actual header.
+  // In the interest of efficiency, however, we will assume that clients will
+  // use the header object correctly and thus we will not scan the all headers
+  // each time this function is called.
+  if (content_length_status_ != BalsaHeadersEnums::NO_CONTENT_LENGTH) {
+    RemoveAllOfHeader(kContentLength);
+  } else if (transfer_encoding_is_chunked_) {
+    RemoveAllOfHeader(kTransferEncoding);
+  }
+  // The state is set only after the removals above, because removing a
+  // header may reset these fields.
+  content_length_status_ = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
+  content_length_ = length;
+
+  AppendHeader(kContentLength, absl::StrCat(length));
+}
+
+// Switches the message to chunked transfer encoding, dropping any
+// Content-Length header first. Does nothing if it is already chunked.
+void BalsaHeaders::SetTransferEncodingToChunkedAndClearContentLength() {
+  if (transfer_encoding_is_chunked_) {
+    return;
+  }
+  if (content_length_status_ != BalsaHeadersEnums::NO_CONTENT_LENGTH) {
+    // Per https://httpwg.org/specs/rfc7230.html#header.content-length, we can't
+    // send both transfer-encoding and content-length.
+    ClearContentLength();
+  }
+  ReplaceOrAppendHeader(kTransferEncoding, "chunked");
+  transfer_encoding_is_chunked_ = true;
+}
+
+// Removes the Transfer-Encoding header if the message is currently chunked.
+void BalsaHeaders::SetNoTransferEncoding() {
+  if (transfer_encoding_is_chunked_) {
+    // clears transfer_encoding_is_chunked_
+    RemoveAllOfHeader(kTransferEncoding);
+  }
+}
+
+// Removes every Content-Length line.
+void BalsaHeaders::ClearContentLength() { RemoveAllOfHeader(kContentLength); }
+
+// Returns true if no bytes of the underlying buffer are in use.
+bool BalsaHeaders::IsEmpty() const {
+  return balsa_buffer_.GetTotalBytesUsed() == 0;
+}
+
+// The authority is the value of the Host header.
+absl::string_view BalsaHeaders::Authority() const { return GetHeader(kHost); }
+
+// Replaces the Host header with `value`, adding it if absent.
+void BalsaHeaders::ReplaceOrAppendAuthority(absl::string_view value) {
+  ReplaceOrAppendHeader(kHost, value);
+}
+
+// Removes every Host line.
+void BalsaHeaders::RemoveAuthority() { RemoveAllOfHeader(kHost); }
+
+// Calls `f` exactly once with the value of the Cookie header as returned by
+// GetHeader() (an empty view if there is no such header).
+void BalsaHeaders::ApplyToCookie(
+    std::function<void(absl::string_view cookie)> f) const {
+  f(GetHeader(kCookie));
+}
+
+// Rebuilds the first line as "<version> <code> <reason_phrase>" and records
+// the numeric response code.
+void BalsaHeaders::SetResponseFirstline(absl::string_view version,
+                                        size_t parsed_response_code,
+                                        absl::string_view reason_phrase) {
+  SetFirstlineFromStringPieces(version, absl::StrCat(parsed_response_code),
+                               reason_phrase);
+  parsed_response_code_ = parsed_response_code;
+}
+
+// Writes "<firstline_a> <firstline_b> <firstline_c>" into newly reserved
+// buffer space and points all first-line indices at it.
+void BalsaHeaders::SetFirstlineFromStringPieces(absl::string_view firstline_a,
+                                                absl::string_view firstline_b,
+                                                absl::string_view firstline_c) {
+  size_t
line_size =
+      (firstline_a.size() + firstline_b.size() + firstline_c.size() + 2);
+  // The "+ 2" above covers the two single-space separators written below.
+  char* storage = balsa_buffer_.Reserve(line_size, &firstline_buffer_base_idx_);
+  char* cur_loc = storage;
+
+  memcpy(cur_loc, firstline_a.data(), firstline_a.size());
+  cur_loc += firstline_a.size();
+
+  *cur_loc = ' ';
+  ++cur_loc;
+
+  memcpy(cur_loc, firstline_b.data(), firstline_b.size());
+  cur_loc += firstline_b.size();
+
+  *cur_loc = ' ';
+  ++cur_loc;
+
+  memcpy(cur_loc, firstline_c.data(), firstline_c.size());
+
+  // Each index is derived from the previous one: part a, one space, part b,
+  // one space, part c.
+  whitespace_1_idx_ = storage - BeginningOfFirstLine();
+  non_whitespace_1_idx_ = whitespace_1_idx_;
+  whitespace_2_idx_ = non_whitespace_1_idx_ + firstline_a.size();
+  non_whitespace_2_idx_ = whitespace_2_idx_ + 1;
+  whitespace_3_idx_ = non_whitespace_2_idx_ + firstline_b.size();
+  non_whitespace_3_idx_ = whitespace_3_idx_ + 1;
+  whitespace_4_idx_ = non_whitespace_3_idx_ + firstline_c.size();
+}
+
+// Replaces the first component of the first line with `method`. If it fits in
+// the space of the current component it is written in place so that it ends
+// at whitespace_2_idx_; otherwise the whole first line is rebuilt.
+void BalsaHeaders::SetRequestMethod(absl::string_view method) {
+  // This is the first of the three parts of the firstline.
+  if (method.size() <= (whitespace_2_idx_ - non_whitespace_1_idx_)) {
+    non_whitespace_1_idx_ = whitespace_2_idx_ - method.size();
+    if (!method.empty()) {
+      char* stream_begin = BeginningOfFirstLine();
+      memcpy(stream_begin + non_whitespace_1_idx_, method.data(),
+             method.size());
+    }
+  } else {
+    // The new method is too large to fit in the space available for the old
+    // one, so we have to reformat the firstline.
+    SetRequestFirstlineFromStringPieces(method, request_uri(),
+                                        request_version());
+  }
+}
+
+void BalsaHeaders::SetResponseVersion(absl::string_view version) {
+  // Note: There is no difference between request_method() and
+  // response_version(). Thus, a function to set one is equivalent to a
+  // function to set the other. We maintain two functions for this as it is
+  // much more descriptive, and makes code more understandable.
+  SetRequestMethod(version);
+}
+
+// Replaces the second component of the first line with `uri`; this always
+// rebuilds the whole first line.
+void BalsaHeaders::SetRequestUri(absl::string_view uri) {
+  SetRequestFirstlineFromStringPieces(request_method(), uri, request_version());
+}
+
+void BalsaHeaders::SetResponseCode(absl::string_view code) {
+  // Note: There is no difference between request_uri() and response_code().
+  // Thus, a function to set one is equivalent to a function to set the other.
+  // We maintain two functions for this as it is much more descriptive, and
+  // makes code more understandable.
+  SetRequestUri(code);
+}
+
+// Records the numeric response code and rewrites the first line so that its
+// textual code matches.
+void BalsaHeaders::SetParsedResponseCodeAndUpdateFirstline(
+    size_t parsed_response_code) {
+  parsed_response_code_ = parsed_response_code;
+  SetResponseCode(absl::StrCat(parsed_response_code));
+}
+
+// Replaces the third component of the first line with `version`, in place
+// when it (plus one separating space) fits, otherwise by rebuilding the line.
+void BalsaHeaders::SetRequestVersion(absl::string_view version) {
+  // This is the last of the three parts of the firstline.
+  // Since whitespace_3_idx and non_whitespace_3_idx may point to the same
+  // place, we ensure below that any available space includes space for a
+  // literal space (' ') character between the second component and the third
+  // component.
+  bool fits_in_space_allowed =
+      version.size() + 1 <= whitespace_4_idx_ - whitespace_3_idx_;
+
+  if (fits_in_space_allowed) {
+    char* stream_begin = BeginningOfFirstLine();
+    *(stream_begin + whitespace_3_idx_) = ' ';
+    non_whitespace_3_idx_ = whitespace_3_idx_ + 1;
+    whitespace_4_idx_ = non_whitespace_3_idx_ + version.size();
+    memcpy(stream_begin + non_whitespace_3_idx_, version.data(),
+           version.size());
+  } else {
+    // The new version is too large to fit in the space available for the old
+    // one, so we have to reformat the firstline.
+    SetRequestFirstlineFromStringPieces(request_method(), request_uri(),
+                                        version);
+  }
+}
+
+void BalsaHeaders::SetResponseReasonPhrase(absl::string_view reason) {
+  // Note: There is no difference between request_version() and
+  // response_reason_phrase(). Thus, a function to set one is equivalent to a
+  // function to set the other.
We maintain two functions for this as it is + // much more descriptive, and makes code more understandable. + SetRequestVersion(reason); +} + +void BalsaHeaders::RemoveLastTokenFromHeaderValue(absl::string_view key) { + BalsaHeaders::HeaderLines::iterator it = + GetHeaderLinesIterator(key, header_lines_.begin()); + if (it == header_lines_.end()) { + DLOG(WARNING) << "Attempting to remove last token from a non-existent " + << "header \"" << key << "\""; + return; + } + + // Find the last line with that key. + BalsaHeaders::HeaderLines::iterator header_line; + do { + header_line = it; + it = GetHeaderLinesIterator(key, it + 1); + } while (it != header_lines_.end()); + + // Tokenize just that line. + BalsaHeaders::HeaderTokenList tokens; + // Find where this line is stored. + const char* stream_begin = GetPtr(header_line->buffer_base_idx); + absl::string_view value( + stream_begin + header_line->value_begin_idx, + header_line->last_char_idx - header_line->value_begin_idx); + // Tokenize. + ParseTokenList(value, &tokens); + + if (tokens.empty()) { + DLOG(WARNING) << "Attempting to remove a token from an empty header value " + << "for header \"" << key << "\""; + header_line->skip = true; // remove the whole line + } else if (tokens.size() == 1) { + header_line->skip = true; // remove the whole line + } else { + // Shrink the line size and leave the extra data in the buffer. + absl::string_view new_last_token = tokens[tokens.size() - 2]; + const char* last_char_address = + new_last_token.data() + new_last_token.size() - 1; + const char* stream_begin = GetPtr(header_line->buffer_base_idx); + + header_line->last_char_idx = last_char_address - stream_begin + 1; + } +} + +bool BalsaHeaders::ResponseCanHaveBody(int response_code) { + // For responses, can't have a body if the request was a HEAD, or if it is + // one of these response-codes. rfc2616 section 4.3 + if (response_code >= 100 && response_code < 200) { + // 1xx responses can't have bodies. 
+ return false; + } + + // No content and Not modified responses have no body. + return (response_code != 204) && (response_code != 304); +} + +} // namespace quiche
diff --git a/quiche/common/balsa/balsa_headers.h b/quiche/common/balsa/balsa_headers.h new file mode 100644 index 0000000..e8b4efd --- /dev/null +++ b/quiche/common/balsa/balsa_headers.h
@@ -0,0 +1,1462 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// A lightweight implementation for storing HTTP headers. + +#ifndef QUICHE_COMMON_BALSA_BALSA_HEADERS_H_ +#define QUICHE_COMMON_BALSA_BALSA_HEADERS_H_ + +#include <cstddef> +#include <cstring> +#include <functional> +#include <iterator> +#include <memory> +#include <ostream> +#include <string> +#include <utility> +#include <vector> + +#include "absl/iterator/range.h" +#include "absl/memory/memory.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "quiche/common/balsa/balsa_enums.h" +#include "quiche/common/balsa/header_api.h" +#include "quiche/common/balsa/standard_header_map.h" +#include "quiche/common/platform/api/quiche_bug_tracker.h" +#include "quiche/common/platform/api/quiche_export.h" +#include "quiche/common/platform/api/quiche_logging.h" + +namespace gfe2 { +class Http2HeaderValidator; +} // namespace gfe2 + +namespace quiche { + +class BalsaHeaders; + +namespace test { +class BalsaHeadersTestPeer; +} // namespace test + +// WARNING: +// Note that -no- char* returned by any function in this +// file is null-terminated. + +// This class exists to service the specific needs of BalsaHeaders. +// +// Functional goals: +// 1) provide a backing-store for all of the StringPieces that BalsaHeaders +// returns. Every StringPiece returned from BalsaHeaders should remain +// valid until the BalsaHeader's object is cleared, or the header-line is +// erased. +// 2) provide a backing-store for BalsaFrame, which requires contiguous memory +// for its fast-path parsing functions. Note that the cost of copying is +// less than the cost of requiring the parser to do slow-path parsing, as +// it would have to check for bounds every byte, instead of every 16 bytes. 
+//
+// This class is optimized for the case where headers are stored in one of two
+// buffers. It doesn't make a lot of effort to densely pack memory-- in fact,
+// it -may- be somewhat memory inefficient. This possible inefficiency allows a
+// certain simplicity of implementation and speed which makes it worthwhile.
+// If, in the future, better memory density is required, it should be possible
+// to reuse the abstraction presented by this object to achieve those goals.
+//
+// In the most common use-case, this memory inefficiency should be relatively
+// small.
+//
+// Alternate implementations of BalsaBuffer may include:
+//  - vector of strings, one per header line (similar to HTTPHeaders)
+//  - densely packed strings:
+//    - keep a sorted array/map of free-space linked lists or numbers.
+//      - use the entry that most closely fits your needs.
+//    - at this point, perhaps just use a vector of strings, and let
+//      the allocator do the right thing.
+//
+class QUICHE_EXPORT_PRIVATE BalsaBuffer {
+ public:
+  static constexpr size_t kDefaultBlocksize = 4096;
+
+  // The constructors are private, so the test helper and BalsaHeaders are
+  // befriended to be able to create instances. The class lives in the header
+  // so that it can be inlined into the BalsaHeaders implementation while
+  // remaining testable.
+  friend class BalsaBufferTestSpouse;
+  friend class BalsaHeaders;
+
+  // One contiguous allocation plus its bookkeeping: the owning pointer, the
+  // total size of the allocation and the number of bytes not yet handed out.
+  // It *may* be possible to replace this with a vector<char>, but it's unclear
+  // whether moving a vector can invalidate pointers into it. LWG issue 2321
+  // proposes to fix this.
+  struct QUICHE_EXPORT_PRIVATE BufferBlock {
+   public:
+    std::unique_ptr<char[]> buffer;
+    size_t buffer_size = 0;
+    size_t bytes_free = 0;
+
+    BufferBlock() = default;
+
+    BufferBlock(std::unique_ptr<char[]> buf, size_t size, size_t free)
+        : buffer(std::move(buf)), buffer_size(size), bytes_free(free) {}
+
+    // Move-only; use CopyFrom() for an explicit deep copy.
+    BufferBlock(const BufferBlock&) = delete;
+    BufferBlock& operator=(const BufferBlock&) = delete;
+    BufferBlock(BufferBlock&&) = default;
+    BufferBlock& operator=(BufferBlock&&) = default;
+
+    size_t bytes_used() const { return buffer_size - bytes_free; }
+    char* start_of_unused_bytes() const { return buffer.get() + bytes_used(); }
+
+    // Deep-copies |rhs| into this block. A fresh buffer is always allocated,
+    // even if the old one could be reused: this may let us shed memory, and it
+    // invalidates old StringPieces (making them easier to catch with asan).
+    void CopyFrom(const BufferBlock& rhs) {
+      QUICHE_DCHECK(this != &rhs);
+      buffer_size = rhs.buffer_size;
+      bytes_free = rhs.bytes_free;
+      if (rhs.buffer == nullptr) {
+        buffer = nullptr;
+        return;
+      }
+      buffer = absl::make_unique<char[]>(buffer_size);
+      memcpy(buffer.get(), rhs.buffer.get(), rhs.bytes_used());
+    }
+  };
+
+  using Blocks = std::vector<BufferBlock>;
+
+  // Move-only; use CopyFrom() for an explicit deep copy.
+  BalsaBuffer(const BalsaBuffer&) = delete;
+  BalsaBuffer& operator=(const BalsaBuffer&) = delete;
+  BalsaBuffer(BalsaBuffer&&) = default;
+  BalsaBuffer& operator=(BalsaBuffer&&) = default;
+
+  // Returns the total amount of memory reserved by the buffer blocks.
+  size_t GetTotalBufferBlockSize() const {
+    size_t total_size = 0;
+    for (const BufferBlock& block : blocks_) {
+      total_size += block.buffer_size;
+    }
+    return total_size;
+  }
+
+  // Returns the total amount of memory used by the buffer blocks.
+  size_t GetTotalBytesUsed() const {
+    size_t total_used = 0;
+    for (const BufferBlock& block : blocks_) {
+      total_used += block.bytes_used();
+    }
+    return total_used;
+  }
+
+  // Returns the base pointer of block |block_idx|, or nullptr when the index
+  // is out of range (which also trips a DCHECK).
+  const char* GetPtr(Blocks::size_type block_idx) const {
+    QUICHE_DCHECK_LT(block_idx, blocks_.size())
+        << block_idx << ", " << blocks_.size();
+    if (block_idx >= blocks_.size()) {
+      return nullptr;
+    }
+    return blocks_[block_idx].buffer.get();
+  }
+
+  char* GetPtr(Blocks::size_type block_idx) {
+    QUICHE_DCHECK_LT(block_idx, blocks_.size())
+        << block_idx << ", " << blocks_.size();
+    if (block_idx >= blocks_.size()) {
+      return nullptr;
+    }
+    return blocks_[block_idx].buffer.get();
+  }
+
+  // This function is different from Reserve(), as it ensures that the data
+  // stored via subsequent calls to this function are all contiguous (and in
+  // the order in which these writes happened). This is essentially the same
+  // as a string append.
+  //
+  // You may call this function at any time between object
+  // construction/Clear(), and the calling of the
+  // NoMoreWriteToContiguousBuffer() function.
+  //
+  // You must not call this function after the NoMoreWriteToContiguousBuffer()
+  // function is called, unless a Clear() has been called since.
+  // If you do, the program will abort().
+  //
+  // This condition is placed upon this code so that calls to Reserve() can
+  // append to the buffer in the first block safely, and without invalidating
+  // the StringPiece which it returns.
+  //
+  // This function's main intended user is the BalsaFrame class, which,
+  // for reasons of efficiency, requires that the buffer from which it parses
+  // the headers be contiguous.
+  //
+  void WriteToContiguousBuffer(absl::string_view sp) {
+    if (sp.empty()) {
+      return;
+    }
+    QUICHE_CHECK(can_write_to_contiguous_buffer_);
+
+    if (blocks_.empty()) {
+      blocks_.push_back(AllocBlock());
+    }
+    QUICHE_DCHECK_GE(blocks_.size(), 1u);
+
+    BufferBlock& first = blocks_[0];
+    const size_t incoming = sp.size();
+    if (first.buffer == nullptr && incoming <= blocksize_) {
+      // No storage yet and the data fits in a default-sized block.
+      first = AllocBlock();
+      memcpy(first.start_of_unused_bytes(), sp.data(), incoming);
+    } else if (first.bytes_free < incoming) {
+      // The first block isn't big enough: move its contents into a larger
+      // allocation. The used size is at least doubled; if the incoming data
+      // is larger than that, we grow by the incoming size instead.
+      const size_t used = first.bytes_used();
+      const size_t growth = used < incoming ? incoming : used;
+      const size_t grown_size = used + growth;
+      std::unique_ptr<char[]> grown{new char[grown_size]};
+      if (used != 0u) {
+        memcpy(grown.get(), first.buffer.get(), used);
+      }
+      memcpy(grown.get() + used, sp.data(), incoming);
+      first.buffer = std::move(grown);
+      first.bytes_free = grown_size - used;
+      first.buffer_size = grown_size;
+    } else {
+      memcpy(first.start_of_unused_bytes(), sp.data(), incoming);
+    }
+    // Common to all three paths: account for the bytes just appended.
+    first.bytes_free -= incoming;
+  }
+
+  void NoMoreWriteToContiguousBuffer() {
+    can_write_to_contiguous_buffer_ = false;
+  }
+
+  // Reserves "permanent" storage of the size indicated. Returns a pointer to
+  // the beginning of that storage, and assigns the index of the block used to
+  // block_buffer_idx (when it is non-null). This function uses the first block
+  // IFF the NoMoreWriteToContiguousBuffer function has been called since the
+  // last Clear/Construction.
+  char* Reserve(size_t size, Blocks::size_type* block_buffer_idx) {
+    if (blocks_.empty()) {
+      blocks_.push_back(AllocBlock());
+    }
+
+    // There should always be a 'first_block', even if it
+    // contains nothing.
+    QUICHE_DCHECK_GE(blocks_.size(), 1u);
+
+    // Look for the first eligible block with enough room; block 0 is skipped
+    // while contiguous writes may still relocate it.
+    Blocks::size_type idx = can_write_to_contiguous_buffer_ ? 1 : 0;
+    while (idx < blocks_.size() && blocks_[idx].bytes_free < size) {
+      ++idx;
+    }
+    if (idx == blocks_.size()) {
+      // Nothing fits: append a new block, enlarged when the request exceeds
+      // the default block size.
+      const size_t new_block_size = blocksize_ < size ? size : blocksize_;
+      blocks_.push_back(AllocCustomBlock(new_block_size));
+    }
+
+    BufferBlock& chosen = blocks_[idx];
+    char* storage = chosen.start_of_unused_bytes();
+    chosen.bytes_free -= size;
+    if (block_buffer_idx != nullptr) {
+      *block_buffer_idx = idx;
+    }
+    return storage;
+  }
+
+  // Releases every block and re-enables contiguous writes.
+  void Clear() {
+    blocks_.clear();
+    blocks_.shrink_to_fit();
+    can_write_to_contiguous_buffer_ = true;
+  }
+
+  // Deep-copies the blocks and settings of |b| into this buffer.
+  void CopyFrom(const BalsaBuffer& b) {
+    blocks_.resize(b.blocks_.size());
+    for (Blocks::size_type i = 0; i < blocks_.size(); ++i) {
+      blocks_[i].CopyFrom(b.blocks_[i]);
+    }
+    blocksize_ = b.blocksize_;
+    can_write_to_contiguous_buffer_ = b.can_write_to_contiguous_buffer_;
+  }
+
+  const char* StartOfFirstBlock() const {
+    QUICHE_BUG_IF(bug_if_1182_1, blocks_.empty())
+        << "First block not allocated yet!";
+    if (blocks_.empty()) {
+      return nullptr;
+    }
+    return blocks_[0].buffer.get();
+  }
+
+  const char* EndOfFirstBlock() const {
+    QUICHE_BUG_IF(bug_if_1182_2, blocks_.empty())
+        << "First block not allocated yet!";
+    if (blocks_.empty()) {
+      return nullptr;
+    }
+    return blocks_[0].start_of_unused_bytes();
+  }
+
+  size_t GetReadableBytesOfFirstBlock() const {
+    if (blocks_.empty()) {
+      return 0;
+    }
+    return blocks_[0].bytes_used();
+  }
+
+  bool can_write_to_contiguous_buffer() const {
+    return can_write_to_contiguous_buffer_;
+  }
+  size_t blocksize() const { return blocksize_; }
+  Blocks::size_type num_blocks() const { return blocks_.size(); }
+  // Neither accessor below bounds-checks |idx|.
+  size_t buffer_size(size_t idx) const { return blocks_[idx].buffer_size; }
+  size_t bytes_used(size_t idx) const { return blocks_[idx].bytes_used(); }
+
+ private:
+  BalsaBuffer() : BalsaBuffer(kDefaultBlocksize) {}
+
+  explicit BalsaBuffer(size_t blocksize)
+      : blocksize_(blocksize), can_write_to_contiguous_buffer_(true) {}
+
+  // Allocates an empty block of the default size.
+  BufferBlock AllocBlock() { return AllocCustomBlock(blocksize_); }
+
+  // Allocates an empty block of |blocksize| bytes, all of them free.
+  BufferBlock AllocCustomBlock(size_t blocksize) {
+    return BufferBlock{absl::make_unique<char[]>(blocksize), blocksize,
+                       blocksize};
+  }
+
+  // A container of BufferBlocks
+  Blocks blocks_;
+
+  // The default allocation size for a block.
+  // In general, blocksize_ bytes will be allocated for
+  // each buffer.
+  size_t blocksize_;
+
+  // If set to true, then the first block cannot be used for Reserve() calls as
+  // the WriteToContiguous... function will modify the base pointer for this
+  // block, and the Reserve() calls need to be sure that the base pointer will
+  // not be changing in order to provide the user with StringPieces which
+  // continue to be valid.
+  bool can_write_to_contiguous_buffer_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// All of the functions in the BalsaHeaders class use string pieces, by either
+// using the StringPiece class, or giving an explicit size and char* (as these
+// are the native representation for these string pieces).
+// This is done for several reasons.
+//  1) This minimizes copying/allocation/deallocation as compared to using
+//  string parameters
+//  2) This reduces the number of strlen() calls done (as the length of any
+//  string passed in is relatively likely to be known at compile time, and for
+//  those strings passed back we obviate the need for a strlen() to determine
+//  the size of new storage allocations if a new allocation is required).
+//  3) This class attempts to store all of its data in two linear buffers in
+//  order to enhance the speed of parsing and writing out to a buffer. As a
+//  result, many string pieces are -not- terminated by '\0', and are not
+//  c-strings.  Since this is the case, we must delineate the length of the
+//  string explicitly via a length.
+//
+// WARNING:  The side effect of using StringPiece is that if the underlying
+// buffer changes (due to modifying the headers) the StringPieces which point
+// to the data which was modified, may now contain "garbage", and should not
+// be dereferenced.
+// For example, if you fetch some component of the first-line (request or
+// response), and then you modify the first line, the StringPieces you
+// originally received from the original first-line may no longer be valid.
+//
+// StringPieces pointing to pieces of header lines which have not been
+// erased() or modified should be valid until the object is cleared or
+// destroyed.
+//
+// Key comparisons are case-insensitive.
+
+class QUICHE_EXPORT_PRIVATE BalsaHeaders : public HeaderApi {
+ public:
+  // Describes one parsed header line; it maintains pointers into the
+  // BalsaBuffer.
+  struct HeaderLineDescription;
+
+  using HeaderTokenList = std::vector<absl::string_view>;
+
+  // Iterator that visits every header line.
+  class const_header_lines_iterator;
+
+  // Iterator that stops only at lines whose key matches a given key
+  // (case-insensitive).  See also GetIteratorForKey.
+  //
+  // Compare against header_lines_key_end() to detect the end of iteration;
+  // lines().end() will also work.
+  class const_header_lines_key_iterator;
+
+  // Set of names of headers that might have multiple values. The GFE2 and
+  // Envoy/GFE3 use different sets, and CoalesceOption::kCoalesce can be used
+  // to match Envoy behavior in WriteToBuffer().
+  using MultivaluedHeadersSet =
+      absl::flat_hash_set<absl::string_view, StringPieceCaseHash,
+                          StringPieceCaseEqual>;
+
+  // Map of key => vector<value>, where vector contains ordered list of all
+  // values for |key| (ignoring the casing).
+  using MultivaluedHeadersValuesMap =
+      absl::flat_hash_map<absl::string_view, std::vector<absl::string_view>,
+                          StringPieceCaseHash, StringPieceCaseEqual>;
+
+  // Creates empty headers backed by a buffer with a block size of 4096 bytes.
+  //
+  // TODO(fenix): Revisit the amount of bytes initially allocated to the second
+  // block of the balsa_buffer_. It may make sense to pre-allocate some amount
+  // (roughly the amount we'd append in new headers such as X-User-Ip, etc.)
+  BalsaHeaders() : BalsaHeaders(4096) {}
+
+  // Creates empty headers whose buffer uses |bufsize| as its block size. All
+  // firstline indices and the content length start at zero, with no content
+  // length and no chunked transfer encoding recorded.
+  explicit BalsaHeaders(size_t bufsize)
+      : balsa_buffer_(bufsize),
+        content_length_(0),
+        content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
+        parsed_response_code_(0),
+        firstline_buffer_base_idx_(0),
+        whitespace_1_idx_(0),
+        non_whitespace_1_idx_(0),
+        whitespace_2_idx_(0),
+        non_whitespace_2_idx_(0),
+        whitespace_3_idx_(0),
+        non_whitespace_3_idx_(0),
+        whitespace_4_idx_(0),
+        transfer_encoding_is_chunked_(false) {}
+
+  // Copying BalsaHeaders is expensive, so require that it be visible.
+  // Implicit copies are disabled (use Copy()/CopyFrom() below); moves are
+  // allowed.
+  BalsaHeaders(const BalsaHeaders&) = delete;
+  BalsaHeaders& operator=(const BalsaHeaders&) = delete;
+  BalsaHeaders(BalsaHeaders&&) = default;
+  BalsaHeaders& operator=(BalsaHeaders&&) = default;
+
+  // Returns a range that represents all of the header lines.
+  absl::iterator_range<const_header_lines_iterator> lines() const;
+
+  // Returns an iterator range consisting of the header lines matching key.
+  // String backing 'key' must remain valid for lifetime of range.
+  absl::iterator_range<const_header_lines_key_iterator> lines(
+      absl::string_view key) const;
+
+  // Returns a forward-only iterator that only stops at lines matching key.
+  // String backing 'key' must remain valid for lifetime of iterator.
+  //
+  // Check returned iterator against header_lines_key_end() to determine when
+  // iteration is finished.
+  //
+  // Consider calling lines(key)--it may be more readable.
+  const_header_lines_key_iterator GetIteratorForKey(
+      absl::string_view key) const;
+
+  // End sentinel for iterators returned by GetIteratorForKey().
+  const_header_lines_key_iterator header_lines_key_end() const;
+
+  void erase(const const_header_lines_iterator& it);
+
+  void Clear();
+
+  // Explicit copy functions to avoid risk of accidental copies.
+  // Copy() returns a new default-constructed object populated via CopyFrom().
+  BalsaHeaders Copy() const {
+    BalsaHeaders copy;
+    copy.CopyFrom(*this);
+    return copy;
+  }
+  void CopyFrom(const BalsaHeaders& other);
+
+  // Replaces header entries with key 'key' if they exist, or appends
+  // a new header if none exist.  See 'AppendHeader' below for additional
+  // comments about ContentLength and TransferEncoding headers. Note that this
+  // will allocate new storage every time that it is called.
+  // TODO(fenix): modify this function to reuse existing storage
+  // if it is available.
+  void ReplaceOrAppendHeader(absl::string_view key,
+                             absl::string_view value) override;
+
+  // Append a new header entry to the header object. Clients who wish to append
+  // Content-Length header should use SetContentLength() method instead of
+  // adding the content length header using AppendHeader (manually adding the
+  // content length header will not update the content_length_ and
+  // content_length_status_ values).
+  // Similarly, clients who wish to add or remove the transfer encoding header
+  // in order to apply or remove chunked encoding should use
+  // SetTransferEncodingToChunkedAndClearContentLength() or
+  // SetNoTransferEncoding() instead.
+  void AppendHeader(absl::string_view key, absl::string_view value) override;
+
+  // Appends ',value' to an existing header named 'key'.  If no header with the
+  // correct key exists, it will call AppendHeader(key, value).  Calling this
+  // function on a key which exists several times in the headers will produce
+  // unpredictable results.
+  void AppendToHeader(absl::string_view key, absl::string_view value) override;
+
+  // Appends ', value' to an existing header named 'key'.  If no header with the
+  // correct key exists, it will call AppendHeader(key, value).  Calling this
+  // function on a key which exists several times in the headers will produce
+  // unpredictable results.
+  void AppendToHeaderWithCommaAndSpace(absl::string_view key,
+                                       absl::string_view value) override;
+
+  // Returns the value corresponding to the given header key.  Returns an empty
+  // string if the header key does not exist.  For headers that may consist of
+  // multiple lines, use GetAllOfHeader() instead.
+  absl::string_view GetHeader(absl::string_view key) const override;
+
+  // Iterates over all currently valid header lines, appending their
+  // values into the vector 'out', in top-to-bottom order.
+  // Header-lines which have been erased are not currently valid, and
+  // will not have their values appended. Empty values will be
+  // represented as empty string. If 'key' doesn't exist in the headers at
+  // all, out will not be changed. We do not clear the vector out
+  // before adding new entries. If there are header lines with matching
+  // key but empty value then they are also added to the vector out.
+  // (Basically empty values are not treated in any special manner).
+  //
+  // Example:
+  // Input header:
+  // "GET / HTTP/1.0\r\n"
+  //    "key1: v1\r\n"
+  //    "key1: \r\n"
+  //    "key1:\r\n"
+  //    "key1:  v1\r\n"
+  //    "key1:v2\r\n"
+  //
+  //  vector out is initially: ["foo"]
+  //  vector out after GetAllOfHeader("key1", &out) is:
+  // ["foo", "v1", "", "", "v1", "v2"]
+  //
+  // See gfe::header_properties::IsMultivaluedHeader() for which headers
+  // GFE treats as being multivalued.
+
+  // Make all methods in this overload set visible, and override just one.
+  using HeaderApi::GetAllOfHeader;
+  void GetAllOfHeader(absl::string_view key,
+                      std::vector<absl::string_view>* out) const override;
+
+  // Same as above, but iterates over all header lines including removed ones.
+  // Appends their values into the vector 'out' in top-to-bottom order,
+  // first all valid headers then all that were removed.
+  void GetAllOfHeaderIncludeRemoved(absl::string_view key,
+                                    std::vector<absl::string_view>* out) const;
+
+  // Joins all values for `key` into a comma-separated string.
+  std::string GetAllOfHeaderAsString(absl::string_view key) const override;
+
+  // Determine if a given header is present.  Case-insensitive.
+  inline bool HasHeader(absl::string_view key) const override {
+    return GetConstHeaderLinesIterator(key) != header_lines_.end();
+  }
+
+  // Goes through all headers with key 'key' and checks to see if one of the
+  // values is 'value'.  Returns true if there are headers with the desired key
+  // and value, false otherwise.  Case-insensitive for the key; case-sensitive
+  // for the value.
+  bool HeaderHasValue(absl::string_view key,
+                      absl::string_view value) const override {
+    // The final argument requests case-sensitive value matching.
+    return HeaderHasValueHelper(key, value, true);
+  }
+  // Same as above, but also case-insensitive for the value.
+  bool HeaderHasValueIgnoreCase(absl::string_view key,
+                                absl::string_view value) const override {
+    // The final argument requests case-insensitive value matching.
+    return HeaderHasValueHelper(key, value, false);
+  }
+
+  // Returns true iff any header 'key' exists with non-empty value.
+  bool HasNonEmptyHeader(absl::string_view key) const override;
+
+  // NOTE(review): presumably returns an iterator to the first line matching
+  // 'key' -- confirm against the definition, which is not in this header
+  // section.
+  const_header_lines_iterator GetHeaderPosition(absl::string_view key) const;
+
+  // Removes all headers in given set |keys| at once efficiently. Keys
+  // are case insensitive.
+  //
+  // Alternatives considered:
+  //
+  // 1. Use string_hash_set<>, the caller (such as ClearHopByHopHeaders) lower
+  // cases the keys and RemoveAllOfHeaderInList just does lookup. This according
+  // to microbenchmark gives the best performance because it does not require
+  // an extra copy of the hash table. However, it is not taken because of the
+  // possible risk that caller could forget to lowercase the keys.
+  //
+  // 2. Use flat_hash_set<StringPiece, StringPieceCaseHash,StringPieceCaseEqual>
+  // or string_hash_set<StringPieceCaseHash, StringPieceCaseEqual>. Both appear
+  // to have (much) worse performance with WithoutDupToken and LongHeader case
+  // in microbenchmark.
+  void RemoveAllOfHeaderInList(const HeaderTokenList& keys) override;
+
+  // Removes every header line whose key matches 'key'.
+  void RemoveAllOfHeader(absl::string_view key) override;
+
+  // Removes all headers starting with 'key' [case insensitive]
+  void RemoveAllHeadersWithPrefix(absl::string_view key) override;
+
+  // Returns true if we have at least one header with given prefix
+  // [case insensitive]. Currently for test use only.
+  bool HasHeadersWithPrefix(absl::string_view key) const override;
+
+  // Returns the key value pairs for all headers where the header key begins
+  // with the specified prefix.
+  void GetAllOfHeaderWithPrefix(
+      absl::string_view prefix,
+      std::vector<std::pair<absl::string_view, absl::string_view>>* out)
+      const override;
+
+  // NOTE(review): presumably appends at most 'limit' key/value pairs to 'out';
+  // the handling of a negative limit is not visible here -- confirm against
+  // the definition.
+  void GetAllHeadersWithLimit(
+      std::vector<std::pair<absl::string_view, absl::string_view>>* out,
+      int limit) const override;
+
+  // Removes all values equal to a given value from header lines with given key.
+  // All string operations done here are case-sensitive.
+  // If a header line has only values matching the given value, the entire
+  // line is removed.
+  // If the given value is found in a multi-value header line mixed with other
+  // values, the line is edited in-place to remove the values.
+  // Returns the number of occurrences of value that were removed.
+  // This method runs in linear time.
+  size_t RemoveValue(absl::string_view key, absl::string_view value);
+
+  // Returns the upper bound on the required buffer space to fully write out
+  // the header object (this includes the first line, all header lines, and the
+  // final line separator that marks the ending of the header).
+  size_t GetSizeForWriteBuffer() const override;
+
+  // Selects how header keys are cased when they are serialized: left as
+  // stored, lower-cased, or proper-cased.
+  enum class CaseOption { kNoModification, kLowercase, kPropercase };
+
+  // Indicates whether to coalesce headers with multiple values to match
+  // Envoy/GFE3.
+  enum class CoalesceOption { kNoCoalesce, kCoalesce };
+
+  // The following WriteHeader* methods are template member functions that
+  // place one requirement on the Buffer class: it must implement a Write
+  // method that takes a pointer and a length. The buffer passed in is not
+  // required to be stretchable. For non-stretchable buffers, the user must
+  // call GetSizeForWriteBuffer() to find out the upper bound on the output
+  // buffer space required to make sure that the entire header is serialized.
+  // BalsaHeaders will not check that there is adequate space in the buffer
+  // object during the write.
+  //
+  // NOTE(review): the inline helpers defined in this header call
+  // buffer->WriteString(...), so Buffer must provide at least WriteString();
+  // whether a pointer-and-length Write() is also required depends on the
+  // WriteToBuffer() definition, which is not visible here -- confirm.
+
+  // Writes the entire header and the final line separator that marks the end
+  // of the HTTP header section to the buffer. After this method returns, no
+  // more header data should be written to the buffer.
+  template <typename Buffer>
+  void WriteHeaderAndEndingToBuffer(Buffer* buffer, CaseOption case_option,
+                                    CoalesceOption coalesce_option) const {
+    WriteToBuffer(buffer, case_option, coalesce_option);
+    WriteHeaderEndingToBuffer(buffer);
+  }
+
+  // Convenience overload: keys are written unmodified and values are not
+  // coalesced.
+  template <typename Buffer>
+  void WriteHeaderAndEndingToBuffer(Buffer* buffer) const {
+    WriteHeaderAndEndingToBuffer(buffer, CaseOption::kNoModification,
+                                 CoalesceOption::kNoCoalesce);
+  }
+
+  // Writes the final line separator to the buffer to terminate the HTTP header
+  // section.  After this method returns, no more header data should be written
+  // to the buffer.
+  template <typename Buffer>
+  static void WriteHeaderEndingToBuffer(Buffer* buffer) {
+    buffer->WriteString("\r\n");
+  }
+
+  // Writes the entire header to the buffer without the line separator that
+  // terminates the HTTP header.  This lets users append additional header lines
+  // using WriteHeaderLineToBuffer and then terminate the header with
+  // WriteHeaderEndingToBuffer as the header is serialized to the buffer,
+  // without having to first copy the header.
+  template <typename Buffer>
+  void WriteToBuffer(Buffer* buffer, CaseOption case_option,
+                     CoalesceOption coalesce_option) const;
+
+  // Convenience overload: keys are written unmodified and values are not
+  // coalesced.
+  template <typename Buffer>
+  void WriteToBuffer(Buffer* buffer) const {
+    WriteToBuffer(buffer, CaseOption::kNoModification,
+                  CoalesceOption::kNoCoalesce);
+  }
+
+  // Used by WriteToBuffer to coalesce multiple values of headers listed in
+  // |multivalued_headers| into a single comma-separated value. Public for test.
+ template <typename Buffer> + void WriteToBufferCoalescingMultivaluedHeaders( + Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers, + CaseOption case_option) const; + + // Populates |multivalues| with values of |header_lines_| with keys present + // in |multivalued_headers| set. + void GetValuesOfMultivaluedHeaders( + const MultivaluedHeadersSet& multivalued_headers, + MultivaluedHeadersValuesMap* multivalues) const; + + static std::string ToPropercase(absl::string_view header) { + std::string copy = std::string(header); + bool should_uppercase = true; + for (char& c : copy) { + if (!absl::ascii_isalnum(c)) { + should_uppercase = true; + } else if (should_uppercase) { + c = absl::ascii_toupper(c); + should_uppercase = false; + } else { + c = absl::ascii_tolower(c); + } + } + return copy; + } + + template <typename Buffer> + void WriteHeaderKeyToBuffer(Buffer* buffer, absl::string_view key, + CaseOption case_option) const { + if (case_option == CaseOption::kLowercase) { + buffer->WriteString(absl::AsciiStrToLower(key)); + } else if (case_option == CaseOption::kPropercase) { + const auto& header_set = quiche::GetStandardHeaderSet(); + auto it = header_set.find(key); + if (it != header_set.end()) { + buffer->WriteString(*it); + } else { + buffer->WriteString(ToPropercase(key)); + } + } else { + buffer->WriteString(key); + } + } + + // Takes a header line in the form of a key/value pair and append it to the + // buffer. This function should be called after WriteToBuffer to + // append additional header lines to the header without copying the header. + // When the user is done with appending to the buffer, + // WriteHeaderEndingToBuffer must be used to terminate the HTTP + // header in the buffer. This method is a no-op if key is empty. 
+  template <typename Buffer>
+  void WriteHeaderLineToBuffer(Buffer* buffer, absl::string_view key,
+                               absl::string_view value,
+                               CaseOption case_option) const {
+    // If the key is empty, we don't want to write the rest because it
+    // will not be a well-formed header line.
+    if (!key.empty()) {
+      WriteHeaderKeyToBuffer(buffer, key, case_option);
+      buffer->WriteString(": ");
+      buffer->WriteString(value);
+      buffer->WriteString("\r\n");
+    }
+  }
+
+  // Takes a header line in the form of a key and vector of values and appends
+  // it to the buffer. This function should be called after WriteToBuffer to
+  // append additional header lines to the header without copying the header.
+  // When the user is done with appending to the buffer,
+  // WriteHeaderEndingToBuffer must be used to terminate the HTTP
+  // header in the buffer. This method is a no-op if the |key| is empty.
+  //
+  // The values are joined with "," (no space after the comma). If |values| is
+  // empty, the line is written with an empty value ("key: \r\n").
+  template <typename Buffer>
+  void WriteHeaderLineValuesToBuffer(
+      Buffer* buffer, absl::string_view key,
+      const std::vector<absl::string_view>& values,
+      CaseOption case_option) const {
+    // If the key is empty, we don't want to write the rest because it
+    // will not be a well-formed header line.
+    if (!key.empty()) {
+      WriteHeaderKeyToBuffer(buffer, key, case_option);
+      buffer->WriteString(": ");
+      // A range-for never dereferences end(), so an empty |values| is safe.
+      bool is_first_value = true;
+      for (const absl::string_view value : values) {
+        if (!is_first_value) {
+          buffer->WriteString(",");
+        }
+        buffer->WriteString(value);
+        is_first_value = false;
+      }
+      buffer->WriteString("\r\n");
+    }
+  }
+
+  // Dump the textual representation of the header object to a string, which
+  // is suitable for writing out to logs. All CRLF will be printed out as \n.
+  // This function can be called on a header object in any state. Raw header
+  // data will be printed out if the header object is not completely parsed,
+  // e.g., when there was an error in the middle of parsing.
+  // The header content is appended to the string; the original content is not
+  // cleared.
+  // If used in test cases, WillNotWriteFromFramer() may be of interest.
+  void DumpToString(std::string* str) const;
+  std::string DebugString() const override;
+
+  bool ForEachHeader(std::function<bool(const absl::string_view key,
+                                        const absl::string_view value)>
+                         fn) const override;
+
+  void DumpToPrefixedString(const char* spaces, std::string* str) const;
+
+  // Returns the first line from its first non-whitespace character up to (but
+  // not including) whitespace_4_idx_, or "" when that span is empty.
+  absl::string_view first_line() const {
+    QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_1_idx_);
+    if (whitespace_4_idx_ == non_whitespace_1_idx_) {
+      return "";
+    }
+    return absl::string_view(BeginningOfFirstLine() + non_whitespace_1_idx_,
+                             whitespace_4_idx_ - non_whitespace_1_idx_);
+  }
+  // Same as first_line(), copied into a std::string.
+  std::string first_line_of_request() const override {
+    return std::string(first_line());
+  }
+
+  // Returns the parsed value of the response code if it has been parsed.
+  // Guaranteed to return 0 when unparsed (though it is a much better idea to
+  // verify that the BalsaFrame had no errors while parsing).
+  // This may return response codes which are outside the normal bounds of
+  // HTTP response codes-- it is up to the user of this class to ensure that
+  // the response code is one which is interpretable.
+  size_t parsed_response_code() const override { return parsed_response_code_; }
+
+  // First token of the first line: [non_whitespace_1_idx_, whitespace_2_idx_).
+  absl::string_view request_method() const override {
+    QUICHE_DCHECK_GE(whitespace_2_idx_, non_whitespace_1_idx_);
+    if (whitespace_2_idx_ == non_whitespace_1_idx_) {
+      return "";
+    }
+    return absl::string_view(BeginningOfFirstLine() + non_whitespace_1_idx_,
+                             whitespace_2_idx_ - non_whitespace_1_idx_);
+  }
+
+  absl::string_view response_version() const override {
+    // Note: There is no difference between request_method() and
+    // response_version(). They both could be called
+    // GetFirstTokenFromFirstline()... but that wouldn't be anywhere near as
+    // descriptive.
+    return request_method();
+  }
+
+  // Second token of the first line: [non_whitespace_2_idx_, whitespace_3_idx_).
+  absl::string_view request_uri() const override {
+    QUICHE_DCHECK_GE(whitespace_3_idx_, non_whitespace_2_idx_);
+    if (whitespace_3_idx_ == non_whitespace_2_idx_) {
+      return "";
+    }
+    return absl::string_view(BeginningOfFirstLine() + non_whitespace_2_idx_,
+                             whitespace_3_idx_ - non_whitespace_2_idx_);
+  }
+
+  absl::string_view response_code() const override {
+    // Note: There is no difference between request_uri() and response_code().
+    // They both could be called GetSecondTokenFromFirstline(), but, as noted
+    // in an earlier comment, that wouldn't be as descriptive.
+    return request_uri();
+  }
+
+  // Third token of the first line: [non_whitespace_3_idx_, whitespace_4_idx_).
+  absl::string_view request_version() const override {
+    QUICHE_DCHECK_GE(whitespace_4_idx_, non_whitespace_3_idx_);
+    if (whitespace_4_idx_ == non_whitespace_3_idx_) {
+      return "";
+    }
+    return absl::string_view(BeginningOfFirstLine() + non_whitespace_3_idx_,
+                             whitespace_4_idx_ - non_whitespace_3_idx_);
+  }
+
+  absl::string_view response_reason_phrase() const override {
+    // Note: There is no difference between request_version() and
+    // response_reason_phrase(). They both could be called
+    // GetThirdTokenFromFirstline(), but, as noted in an earlier comment, that
+    // wouldn't be as descriptive.
+    return request_version();
+  }
+
+  // Sets the three first-line tokens of a request; forwards to
+  // SetFirstlineFromStringPieces().
+  void SetRequestFirstlineFromStringPieces(absl::string_view method,
+                                           absl::string_view uri,
+                                           absl::string_view version) {
+    SetFirstlineFromStringPieces(method, uri, version);
+  }
+
+  void SetResponseFirstline(absl::string_view version,
+                            size_t parsed_response_code,
+                            absl::string_view reason_phrase);
+
+  // These functions are exactly the same, except that their names are
+  // different. This is done so that the code using this class is more
+  // expressive.
+  void SetRequestMethod(absl::string_view method) override;
+  void SetResponseVersion(absl::string_view version) override;
+
+  void SetRequestUri(absl::string_view uri) override;
+  void SetResponseCode(absl::string_view code) override;
+  // Stores |parsed_response_code| in parsed_response_code_; nothing else is
+  // modified by this setter.
+  void set_parsed_response_code(size_t parsed_response_code) {
+    parsed_response_code_ = parsed_response_code;
+  }
+  void SetParsedResponseCodeAndUpdateFirstline(
+      size_t parsed_response_code) override;
+
+  // These functions are exactly the same, except that their names are
+  // different. This is done so that the code using this class is more
+  // expressive.
+  void SetRequestVersion(absl::string_view version) override;
+  void SetResponseReasonPhrase(absl::string_view reason_phrase) override;
+
+  // Simple accessors to some of the internal state
+  bool transfer_encoding_is_chunked() const {
+    return transfer_encoding_is_chunked_;
+  }
+
+  // Returns true for 100-199, 204 and 304; false for every other value,
+  // including 0.
+  static bool ResponseCodeImpliesNoBody(size_t code) {
+    // From HTTP spec section 6.1.1 all 1xx responses must not have a body,
+    // as well as 204 No Content and 304 Not Modified.
+    return ((code >= 100) && (code <= 199)) || (code == 204) || (code == 304);
+  }
+
+  // Note: never check this for requests. Nothing bad will happen if you do,
+  // but spec does not allow requests framed by connection close.
+  // TODO(vitaliyl): refactor.
+  bool is_framed_by_connection_close() const {
+    // We declare that response is framed by connection close if it has no
+    // content-length, no transfer encoding, and is allowed to have a body by
+    // the HTTP spec.
+    // parsed_response_code_ is 0 for requests, so ResponseCodeImpliesNoBody
+    // will return false.
+    return (content_length_status_ == BalsaHeadersEnums::NO_CONTENT_LENGTH) &&
+           !transfer_encoding_is_chunked_ &&
+           !ResponseCodeImpliesNoBody(parsed_response_code_);
+  }
+
+  // Returns the stored content_length_ value. Use content_length_valid() or
+  // content_length_status() to learn whether it is meaningful.
+  size_t content_length() const override { return content_length_; }
+  BalsaHeadersEnums::ContentLengthStatus content_length_status() const {
+    return content_length_status_;
+  }
+  // True only when the status is BalsaHeadersEnums::VALID_CONTENT_LENGTH.
+  bool content_length_valid() const override {
+    return content_length_status_ == BalsaHeadersEnums::VALID_CONTENT_LENGTH;
+  }
+
+  // SetContentLength, SetTransferEncodingToChunkedAndClearContentLength, and
+  // SetNoTransferEncoding modifies the header object to use
+  // content-length and transfer-encoding headers in a consistent
+  // manner. They set all internal flags and status so client can get
+  // a consistent view from various accessors.
+  void SetContentLength(size_t length) override;
+  // Sets transfer-encoding to chunked and updates internal state.
+  void SetTransferEncodingToChunkedAndClearContentLength() override;
+  // Removes transfer-encoding headers and updates internal state.
+  void SetNoTransferEncoding() override;
+
+  // If you have a response that needs framing by connection close, use this
+  // method instead of RemoveAllOfHeader("Content-Length"). Has no effect if
+  // transfer_encoding_is_chunked().
+  void ClearContentLength();
+
+  // This should be called if balsa headers are created entirely manually (not
+  // by any of the framer classes) to make sure that function calls like
+  // DumpToString will work correctly.
+  void WillNotWriteFromFramer() {
+    balsa_buffer_.NoMoreWriteToContiguousBuffer();
+  }
+
+  // True if DoneWritingFromFramer or WillNotWriteFromFramer is called.
+  bool FramerIsDoneWriting() const {
+    return !balsa_buffer_.can_write_to_contiguous_buffer();
+  }
+
+  bool IsEmpty() const override;
+
+  // From HeaderApi and ConstHeaderApi.
+  absl::string_view Authority() const override;
+  void ReplaceOrAppendAuthority(absl::string_view value) override;
+  void RemoveAuthority() override;
+  void ApplyToCookie(
+      std::function<void(absl::string_view cookie)> f) const override;
+
+  // Stores |enforce| in enforce_header_policy_.
+  void set_enforce_header_policy(bool enforce) override {
+    enforce_header_policy_ = enforce;
+  }
+
+  // Removes the last token from the header value. In the presence of multiple
+  // header lines with given key, will remove the last token of the last line.
+  // Can be useful if the last encoding has to be removed.
+  void RemoveLastTokenFromHeaderValue(absl::string_view key);
+
+  // Gets the list of names of headers that are multivalued in Envoy.
+  static const MultivaluedHeadersSet& multivalued_envoy_headers();
+
+  // Returns true if HTTP responses with this response code have bodies.
+  static bool ResponseCanHaveBody(int response_code);
+
+  // Given a header value (a view into some buffer), populates the |tokens|
+  // list with the beginning and end indices of all tokens present in the
+  // value string.
+  static void ParseTokenList(absl::string_view header_value,
+                             HeaderTokenList* tokens);
+
+ private:
+  // Storage for the per-line descriptions; see header_lines_.
+  typedef std::vector<HeaderLineDescription> HeaderLines;
+
+  class iterator_base;
+
+  friend class BalsaFrame;
+  friend class gfe2::Http2HeaderValidator;
+  friend class SpdyPayloadFramer;
+  friend class HTTPMessage;
+  friend class test::BalsaHeadersTestPeer;
+
+  friend bool ParseHTTPFirstLine(const char* begin, const char* end,
+                                 bool is_request, BalsaHeaders* headers,
+                                 BalsaFrameEnums::ErrorCode* error_code);
+
+  // Reverse iterators have been removed for lack of use, refer to
+  // cl/30618773 in case they are needed.
+
+  // Pointer to the start of the buffer block that holds the first line
+  // (block firstline_buffer_base_idx_ of balsa_buffer_).
+  const char* BeginningOfFirstLine() const {
+    return GetPtr(firstline_buffer_base_idx_);
+  }
+
+  char* BeginningOfFirstLine() { return GetPtr(firstline_buffer_base_idx_); }
+
+  // Forwards to balsa_buffer_.GetPtr() for block |block_idx|.
+  char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) {
+    return balsa_buffer_.GetPtr(block_idx);
+  }
+
+  const char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) const {
+    return balsa_buffer_.GetPtr(block_idx);
+  }
+
+  // Appends |size| bytes starting at |ptr| to the contiguous buffer of
+  // balsa_buffer_.
+  void WriteFromFramer(const char* ptr, size_t size) {
+    balsa_buffer_.WriteToContiguousBuffer(absl::string_view(ptr, size));
+  }
+
+  // Makes the same call as the public WillNotWriteFromFramer().
+  void DoneWritingFromFramer() {
+    balsa_buffer_.NoMoreWriteToContiguousBuffer();
+  }
+
+  // The three accessors below expose the first block of balsa_buffer_.
+  const char* OriginalHeaderStreamBegin() const {
+    return balsa_buffer_.StartOfFirstBlock();
+  }
+
+  const char* OriginalHeaderStreamEnd() const {
+    return balsa_buffer_.EndOfFirstBlock();
+  }
+
+  size_t GetReadableBytesFromHeaderStream() const {
+    return balsa_buffer_.GetReadableBytesOfFirstBlock();
+  }
+
+  // Out-parameter form: sets |*p| to OriginalHeaderStreamBegin() and |*s| to
+  // GetReadableBytesFromHeaderStream(). Neither pointer is checked for null.
+  void GetReadablePtrFromHeaderStream(const char** p, size_t* s) {
+    *p = OriginalHeaderStreamBegin();
+    *s = GetReadableBytesFromHeaderStream();
+  }
+
+  absl::string_view GetValueFromHeaderLineDescription(
+      const HeaderLineDescription& line) const;
+
+  void AddAndMakeDescription(absl::string_view key, absl::string_view value,
+                             HeaderLineDescription* d);
+
+  void AppendAndMakeDescription(absl::string_view key, absl::string_view value,
+                                HeaderLineDescription* d);
+
+  // Removes all header lines with the given key starting at start.
+  void RemoveAllOfHeaderStartingAt(absl::string_view key,
+                                   HeaderLines::iterator start);
+
+  HeaderLines::const_iterator GetConstHeaderLinesIterator(
+      absl::string_view key) const;
+
+  HeaderLines::iterator GetHeaderLinesIterator(absl::string_view key,
+                                               HeaderLines::iterator start);
+
+  HeaderLines::iterator GetHeaderLinesIteratorForLastMultivaluedHeader(
+      absl::string_view key);
+
+  // Returns an IteratorType positioned at the first header line that is not
+  // marked as skipped (index 0 if there is none).
+  template <typename IteratorType>
+  const IteratorType HeaderLinesBeginHelper() const;
+
+  // Returns an IteratorType positioned one past the last header line that is
+  // not marked as skipped (index 0 if there is none).
+  template <typename IteratorType>
+  const IteratorType HeaderLinesEndHelper() const;
+
+  // Helper function for HeaderHasValue and HeaderHasValueIgnoreCase that
+  // does most of the work.
+  bool HeaderHasValueHelper(absl::string_view key, absl::string_view value,
+                            bool case_sensitive) const;
+
+  // Called by header removal methods to reset internal values for transfer
+  // encoding or content length if we're removing the corresponding headers.
+  void MaybeClearSpecialHeaderValues(absl::string_view key);
+
+  void SetFirstlineFromStringPieces(absl::string_view firstline_a,
+                                    absl::string_view firstline_b,
+                                    absl::string_view firstline_c);
+  BalsaBuffer balsa_buffer_;
+
+  size_t content_length_;
+  BalsaHeadersEnums::ContentLengthStatus content_length_status_;
+  size_t parsed_response_code_;
+  // HTTP firstlines all have the following structure:
+  //  LWS         NONWS  LWS    NONWS   LWS    NONWS   NOTCRLF  CRLF
+  //  [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
+  //  ws1        nws1    ws2    nws2    ws3    nws3             ws4
+  //  |          [-------)      [-------)      [----------------)
+  //    REQ:     method         request_uri    version
+  //   RESP:     version        statuscode     reason
+  //
+  // The first NONWS->LWS component we'll call firstline_a.
+  // The second firstline_b, and the third firstline_c.
+ // + // firstline_a goes from nws1 to (but not including) ws2 + // firstline_b goes from nws2 to (but not including) ws3 + // firstline_c goes from nws3 to (but not including) ws4 + // + // In the code: + // ws1 == whitespace_1_idx_ + // nws1 == non_whitespace_1_idx_ + // ws2 == whitespace_2_idx_ + // nws2 == non_whitespace_2_idx_ + // ws3 == whitespace_3_idx_ + // nws3 == non_whitespace_3_idx_ + // ws4 == whitespace_4_idx_ + BalsaBuffer::Blocks::size_type firstline_buffer_base_idx_; + size_t whitespace_1_idx_; + size_t non_whitespace_1_idx_; + size_t whitespace_2_idx_; + size_t non_whitespace_2_idx_; + size_t whitespace_3_idx_; + size_t non_whitespace_3_idx_; + size_t whitespace_4_idx_; + + bool transfer_encoding_is_chunked_; + + // If true, QUICHE_BUG if a header that starts with an invalid prefix is + // explicitly set. + bool enforce_header_policy_ = true; + + HeaderLines header_lines_; +}; + +// Succinctly describes one header line as indices into a buffer. +struct BalsaHeaders::HeaderLineDescription { + HeaderLineDescription(size_t first_character_index, size_t key_end_index, + size_t value_begin_index, size_t last_character_index, + size_t buffer_base_index) + : first_char_idx(first_character_index), + key_end_idx(key_end_index), + value_begin_idx(value_begin_index), + last_char_idx(last_character_index), + buffer_base_idx(buffer_base_index), + skip(false) {} + + HeaderLineDescription() + : first_char_idx(0), + key_end_idx(0), + value_begin_idx(0), + last_char_idx(0), + buffer_base_idx(0), + skip(false) {} + + size_t KeyLength() const { + QUICHE_DCHECK_GE(key_end_idx, first_char_idx); + return key_end_idx - first_char_idx; + } + size_t ValuesLength() const { + QUICHE_DCHECK_GE(last_char_idx, value_begin_idx); + return last_char_idx - value_begin_idx; + } + + size_t first_char_idx; + size_t key_end_idx; + size_t value_begin_idx; + size_t last_char_idx; + BalsaBuffer::Blocks::size_type buffer_base_idx; + bool skip; +}; + +// Base class for iterating the 
headers in a BalsaHeaders object, returning a
+// pair of string_view's for each header.
+//
+// The iterator traits are declared as member aliases instead of being
+// inherited from std::iterator, which is deprecated since C++17. The aliases
+// are the same types std::iterator<std::forward_iterator_tag, value_type>
+// used to provide.
+class BalsaHeaders::iterator_base {
+ public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = std::pair<absl::string_view, absl::string_view>;
+  using difference_type = std::ptrdiff_t;
+  using pointer = value_type*;
+  using reference = value_type&;
+
+  // default constructor.
+  iterator_base() : headers_(nullptr), idx_(0) {}
+
+  // copy constructor. The cached |value_| is not copied; it is recomputed on
+  // the next dereference.
+  iterator_base(const iterator_base& it)
+      : headers_(it.headers_), idx_(it.idx_) {}
+
+  // Returns a reference to an internally cached (key, value) pair for the
+  // current line. The cache is overwritten by the next dereference of this
+  // iterator.
+  std::pair<absl::string_view, absl::string_view>& operator*() const {
+    return Lookup(idx_);
+  }
+
+  std::pair<absl::string_view, absl::string_view>* operator->() const {
+    return &(this->operator*());
+  }
+
+  // All comparisons look at the line index only; |headers_| is not compared,
+  // so iterators into different BalsaHeaders objects must not be compared.
+  bool operator==(const BalsaHeaders::iterator_base& it) const {
+    return idx_ == it.idx_;
+  }
+
+  bool operator<(const BalsaHeaders::iterator_base& it) const {
+    return idx_ < it.idx_;
+  }
+
+  bool operator<=(const BalsaHeaders::iterator_base& it) const {
+    return idx_ <= it.idx_;
+  }
+
+  bool operator!=(const BalsaHeaders::iterator_base& it) const {
+    return !(*this == it);
+  }
+
+  bool operator>(const BalsaHeaders::iterator_base& it) const {
+    return it < *this;
+  }
+
+  bool operator>=(const BalsaHeaders::iterator_base& it) const {
+    return it <= *this;
+  }
+
+  // This mainly exists so that we can have interesting output for
+  // unittesting. The EXPECT_EQ, EXPECT_NE functions require that
+  // operator<< work for the classes it sees.  It would be better if there
+  // was an additional traits-like system for the gUnit output... but oh
+  // well.
+  friend QUICHE_EXPORT_PRIVATE std::ostream& operator<<(
+      std::ostream& os, const iterator_base& it) {
+    os << "[" << it.headers_ << ", " << it.idx_ << "]";
+    return os;
+  }
+
+ private:
+  friend class BalsaHeaders;
+
+  iterator_base(const BalsaHeaders* headers, HeaderLines::size_type index)
+      : headers_(headers), idx_(index) {}
+
+  // Advances to the next line that is not marked as skipped.
+  void increment() {
+    const HeaderLines& header_lines = headers_->header_lines_;
+    const HeaderLines::size_type header_lines_size = header_lines.size();
+    const HeaderLines::size_type original_idx = idx_;
+    do {
+      ++idx_;
+    } while (idx_ < header_lines_size && header_lines[idx_].skip == true);
+    // The condition below exists so that ++(end() - 1) == end(), even
+    // if there are only 'skip == true' elements between the end() iterator
+    // and the end of the vector of HeaderLineDescriptions.
+    // TODO(fenix): refactor this list so that we don't have to do
+    // linear scanning through skipped headers (and this condition is
+    // then unnecessary)
+    if (idx_ == header_lines_size) {
+      idx_ = original_idx + 1;
+    }
+  }
+
+  // Rebuilds |value_| as views over the key and value of line |index| and
+  // returns a reference to it. |index| must be a valid line index.
+  std::pair<absl::string_view, absl::string_view>& Lookup(
+      HeaderLines::size_type index) const {
+    QUICHE_DCHECK_LT(index, headers_->header_lines_.size());
+    const HeaderLineDescription& line = headers_->header_lines_[index];
+    const char* stream_begin = headers_->GetPtr(line.buffer_base_idx);
+    value_ = std::make_pair(
+        absl::string_view(stream_begin + line.first_char_idx, line.KeyLength()),
+        absl::string_view(stream_begin + line.value_begin_idx,
+                          line.ValuesLength()));
+    return value_;
+  }
+
+  const BalsaHeaders* headers_;
+  HeaderLines::size_type idx_;
+  // Scratch storage that lets operator*() hand out a reference from a const
+  // iterator.
+  mutable std::pair<absl::string_view, absl::string_view> value_;
+};
+
+// A const iterator for all the header lines.
+class BalsaHeaders::const_header_lines_iterator
+    : public BalsaHeaders::iterator_base {
+ public:
+  const_header_lines_iterator() : iterator_base() {}
+
+  const_header_lines_iterator(const const_header_lines_iterator& it)
+      : iterator_base(it.headers_, it.idx_) {}
+
+  // Pre-increment: moves to the next line that is not marked as skipped.
+  const_header_lines_iterator& operator++() {
+    iterator_base::increment();
+    return *this;
+  }
+
+ private:
+  friend class BalsaHeaders;
+
+  const_header_lines_iterator(const BalsaHeaders* headers,
+                              HeaderLines::size_type index)
+      : iterator_base(headers, index) {}
+};
+
+// A const iterator that stops only on header lines for a particular key.
+class BalsaHeaders::const_header_lines_key_iterator
+    : public BalsaHeaders::iterator_base {
+ public:
+  // Pre-increment: advances until the end is reached or the current line's
+  // key equals |key_| ignoring case.
+  const_header_lines_key_iterator& operator++() {
+    do {
+      iterator_base::increment();
+    } while (!AtEnd() && !absl::EqualsIgnoreCase(key_, (**this).first));
+    return *this;
+  }
+
+  // Only forward-iteration makes sense, so no operator-- defined.
+
+ private:
+  friend class BalsaHeaders;
+
+  const_header_lines_key_iterator(const BalsaHeaders* headers,
+                                  HeaderLines::size_type index,
+                                  absl::string_view key)
+      : iterator_base(headers, index), key_(key) {}
+
+  // Should only be used for creating an end iterator.
+  const_header_lines_key_iterator(const BalsaHeaders* headers,
+                                  HeaderLines::size_type index)
+      : iterator_base(headers, index) {}
+
+  // True once this iterator is at or past the end of headers_->lines().
+  bool AtEnd() const { return *this >= headers_->lines().end(); }
+
+  // The key being searched for; left empty by the end-iterator constructor.
+  absl::string_view key_;
+};
+
+// Range over every header line that is not marked as skipped.
+inline absl::iterator_range<BalsaHeaders::const_header_lines_iterator>
+BalsaHeaders::lines() const {
+  return {HeaderLinesBeginHelper<const_header_lines_iterator>(),
+          HeaderLinesEndHelper<const_header_lines_iterator>()};
+}
+
+// Range starting at GetIteratorForKey(key) and ending at
+// header_lines_key_end().
+inline absl::iterator_range<BalsaHeaders::const_header_lines_key_iterator>
+BalsaHeaders::lines(absl::string_view key) const {
+  return {GetIteratorForKey(key), header_lines_key_end()};
+}
+
+inline BalsaHeaders::const_header_lines_key_iterator
+BalsaHeaders::header_lines_key_end() const {
+  return HeaderLinesEndHelper<const_header_lines_key_iterator>();
+}
+
+// Erases the line |it| points at by setting its |skip| flag; this function
+// does not remove the entry from header_lines_.
+inline void BalsaHeaders::erase(const const_header_lines_iterator& it) {
+  QUICHE_DCHECK_EQ(it.headers_, this);
+  QUICHE_DCHECK_LT(it.idx_, header_lines_.size());
+  // idx_ has the unsigned type HeaderLines::size_type, so this check always
+  // holds.
+  QUICHE_DCHECK_GE(it.idx_, 0u);
+  header_lines_[it.idx_].skip = true;
+}
+
+// Writes the first line, a CRLF, and then every non-skipped header line. With
+// CoalesceOption::kCoalesce the header lines are written by
+// WriteToBufferCoalescingMultivaluedHeaders() using
+// multivalued_envoy_headers(); otherwise each line is written separately.
+// The terminating blank line is NOT written.
+template <typename Buffer>
+void BalsaHeaders::WriteToBuffer(Buffer* buffer, CaseOption case_option,
+                                 CoalesceOption coalesce_option) const {
+  // write the first line.
+  const absl::string_view firstline = first_line();
+  if (!firstline.empty()) {
+    buffer->WriteString(firstline);
+  }
+  // The CRLF is written even when the first line is empty.
+  buffer->WriteString("\r\n");
+  if (coalesce_option != CoalesceOption::kCoalesce) {
+    const HeaderLines::size_type end = header_lines_.size();
+    for (HeaderLines::size_type i = 0; i < end; ++i) {
+      const HeaderLineDescription& line = header_lines_[i];
+      if (line.skip) {
+        continue;
+      }
+      const char* line_ptr = GetPtr(line.buffer_base_idx);
+      WriteHeaderLineToBuffer(
+          buffer,
+          absl::string_view(line_ptr + line.first_char_idx, line.KeyLength()),
+          absl::string_view(line_ptr + line.value_begin_idx,
+                            line.ValuesLength()),
+          case_option);
+    }
+  } else {
+    WriteToBufferCoalescingMultivaluedHeaders(
+        buffer, multivalued_envoy_headers(), case_option);
+  }
+}
+
+// Collects, in line order, the values of every non-skipped line whose key is
+// in |multivalued_headers|, grouped by key in |multivalues|.
+inline void BalsaHeaders::GetValuesOfMultivaluedHeaders(
+    const MultivaluedHeadersSet& multivalued_headers,
+    MultivaluedHeadersValuesMap* multivalues) const {
+  // NOTE(review): this reserves by capacity() rather than size(); presumably
+  // meant as an upper bound on the number of distinct keys -- confirm.
+  multivalues->reserve(header_lines_.capacity());
+
+  // Find lines that need to be coalesced and store them in |multivalues|.
+  for (const auto& line : header_lines_) {
+    if (line.skip) {
+      continue;
+    }
+    const char* line_ptr = GetPtr(line.buffer_base_idx);
+    absl::string_view header_key =
+        absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
+    // If this is multivalued header, it may need to be coalesced.
+    if (multivalued_headers.contains(header_key)) {
+      absl::string_view header_value = absl::string_view(
+          line_ptr + line.value_begin_idx, line.ValuesLength());
+      // Add |header_value| to the vector of values for this |header_key|,
+      // therefore preserving the order of values for the same key.
+      (*multivalues)[header_key].push_back(header_value);
+    }
+  }
+}
+
+// Writes every non-skipped header line. For a key in |multivalued_headers|
+// with more than one value, all values are written as one comma-separated
+// line at the position of the key's first line, and its later lines are not
+// written.
+template <typename Buffer>
+void BalsaHeaders::WriteToBufferCoalescingMultivaluedHeaders(
+    Buffer* buffer, const MultivaluedHeadersSet& multivalued_headers,
+    CaseOption case_option) const {
+  MultivaluedHeadersValuesMap multivalues;
+  GetValuesOfMultivaluedHeaders(multivalued_headers, &multivalues);
+
+  // Write out header lines while coalescing those that need to be coalesced.
+  for (const auto& line : header_lines_) {
+    if (line.skip) {
+      continue;
+    }
+    const char* line_ptr = GetPtr(line.buffer_base_idx);
+    absl::string_view header_key =
+        absl::string_view(line_ptr + line.first_char_idx, line.KeyLength());
+    auto header_multivalue = multivalues.find(header_key);
+    // If current line doesn't need to be coalesced (as it is either not
+    // multivalue, or has just a single value so it equals to current line),
+    // then just write it out.
+    if (header_multivalue == multivalues.end() ||
+        header_multivalue->second.size() == 1) {
+      WriteHeaderLineToBuffer(buffer, header_key,
+                              absl::string_view(line_ptr + line.value_begin_idx,
+                                                line.ValuesLength()),
+                              case_option);
+    } else {
+      // If this line needs to be coalesced, then write all its values and clear
+      // them, so the subsequent same header keys will not be written.
+      // An already-emptied list means this key was written earlier in the loop.
+      if (!header_multivalue->second.empty()) {
+        WriteHeaderLineValuesToBuffer(buffer, header_key,
+                                      header_multivalue->second, case_option);
+        // Clear the multivalue list as it is already written out, so subsequent
+        // same header keys will not be written.
+        header_multivalue->second.clear();
+      }
+    }
+  }
+}
+
+// Returns an iterator at the first line that is not marked as skipped. If
+// there are no lines, or every line is skipped, the iterator has index 0,
+// which is also what HeaderLinesEndHelper() returns in those cases.
+template <typename IteratorType>
+const IteratorType BalsaHeaders::HeaderLinesBeginHelper() const {
+  if (header_lines_.empty()) {
+    return IteratorType(this, 0);
+  }
+  const HeaderLines::size_type header_lines_size = header_lines_.size();
+  for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
+    if (header_lines_[i].skip == false) {
+      return IteratorType(this, i);
+    }
+  }
+  return IteratorType(this, 0);
+}
+
+// Returns an iterator one past the last line that is not marked as skipped,
+// or an iterator with index 0 if there is no such line.
+template <typename IteratorType>
+const IteratorType BalsaHeaders::HeaderLinesEndHelper() const {
+  if (header_lines_.empty()) {
+    return IteratorType(this, 0);
+  }
+  const HeaderLines::size_type header_lines_size = header_lines_.size();
+  HeaderLines::size_type i = header_lines_size;
+  // Scan backwards; the do/while form avoids decrementing an unsigned index
+  // below zero.
+  do {
+    --i;
+    if (header_lines_[i].skip == false) {
+      return IteratorType(this, i + 1);
+    }
+  } while (i != 0);
+  return IteratorType(this, 0);
+}
+
+}  // namespace quiche
+
+#endif  // QUICHE_COMMON_BALSA_BALSA_HEADERS_H_
diff --git a/quiche/common/balsa/balsa_visitor_interface.h b/quiche/common/balsa/balsa_visitor_interface.h new file mode 100644 index 0000000..a8fcce3 --- /dev/null +++ b/quiche/common/balsa/balsa_visitor_interface.h
@@ -0,0 +1,172 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef QUICHE_COMMON_BALSA_BALSA_VISITOR_INTERFACE_H_
+#define QUICHE_COMMON_BALSA_BALSA_VISITOR_INTERFACE_H_
+
+#include <cstddef>
+
+#include "quiche/common/balsa/balsa_enums.h"
+#include "quiche/common/platform/api/quiche_export.h"
+
+namespace quiche {
+
+class BalsaHeaders;
+
+// By default the BalsaFrame instantiates a class derived from this interface
+// which does absolutely nothing. If you'd prefer to have interesting
+// functionality execute when any of the below functions are called by the
+// BalsaFrame, then you should subclass it, and set an instantiation of your
+// subclass as the current visitor for the BalsaFrame class using
+// BalsaFrame::set_visitor().
+class QUICHE_EXPORT_PRIVATE BalsaVisitorInterface {
+ public:
+  virtual ~BalsaVisitorInterface() {}
+
+  // Summary:
+  //   This is how the BalsaFrame passes you the raw input which it knows to
+  //   be a part of the body. To be clear, every byte of the Balsa which isn't
+  //   part of the header (or its framing), or trailers will be passed through
+  //   this function.  This includes data as well as chunking framing.
+  // Arguments:
+  //   input - contains the bytes available for read.
+  //   size - contains the number of bytes it is safe to read from input.
+  virtual void OnRawBodyInput(const char* input, size_t size) = 0;
+
+  // Summary:
+  //   This is like OnRawBodyInput, but it will only include those parts of
+  //   the body which would be stored by a program such as wget, i.e. the bytes
+  //   indicating chunking will have been removed. Trailers will not be
+  //   passed in through this function-- they'll be passed in through
+  //   OnTrailerInput.
+  // Arguments:
+  //   input - contains the bytes available for read.
+  //   size - contains the number of bytes it is safe to read from input.
+  virtual void OnBodyChunkInput(const char* input, size_t size) = 0;
+
+  // Summary:
+  //   BalsaFrame passes the raw header data through this function. This is
+  //   not cleaned up in any way.
+  // Arguments:
+  //   input - contains the bytes available for read.
+  //   size - contains the number of bytes it is safe to read from input.
+  virtual void OnHeaderInput(const char* input, size_t size) = 0;
+
+  // Summary:
+  //   BalsaFrame passes the raw trailer data through this function. This is
+  //   not cleaned up in any way.  Note that trailers only occur in a message
+  //   if there was a chunked encoding, and not always then.
+  //
+  // Arguments:
+  //   input - contains the bytes available for read.
+  //   size - contains the number of bytes it is safe to read from input.
+  virtual void OnTrailerInput(const char* input, size_t size) = 0;
+
+  // Summary:
+  //   Since the BalsaFrame already has to parse the headers in order to
+  //   determine proper framing, it might as well pass the parsed and
+  //   cleaned-up results to whatever might need it.  This function exists for
+  //   that purpose-- parsed headers are passed into this function.
+  // Arguments:
+  //   headers - contains the parsed headers in the order in which
+  //             they occurred in the header.
+  virtual void ProcessHeaders(const BalsaHeaders& headers) = 0;
+
+  // Summary:
+  //   Since the BalsaFrame already has to parse the trailer, it might as well
+  //   pass the parsed and cleaned-up results to whatever might need it.
+  //   This function exists for that purpose-- parsed trailer is passed into
+  //   this function. This will not be called if the trailer_ object is
+  //   not set in the framer, even if trailer exists in request/response.
+  // Arguments:
+  //   trailer - contains the parsed headers in the order in which
+  //             they occurred in the trailer.
+  virtual void ProcessTrailers(const BalsaHeaders& trailer) = 0;
+
+  // Summary:
+  //   Called when the first line of the message is parsed, in this case, for a
+  //   request.
+  // Arguments:
+  //   line_input - pointer to the beginning of the first line string.
+  //   line_length - length of the first line string. (i.e. the number of
+  //                 bytes it is safe to read from line_input)
+  //   method_input - pointer to the beginning of the method string
+  //   method_length - length of the method string (i.e. the number
+  //                   of bytes it is safe to read from method_input)
+  //   request_uri_input - pointer to the beginning of the request uri
+  //                       string.
+  //   request_uri_length - length of the request uri string (i.e. the number
+  //                        of bytes it is safe to read from request_uri_input)
+  //   version_input - pointer to the beginning of the version string.
+  //   version_length - length of the version string (i.e. the number
+  //                    of bytes it is safe to read from version_input)
+  virtual void OnRequestFirstLineInput(
+      const char* line_input, size_t line_length, const char* method_input,
+      size_t method_length, const char* request_uri_input,
+      size_t request_uri_length, const char* version_input,
+      size_t version_length) = 0;
+
+  // Summary:
+  //   Called when the first line of the message is parsed, in this case, for a
+  //   response.
+  // Arguments:
+  //   line_input - pointer to the beginning of the first line string.
+  //   line_length - length of the first line string. (i.e. the number of
+  //                 bytes it is safe to read from line_input)
+  //   version_input - pointer to the beginning of the version string.
+  //   version_length - length of the version string (i.e. the number
+  //                    of bytes it is safe to read from version_input)
+  //   status_input - pointer to the beginning of the status string
+  //   status_length - length of the status string (i.e. the number
+  //                   of bytes it is safe to read from status_input)
+  //   reason_input - pointer to the beginning of the reason string
+  //   reason_length - length of the reason string (i.e. the number
+  //                   of bytes it is safe to read from reason_input)
+  virtual void OnResponseFirstLineInput(
+      const char* line_input, size_t line_length, const char* version_input,
+      size_t version_length, const char* status_input, size_t status_length,
+      const char* reason_input, size_t reason_length) = 0;
+
+  // Called when a chunk length is parsed.
+  // Arguments:
+  //   chunk_length - the length of the next incoming chunk.
+  virtual void OnChunkLength(size_t chunk_length) = 0;
+
+  // Summary:
+  //   BalsaFrame passes the raw chunk extension data through this function.
+  //   The data is not cleaned up at all.
+  //
+  // Arguments:
+  //   input - contains the bytes available for read.
+  //   size - contains the number of bytes it is safe to read from input.
+  virtual void OnChunkExtensionInput(const char* input, size_t size) = 0;
+
+  // Summary:
+  //   Called when the header is framed and processed.
+  virtual void HeaderDone() = 0;
+
+  // Summary:
+  //   Called when the 100 Continue headers are framed and processed.
+  virtual void ContinueHeaderDone() = 0;
+
+  // Summary:
+  //   Called when the message is framed and processed.
+  virtual void MessageDone() = 0;
+
+  // Summary:
+  //   Called when an error is detected
+  // Arguments:
+  //   error_code - the error which is to be reported
+  virtual void HandleError(BalsaFrameEnums::ErrorCode error_code) = 0;
+
+  // Summary:
+  //   Called when something meriting a warning is detected
+  // Arguments:
+  //   error_code - the warning which is to be reported
+  virtual void HandleWarning(BalsaFrameEnums::ErrorCode error_code) = 0;
+};
+
+}  // namespace quiche
+
+#endif  // QUICHE_COMMON_BALSA_BALSA_VISITOR_INTERFACE_H_
diff --git a/quiche/common/balsa/framer_interface.h b/quiche/common/balsa/framer_interface.h new file mode 100644 index 0000000..68ee3ca --- /dev/null +++ b/quiche/common/balsa/framer_interface.h
@@ -0,0 +1,24 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_FRAMER_INTERFACE_H_ +#define QUICHE_COMMON_BALSA_FRAMER_INTERFACE_H_ + +#include <cstddef> + +#include "quiche/common/platform/api/quiche_export.h" + +namespace quiche { + +// A minimal interface supported by BalsaFrame and other framer types. For use +// in HttpReader. +class QUICHE_EXPORT_PRIVATE FramerInterface { + public: + virtual ~FramerInterface() {} + virtual size_t ProcessInput(const char* input, size_t length) = 0; +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_FRAMER_INTERFACE_H_
diff --git a/quiche/common/balsa/header_api.h b/quiche/common/balsa/header_api.h new file mode 100644 index 0000000..c8ec740 --- /dev/null +++ b/quiche/common/balsa/header_api.h
@@ -0,0 +1,274 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_HEADER_API_H_ +#define QUICHE_COMMON_BALSA_HEADER_API_H_ + +#include <cstddef> +#include <functional> +#include <string> +#include <vector> + +#include "absl/strings/string_view.h" +#include "quiche/common/platform/api/quiche_export.h" +#include "quiche/common/platform/api/quiche_lower_case_string.h" + +namespace quiche { + +// An API so we can reuse functions for BalsaHeaders and Envoy's HeaderMap. +// Contains only const member functions, so it can wrap const HeaderMaps; +// non-const functions are in HeaderApi. +// +// Depending on the implementation, the headers may act like HTTP/1 headers +// (BalsaHeaders) or HTTP/2 headers (HeaderMap). For HTTP-version-specific +// headers or pseudoheaders like "host" or ":authority", use this API's +// implementation-independent member functions, like Authority(). Looking those +// headers up by name is deprecated and may QUICHE_DCHECK-fail. +// For the differences between HTTP/1 and HTTP/2 headers, see RFC 7540: +// https://tools.ietf.org/html/rfc7540#section-8.1.2 +// +// Operations on header keys are case-insensitive while operations on header +// values are case-sensitive. +// +// Some methods have overloads which accept Envoy-style LowerCaseStrings. Often +// these keys are accessible from Envoy::Http::Headers::get().SomeHeader, +// already lowercaseified. It's faster to avoid converting them to and from +// lowercase. Additionally, some implementations of ConstHeaderApi might take +// advantage of a constant-time lookup for inlined headers. +class QUICHE_EXPORT_PRIVATE ConstHeaderApi { + public: + virtual ~ConstHeaderApi() {} + + // Determine whether the headers are empty. 
+ virtual bool IsEmpty() const = 0; + + // Returns the header entry for the first instance with key |key| + // If header isn't present, returns absl::string_view(). + virtual absl::string_view GetHeader(absl::string_view key) const = 0; + + virtual absl::string_view GetHeader(const QuicheLowerCaseString& key) const { + // Default impl for BalsaHeaders, etc. + return GetHeader(key.get()); + } + + // Collects all of the header entries with key |key| and returns them in |out| + // Headers are returned in the order they are inserted. + virtual void GetAllOfHeader(absl::string_view key, + std::vector<absl::string_view>* out) const = 0; + virtual std::vector<absl::string_view> GetAllOfHeader( + absl::string_view key) const { + std::vector<absl::string_view> out; + GetAllOfHeader(key, &out); + return out; + } + virtual void GetAllOfHeader(const QuicheLowerCaseString& key, + std::vector<absl::string_view>* out) const { + return GetAllOfHeader(key.get(), out); + } + + // Determine if a given header is present. + virtual bool HasHeader(absl::string_view key) const = 0; + + // Determines if a given header is present with non-empty value. + virtual bool HasNonEmptyHeader(absl::string_view key) const = 0; + + // Goes through all headers with key |key| and checks to see if one of the + // values is |value|. Returns true if there are headers with the desired key + // and value, false otherwise. + virtual bool HeaderHasValue(absl::string_view key, + absl::string_view value) const = 0; + + // Same as above, but value is treated as case insensitive. + virtual bool HeaderHasValueIgnoreCase(absl::string_view key, + absl::string_view value) const = 0; + + // Joins all values for header entries with `key` into a comma-separated + // string. Headers are returned in the order they are inserted. 
+ virtual std::string GetAllOfHeaderAsString(absl::string_view key) const = 0; + virtual std::string GetAllOfHeaderAsString( + const QuicheLowerCaseString& key) const { + return GetAllOfHeaderAsString(key.get()); + } + + // Returns true if we have at least one header with given prefix + // [case insensitive]. Currently for test use only. + virtual bool HasHeadersWithPrefix(absl::string_view key) const = 0; + + // Returns the key value pairs for all headers where the header key begins + // with the specified prefix. + // Headers are returned in the order they are inserted. + virtual void GetAllOfHeaderWithPrefix( + absl::string_view prefix, + std::vector<std::pair<absl::string_view, absl::string_view>>* out) + const = 0; + + // Returns the key value pairs for all headers in this object. If 'limit' is + // >= 0, return at most 'limit' headers. + virtual void GetAllHeadersWithLimit( + std::vector<std::pair<absl::string_view, absl::string_view>>* out, + int limit) const = 0; + + // Returns a textual representation of the header object. The format of the + // string may depend on the underlying implementation. + virtual std::string DebugString() const = 0; + + // Applies the argument function to each header line. If the argument + // function returns false, iteration stops and ForEachHeader returns false; + // otherwise, ForEachHeader returns true. + virtual bool ForEachHeader(std::function<bool(const absl::string_view key, + const absl::string_view value)> + fn) const = 0; + + // Returns the upper bound byte size of the headers. This can be used to size + // a Buffer when serializing headers. + virtual size_t GetSizeForWriteBuffer() const = 0; + + // Returns the response code for response headers. If no status code exists, + // the return value is implementation-specific. + virtual absl::string_view response_code() const = 0; + + // Returns the response code for response headers or 0 if no status code + // exists. 
+ virtual size_t parsed_response_code() const = 0; + + // Returns the response reason phrase; the stored one for HTTP/1 headers, or a + // phrase determined from the response code for HTTP/2 headers.. + virtual absl::string_view response_reason_phrase() const = 0; + + // Return the HTTP first line of this request, generally of the format: + // GET /path/ HTTP/1.1 + // TODO(b/110421449): deprecate this method. + virtual std::string first_line_of_request() const = 0; + + // Return the method for this request, such as GET or POST. + virtual absl::string_view request_method() const = 0; + + // Return the request URI from the first line of this request, such as + // "/path/". + virtual absl::string_view request_uri() const = 0; + + // Return the version portion of the first line of this request, such as + // "HTTP/1.1". + // TODO(b/110421449): deprecate this method. + virtual absl::string_view request_version() const = 0; + + virtual absl::string_view response_version() const = 0; + + // Returns the authority portion of a request, or an empty string if missing. + // This is the value of the host header for HTTP/1 headers and the value of + // the :authority pseudo-header for HTTP/2 headers. + virtual absl::string_view Authority() const = 0; + + // Call the provided function on the cookie, avoiding + // copies if possible. The cookie is the value of the Cookie header; for + // HTTP/2 headers, if there are multiple Cookie headers, they will be joined + // by "; ", per go/rfc/7540#section-8.1.2.5. If there is no Cookie header, + // cookie.data() will be nullptr. The lifetime of the cookie isn't guaranteed + // to extend beyond this call. 
+ virtual void ApplyToCookie( + std::function<void(absl::string_view cookie)> f) const = 0; + + virtual size_t content_length() const = 0; + virtual bool content_length_valid() const = 0; + + // TODO(b/118501626): Add functions for working with other headers and + // pseudo-headers whose presence or value depends on HTTP version, including: + // :method, :scheme, :path, connection, and cookie. +}; + +// An API so we can reuse functions for BalsaHeaders and Envoy's HeaderMap. +// Inherits const functions from ConstHeaderApi and adds non-const functions, +// for use with non-const HeaderMaps. +// +// For HTTP-version-specific headers and pseudo-headers, the same caveats apply +// as with ConstHeaderApi. +// +// Operations on header keys are case-insensitive while operations on header +// values are case-sensitive. +class QUICHE_EXPORT_PRIVATE HeaderApi : public virtual ConstHeaderApi { + public: + // Replaces header entries with key |key| if they exist, or appends + // a new header if none exist. + virtual void ReplaceOrAppendHeader(absl::string_view key, + absl::string_view value) = 0; + + // Removes all headers in given set of |keys| at once + virtual void RemoveAllOfHeaderInList( + const std::vector<absl::string_view>& keys) = 0; + + // Removes all headers with key |key|. + virtual void RemoveAllOfHeader(absl::string_view key) = 0; + + // Append a new header entry to the header object with key |key| and value + // |value|. + virtual void AppendHeader(absl::string_view key, absl::string_view value) = 0; + + // Removes all headers starting with 'key' [case insensitive] + virtual void RemoveAllHeadersWithPrefix(absl::string_view key) = 0; + + // Appends ',value' to an existing header named 'key'. If no header with the + // correct key exists, it will call AppendHeader(key, value). Calling this + // function on a key which exists several times in the headers will produce + // unpredictable results. 
+ virtual void AppendToHeader(absl::string_view key, + absl::string_view value) = 0; + + // Appends ', value' to an existing header named 'key'. If no header with the + // correct key exists, it will call AppendHeader(key, value). Calling this + // function on a key which exists several times in the headers will produce + // unpredictable results. + virtual void AppendToHeaderWithCommaAndSpace(absl::string_view key, + absl::string_view value) = 0; + + // Set the header or pseudo-header corresponding to the authority portion of a + // request: host for HTTP/1 headers, or :authority for HTTP/2 headers. + virtual void ReplaceOrAppendAuthority(absl::string_view value) = 0; + virtual void RemoveAuthority() = 0; + + // These set portions of the first line for HTTP/1 headers, or the + // corresponding pseudo-headers for HTTP/2 headers. + virtual void SetRequestMethod(absl::string_view method) = 0; + virtual void SetResponseCode(absl::string_view code) = 0; + // As SetResponseCode, but slightly faster for BalsaHeaders if the caller + // represents the response code as an integer and not a string. + virtual void SetParsedResponseCodeAndUpdateFirstline( + size_t parsed_response_code) = 0; + + // Sets the request URI. + // + // For HTTP/1 headers, sets the request URI portion of the first line (the + // second token). Doesn't parse the URI; leaves the Host header unchanged. + // + // For HTTP/2 headers, sets the :path pseudo-header, and also :scheme and + // :authority if they're present in the URI; otherwise, leaves :scheme and + // :authority unchanged. + // + // The caller is responsible for verifying that the URI is in a valid format. + virtual void SetRequestUri(absl::string_view uri) = 0; + + // These are only meaningful for HTTP/1 headers; for HTTP/2 headers, they do + // nothing. 
+ virtual void SetRequestVersion(absl::string_view version) = 0; + virtual void SetResponseVersion(absl::string_view version) = 0; + virtual void SetResponseReasonPhrase(absl::string_view reason_phrase) = 0; + + // SetContentLength, SetTransferEncodingToChunkedAndClearContentLength, and + // SetNoTransferEncoding modifies the header object to use + // content-length and transfer-encoding headers in a consistent + // manner. They set all internal flags and status, if applicable, so client + // can get a consistent view from various accessors. + virtual void SetContentLength(size_t length) = 0; + // Sets transfer-encoding to chunked and updates internal state. + virtual void SetTransferEncodingToChunkedAndClearContentLength() = 0; + // Removes transfer-encoding headers and updates internal state. + virtual void SetNoTransferEncoding() = 0; + + // If true, QUICHE_BUG if a header that starts with an invalid prefix is + // explicitly set. Not implemented for Envoy headers; can only be set false. + virtual void set_enforce_header_policy(bool enforce) = 0; +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_HEADER_API_H_
diff --git a/quiche/common/balsa/header_properties.cc b/quiche/common/balsa/header_properties.cc new file mode 100644 index 0000000..415bc4d --- /dev/null +++ b/quiche/common/balsa/header_properties.cc
@@ -0,0 +1,95 @@ +#include "quiche/common/balsa/header_properties.h" + +#include <array> + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/string_view.h" +#include "quiche/common/quiche_text_utils.h" + +namespace quiche::header_properties { + +namespace { + +using MultivaluedHeadersSet = + absl::flat_hash_set<absl::string_view, StringPieceCaseHash, + StringPieceCaseEqual>; + +MultivaluedHeadersSet* buildMultivaluedHeaders() { + return new MultivaluedHeadersSet({ + "accept", + "accept-charset", + "accept-encoding", + "accept-language", + "accept-ranges", + // The follow four headers are all CORS standard headers + "access-control-allow-headers", + "access-control-allow-methods", + "access-control-expose-headers", + "access-control-request-headers", + "allow", + "cache-control", + // IETF draft makes this have cache-control syntax + "cdn-cache-control", + "connection", + "content-encoding", + "content-language", + "expect", + "if-match", + "if-none-match", + // See RFC 5988 section 5 + "link", + "pragma", + "proxy-authenticate", + "te", + // Used in the opening handshake of the WebSocket protocol. + "sec-websocket-extensions", + // Not mentioned in RFC 2616, but it can have multiple values. 
+ "set-cookie", + "trailer", + "transfer-encoding", + "upgrade", + "vary", + "via", + "warning", + "www-authenticate", + // De facto standard not in the RFCs + "x-forwarded-for", + // Internal Google usage gives this cache-control syntax + "x-go" /**/ "ogle-cache-control", + }); +} + +std::array<bool, 256> buildInvalidCharLookupTable() { + std::array<bool, 256> invalidCharTable; + invalidCharTable.fill(false); + for (char c : kInvalidHeaderCharList) { + invalidCharTable[c] = true; + } + return invalidCharTable; +} + +} // anonymous namespace + +bool IsMultivaluedHeader(absl::string_view header) { + static const MultivaluedHeadersSet* const multivalued_headers = + buildMultivaluedHeaders(); + return multivalued_headers->contains(header); +} + +bool IsInvalidHeaderChar(char c) { + static const std::array<bool, 256> invalidCharTable = + buildInvalidCharLookupTable(); + + return invalidCharTable[c]; +} + +bool HasInvalidHeaderChars(absl::string_view value) { + for (const char c : value) { + if (IsInvalidHeaderChar(c)) { + return true; + } + } + return false; +} + +} // namespace quiche::header_properties
diff --git a/quiche/common/balsa/header_properties.h b/quiche/common/balsa/header_properties.h new file mode 100644 index 0000000..a747fab --- /dev/null +++ b/quiche/common/balsa/header_properties.h
@@ -0,0 +1,35 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_HEADER_PROPERTIES_H_ +#define QUICHE_COMMON_BALSA_HEADER_PROPERTIES_H_ + +#include "absl/strings/string_view.h" +#include "quiche/common/platform/api/quiche_export.h" + +namespace quiche::header_properties { + +// Returns true if RFC 2616 Section 14 (or other relevant standards or +// practices) indicates that header can have multiple values. Note that nothing +// stops clients from sending multiple values of other headers, so this may not +// be perfectly reliable in practice. +QUICHE_EXPORT_PRIVATE bool IsMultivaluedHeader(absl::string_view header); + +// An array of characters that are invalid in HTTP header field values, +// according to RFC 7230 Section 3.2. Valid low characters not in this array +// are \t (0x09), \n (0x0A), and \r (0x0D). +// Note that HTTP header field names are even more restrictive. +inline constexpr char kInvalidHeaderCharList[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, + 0x0C, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, + 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F}; + +// Returns true if the given `c` is invalid in a header field or the `value` has +// invalid characters. +QUICHE_EXPORT_PRIVATE bool IsInvalidHeaderChar(char c); +QUICHE_EXPORT_PRIVATE bool HasInvalidHeaderChars(absl::string_view value); + +} // namespace quiche::header_properties + +#endif // QUICHE_COMMON_BALSA_HEADER_PROPERTIES_H_
diff --git a/quiche/common/balsa/header_properties_test.cc b/quiche/common/balsa/header_properties_test.cc new file mode 100644 index 0000000..ffb6b83 --- /dev/null +++ b/quiche/common/balsa/header_properties_test.cc
@@ -0,0 +1,49 @@ +#include "quiche/common/balsa/header_properties.h" + +#include "quiche/common/platform/api/quiche_test.h" + +namespace quiche::header_properties::test { +namespace { + +TEST(HeaderPropertiesTest, IsMultivaluedHeaderIsCaseInsensitive) { + EXPECT_TRUE(IsMultivaluedHeader("content-encoding")); + EXPECT_TRUE(IsMultivaluedHeader("Content-Encoding")); + EXPECT_TRUE(IsMultivaluedHeader("set-cookie")); + EXPECT_TRUE(IsMultivaluedHeader("sEt-cOOkie")); + EXPECT_TRUE(IsMultivaluedHeader("X-Goo" /**/ "gle-Cache-Control")); + EXPECT_TRUE(IsMultivaluedHeader("access-control-expose-HEADERS")); + + EXPECT_FALSE(IsMultivaluedHeader("set-cook")); + EXPECT_FALSE(IsMultivaluedHeader("content-length")); + EXPECT_FALSE(IsMultivaluedHeader("Content-Length")); +} + +TEST(HeaderPropertiesTest, IsInvalidHeaderChar) { + EXPECT_TRUE(IsInvalidHeaderChar(0x00)); + EXPECT_TRUE(IsInvalidHeaderChar(0x06)); + EXPECT_TRUE(IsInvalidHeaderChar(0x1F)); + EXPECT_TRUE(IsInvalidHeaderChar(0x7F)); + + EXPECT_FALSE(IsInvalidHeaderChar(' ')); + EXPECT_FALSE(IsInvalidHeaderChar('\t')); + EXPECT_FALSE(IsInvalidHeaderChar('\r')); + EXPECT_FALSE(IsInvalidHeaderChar('\n')); + EXPECT_FALSE(IsInvalidHeaderChar(0x42)); +} + +TEST(HeaderPropertiesTest, HasInvalidHeaderChars) { + const char with_null[] = "Here's l\x00king at you, kid"; + EXPECT_TRUE(HasInvalidHeaderChars(std::string(with_null, sizeof(with_null)))); + EXPECT_TRUE(HasInvalidHeaderChars("Why's \x06 afraid of \x07? \x07\x08\x09")); + EXPECT_TRUE(HasInvalidHeaderChars("\x1Flower power")); + EXPECT_TRUE(HasInvalidHeaderChars("\x7Flowers more powers")); + + EXPECT_FALSE(HasInvalidHeaderChars("Plenty of space")); + EXPECT_FALSE(HasInvalidHeaderChars("Keeping \tabs")); + EXPECT_FALSE(HasInvalidHeaderChars("Al\right")); + EXPECT_FALSE(HasInvalidHeaderChars("\new day")); + EXPECT_FALSE(HasInvalidHeaderChars("\x42 is a nice character")); +} + +} // namespace +} // namespace quiche::header_properties::test
diff --git a/quiche/common/balsa/http_validation_policy.cc b/quiche/common/balsa/http_validation_policy.cc new file mode 100644 index 0000000..b0418ec --- /dev/null +++ b/quiche/common/balsa/http_validation_policy.cc
@@ -0,0 +1,30 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quiche/common/balsa/http_validation_policy.h" + +#include <tuple> + +#include "quiche/common/platform/api/quiche_logging.h" + +namespace quiche { + +HttpValidationPolicy::HttpValidationPolicy(bool enforce_header_keys, + bool enforce_all) + : enforce_header_keys_(enforce_header_keys), enforce_all_(enforce_all) { + if (enforce_all_) { + QUICHE_DCHECK(enforce_header_keys_); + } +} + +HttpValidationPolicy HttpValidationPolicy::CreateDefault() { + return HttpValidationPolicy(true, false); +} + +bool HttpValidationPolicy::operator==(const HttpValidationPolicy& other) const { + return std::tie(enforce_header_keys_, enforce_all_) == + std::tie(other.enforce_header_keys_, other.enforce_all_); +} + +} // namespace quiche
diff --git a/quiche/common/balsa/http_validation_policy.h b/quiche/common/balsa/http_validation_policy.h new file mode 100644 index 0000000..3b75ec7 --- /dev/null +++ b/quiche/common/balsa/http_validation_policy.h
@@ -0,0 +1,62 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_HTTP_VALIDATION_POLICY_H_ +#define QUICHE_COMMON_BALSA_HTTP_VALIDATION_POLICY_H_ + +#include <ostream> + +#include "quiche/common/platform/api/quiche_export.h" + +namespace quiche { + +// An HttpValidationPolicy captures policy choices affecting parsing of HTTP +// requests. It offers individual Boolean member functions to be consulted +// during the parsing of an HTTP request. +class QUICHE_EXPORT_PRIVATE HttpValidationPolicy { + public: + HttpValidationPolicy(bool enforce_header_keys, bool enforce_all); + + static HttpValidationPolicy CreateDefault(); + + // A header name has to be drawn from a set of allowed characters. + bool enforce_header_characters() const { return enforce_header_keys_; } + + // https://tools.ietf.org/html/rfc7230#section-3.2.4 deprecates "folding" + // of long header lines onto continuation lines. + bool disallow_header_continuation_lines() const { return enforce_all_; } + + // A valid header line requires a header name and a colon. + bool require_header_colon() const { return enforce_all_; } + + // https://tools.ietf.org/html/rfc7230#section-3.3.2 disallows multiple + // Content-Length header fields with the same value. + bool disallow_multiple_content_length() const { return enforce_all_; } + + // https://tools.ietf.org/html/rfc7230#section-3.3.2 disallows + // Transfer-Encoding and Content-Length header fields together. 
+ bool disallow_transfer_encoding_with_content_length() const { + return enforce_all_; + } + + bool operator==(const HttpValidationPolicy& other) const; + + friend QUICHE_EXPORT_PRIVATE std::ostream& operator<<( + std::ostream& os, const HttpValidationPolicy& policy) { + os << "HttpValidationPolicy(enforce_header_keys_=" + << policy.enforce_header_keys_ + << ", enforce_all_=" << policy.enforce_all_ << ")"; + return os; + } + + private: + // Enforce more standard-compliant parsing of HTTP headers. + bool enforce_header_keys_; + // Enforce "everything": set for strictest possible parsing. + bool enforce_all_; +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_HTTP_VALIDATION_POLICY_H_
diff --git a/quiche/common/balsa/noop_balsa_visitor.h b/quiche/common/balsa/noop_balsa_visitor.h new file mode 100644 index 0000000..b107dbd --- /dev/null +++ b/quiche/common/balsa/noop_balsa_visitor.h
@@ -0,0 +1,57 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_NOOP_BALSA_VISITOR_H_ +#define QUICHE_COMMON_BALSA_NOOP_BALSA_VISITOR_H_ + +#include <cstddef> + +#include "quiche/common/balsa/balsa_visitor_interface.h" +#include "quiche/common/platform/api/quiche_export.h" + +namespace quiche { + +class BalsaHeaders; + +// Provides empty BalsaVisitorInterface overrides for convenience. +// Intended to be used as a base class for BalsaVisitorInterface subclasses that +// only need to override a small number of methods. +class QUICHE_EXPORT_PRIVATE NoOpBalsaVisitor : public BalsaVisitorInterface { + public: + NoOpBalsaVisitor() = default; + + NoOpBalsaVisitor(const NoOpBalsaVisitor&) = delete; + NoOpBalsaVisitor& operator=(const NoOpBalsaVisitor&) = delete; + + ~NoOpBalsaVisitor() override {} + + void OnRawBodyInput(const char* /*input*/, size_t /*size*/) override {} + void OnBodyChunkInput(const char* /*input*/, size_t /*size*/) override {} + void OnHeaderInput(const char* /*input*/, size_t /*size*/) override {} + void OnTrailerInput(const char* /*input*/, size_t /*size*/) override {} + void ProcessHeaders(const BalsaHeaders& /*headers*/) override {} + void ProcessTrailers(const BalsaHeaders& /*trailer*/) override {} + + void OnRequestFirstLineInput( + const char* /*line_input*/, size_t /*line_length*/, + const char* /*method_input*/, size_t /*method_length*/, + const char* /*request_uri_input*/, size_t /*request_uri_length*/, + const char* /*version_input*/, size_t /*version_length*/) override {} + void OnResponseFirstLineInput( + const char* /*line_input*/, size_t /*line_length*/, + const char* /*version_input*/, size_t /*version_length*/, + const char* /*status_input*/, size_t /*status_length*/, + const char* /*reason_input*/, size_t /*reason_length*/) override {} + void OnChunkLength(size_t /*chunk_length*/) 
override {} + void OnChunkExtensionInput(const char* /*input*/, size_t /*size*/) override {} + void ContinueHeaderDone() override {} + void HeaderDone() override {} + void MessageDone() override {} + void HandleError(BalsaFrameEnums::ErrorCode /*error_code*/) override {} + void HandleWarning(BalsaFrameEnums::ErrorCode /*error_code*/) override {} +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_NOOP_BALSA_VISITOR_H_
diff --git a/quiche/common/balsa/simple_buffer.cc b/quiche/common/balsa/simple_buffer.cc new file mode 100644 index 0000000..e441422 --- /dev/null +++ b/quiche/common/balsa/simple_buffer.cc
@@ -0,0 +1,159 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quiche/common/balsa/simple_buffer.h" + +#include <cstring> +#include <memory> + +#include "quiche/common/platform/api/quiche_bug_tracker.h" +#include "quiche/common/platform/api/quiche_logging.h" + +namespace quiche { + +static const int kInitialSimpleBufferSize = 10; + +SimpleBuffer::SimpleBuffer() + : storage_(new char[kInitialSimpleBufferSize]), + write_idx_(0), + read_idx_(0), + storage_size_(kInitialSimpleBufferSize) {} + +SimpleBuffer::SimpleBuffer(int size) + : write_idx_(0), read_idx_(0), storage_size_(size) { + // Callers may try to allocate overly large blocks, but negative sizes are + // obviously wrong. + QUICHE_CHECK_GE(size, 0); + storage_ = new char[size]; +} + +//////////////////////////////////////////////////////////////////////////////// + +int SimpleBuffer::Write(const char* bytes, int size) { + if (size < 0) { + QUICHE_BUG(simple_buffer_write_negative_size) + << "size must not be negative: " << size; + return 0; + } + + bool has_room = ((storage_size_ - write_idx_) >= size); + if (!has_room) { + Reserve(size); + } + memcpy(storage_ + write_idx_, bytes, size); + AdvanceWritablePtr(size); + return size; +} + +//////////////////////////////////////////////////////////////////////////////// + +int SimpleBuffer::Read(char* bytes, int size) { + if (size < 0) { + QUICHE_BUG(simple_buffer_read_negative_size) + << "size must not be negative: " << size; + return 0; + } + + char* read_ptr = nullptr; + int read_size = 0; + GetReadablePtr(&read_ptr, &read_size); + if (read_size > size) { + read_size = size; + } + memcpy(bytes, read_ptr, read_size); + AdvanceReadablePtr(read_size); + return read_size; +} + +//////////////////////////////////////////////////////////////////////////////// + +// Attempts to reserve a contiguous block of buffer space either by 
reclaiming +// consumed data or by allocating a larger buffer. +void SimpleBuffer::Reserve(int size) { + if (size < 0) { + QUICHE_BUG(simple_buffer_reserve_negative_size) + << "size must not be negative: " << size; + return; + } + + if (size == 0 || storage_size_ - write_idx_ >= size) { + return; + } + + char* read_ptr = nullptr; + int read_size = 0; + GetReadablePtr(&read_ptr, &read_size); + + if (read_size + size <= storage_size_) { + // Can reclaim space from consumed bytes by shifting. + memmove(storage_, read_ptr, read_size); + read_idx_ = 0; + write_idx_ = read_size; + return; + } + + // The new buffer needs to be at least `read_size + size` bytes. + // At least double the buffer to amortize allocation costs. + int new_storage_size = 2 * storage_size_; + if (new_storage_size < size + read_size) { + new_storage_size = size + read_size; + } + + char* new_storage = new char[new_storage_size]; + memcpy(new_storage, read_ptr, read_size); + delete[] storage_; + + read_idx_ = 0; + write_idx_ = read_size; + storage_ = new_storage; + storage_size_ = new_storage_size; +} + +void SimpleBuffer::AdvanceReadablePtr(int amount_to_advance) { + if (amount_to_advance < 0) { + QUICHE_BUG(simple_buffer_advance_read_negative_arg) + << "amount_to_advance must not be negative: " << amount_to_advance; + return; + } + + read_idx_ += amount_to_advance; + if (read_idx_ > write_idx_) { + QUICHE_BUG(simple_buffer_read_ptr_too_far) + << "error: readable pointer advanced beyond writable one"; + read_idx_ = write_idx_; + } + + if (read_idx_ == write_idx_) { + // Buffer is empty, rewind `read_idx_` and `write_idx_` so that next write + // happens at the beginning of buffer instead of cutting free space in two. 
+ Clear(); + } +} + +void SimpleBuffer::AdvanceWritablePtr(int amount_to_advance) { + if (amount_to_advance < 0) { + QUICHE_BUG(simple_buffer_advance_write_negative_arg) + << "amount_to_advance must not be negative: " << amount_to_advance; + return; + } + + write_idx_ += amount_to_advance; + if (write_idx_ > storage_size_) { + QUICHE_BUG(simple_buffer_write_ptr_too_far) + << "error: writable pointer advanced beyond end of storage"; + write_idx_ = storage_size_; + } +} + +QuicheMemSlice SimpleBuffer::ReleaseAsSlice() { + if (write_idx_ == 0) { + return QuicheMemSlice(); + } + QuicheMemSlice slice(std::unique_ptr<char[]>(storage_), write_idx_); + Clear(); + storage_ = nullptr; + storage_size_ = 0; + return slice; +} +} // namespace quiche
diff --git a/quiche/common/balsa/simple_buffer.h b/quiche/common/balsa/simple_buffer.h new file mode 100644 index 0000000..52f3400 --- /dev/null +++ b/quiche/common/balsa/simple_buffer.h
@@ -0,0 +1,116 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_SIMPLE_BUFFER_H_ +#define QUICHE_COMMON_BALSA_SIMPLE_BUFFER_H_ + +#include "absl/strings/string_view.h" +#include "quiche/common/platform/api/quiche_export.h" +#include "quiche/common/platform/api/quiche_mem_slice.h" + +namespace quiche { + +namespace test { +class SimpleBufferTest; +} // namespace test + +// SimpleBuffer stores data in a contiguous region. It can grow on demand, +// which involves moving its data. It keeps track of a read and a write +// position. Reading consumes data. +class QUICHE_EXPORT_PRIVATE SimpleBuffer { + public: + SimpleBuffer(); + // Create SimpleBuffer with `size` reserved capacity. + explicit SimpleBuffer(int size); + + SimpleBuffer(const SimpleBuffer&) = delete; + SimpleBuffer& operator=(const SimpleBuffer&) = delete; + + virtual ~SimpleBuffer() { delete[] storage_; } + + // Returns the number of bytes that can be read from the buffer. + int ReadableBytes() const { return write_idx_ - read_idx_; } + + bool Empty() const { return read_idx_ == write_idx_; } + + // Copies `size` bytes to the buffer. Returns size. + int Write(const char* bytes, int size); + int WriteString(absl::string_view piece) { + return Write(piece.data(), piece.size()); + } + + // Gets a pointer into the buffer that can be written to. Stores the number + // of characters which are allowed to be written in `*size`. The pointer and + // size can be used in functions like recv() or read(). If `*size` is zero + // upon returning from this function, then it is unsafe to dereference `*ptr`. + // Writing to this region after calling any other non-const method results in + // undefined behavior. 
+ void GetWritablePtr(char** ptr, int* size) const { + *ptr = storage_ + write_idx_; + *size = storage_size_ - write_idx_; + } + + // Gets a pointer that can be read from. This pointer (and size) can be used + // in functions like send() or write(). If `*size` is zero upon returning + // from this function, then it is unsafe to dereference `*ptr`. Reading from + // this region after calling any other non-const method results in undefined + // behavior. + void GetReadablePtr(char** ptr, int* size) const { + *ptr = storage_ + read_idx_; + *size = write_idx_ - read_idx_; + } + + // Returns the readable region as a string_view. Reading from this region + // after calling any other non-const method results in undefined behavior. + absl::string_view GetReadableRegion() const { + return absl::string_view(storage_ + read_idx_, write_idx_ - read_idx_); + } + + // Reads bytes out of the buffer, and writes them into `bytes`. Returns the + // number of bytes read. Consumes bytes from the buffer. + int Read(char* bytes, int size); + + // Marks all data consumed, making the entire reserved buffer available for + // write. Does not resize or free up any memory. + void Clear() { read_idx_ = write_idx_ = 0; } + + // Makes sure at least `size` bytes can be written into the buffer. This can + // be an expensive operation: costing a new and a delete, and copying of all + // existing data. Even if the existing buffer does not need to be resized, + // unread data may need to be moved to consolidate fragmented free space. + void Reserve(int size); + + // Marks the oldest `amount_to_advance` bytes as consumed. + // `amount_to_advance` must not be negative and it must not exceed + // ReadableBytes(). + void AdvanceReadablePtr(int amount_to_advance); + + // Marks the first `amount_to_advance` bytes of the writable area written. + // `amount_to_advance` must not be negative and it must not exceed the size of + // the writable area, returned as the `size` outparam of GetWritablePtr(). 
+ void AdvanceWritablePtr(int amount_to_advance); + + // Releases the current contents of the SimpleBuffer and returns them as a + // MemSlice. Logically, has the same effect as calling Clear(). + QuicheMemSlice ReleaseAsSlice(); + + private: + friend class test::SimpleBufferTest; + + // The buffer owned by this class starts at `*storage_` and is `storage_size_` + // bytes long. + // `0 <= read_idx_ <= write_idx_ <= storage_size_` must always hold. + // If `read_idx_ == write_idx_`, then they must be equal to zero. + // The first `read_idx_` bytes of the buffer are consumed, + // the next `write_idx_ - read_idx_` bytes are the readable region, and the + // remaining `storage_size_ - write_idx_` bytes are the writable region. + char* storage_; + int write_idx_; + int read_idx_; + int storage_size_; +}; + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_SIMPLE_BUFFER_H_
diff --git a/quiche/common/balsa/simple_buffer_test.cc b/quiche/common/balsa/simple_buffer_test.cc new file mode 100644 index 0000000..d8b134e --- /dev/null +++ b/quiche/common/balsa/simple_buffer_test.cc
@@ -0,0 +1,364 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quiche/common/balsa/simple_buffer.h" + +#include <string> + +#include "absl/strings/string_view.h" +#include "quiche/common/platform/api/quiche_test.h" + +namespace quiche { + +namespace test { + +namespace { + +// Buffer full of 40 char strings. +const char ibuf[] = { + "123456789!@#$%^&*()abcdefghijklmnopqrstu" + "123456789!@#$%^&*()abcdefghijklmnopqrstu" + "123456789!@#$%^&*()abcdefghijklmnopqrstu" + "123456789!@#$%^&*()abcdefghijklmnopqrstu" + "123456789!@#$%^&*()abcdefghijklmnopqrstu"}; + +} // namespace + +class SimpleBufferTest : public QuicheTest { + public: + static char* storage(SimpleBuffer& buffer) { return buffer.storage_; } + static int write_idx(SimpleBuffer& buffer) { return buffer.write_idx_; } + static int read_idx(SimpleBuffer& buffer) { return buffer.read_idx_; } + static int storage_size(SimpleBuffer& buffer) { return buffer.storage_size_; } +}; + +namespace { + +TEST_F(SimpleBufferTest, TestCreationWithSize) { + SimpleBuffer buffer(5); + EXPECT_EQ(5, storage_size(buffer)); +} + +// Make sure that a zero-sized initial buffer does not throw things off. 
+TEST_F(SimpleBufferTest, TestCreationWithZeroSize) { + SimpleBuffer buffer(0); + EXPECT_EQ(0, storage_size(buffer)); + EXPECT_EQ(4, buffer.Write(ibuf, 4)); + EXPECT_EQ(4, write_idx(buffer)); + EXPECT_EQ(4, storage_size(buffer)); + EXPECT_EQ(4, buffer.ReadableBytes()); +} + +TEST(SimpleBufferDeathTest, TestCreationWithNegativeSize) { + EXPECT_DEATH(SimpleBuffer buffer(-1), "Check failed"); +} + +TEST_F(SimpleBufferTest, TestBasics) { + SimpleBuffer buffer; + + EXPECT_TRUE(buffer.Empty()); + EXPECT_EQ("", buffer.GetReadableRegion()); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + + char* readable_ptr = nullptr; + int readable_size = 0; + buffer.GetReadablePtr(&readable_ptr, &readable_size); + char* writeable_ptr = nullptr; + int writable_size = 0; + buffer.GetWritablePtr(&writeable_ptr, &writable_size); + + EXPECT_EQ(storage(buffer), readable_ptr); + EXPECT_EQ(0, readable_size); + EXPECT_EQ(storage(buffer), writeable_ptr); + EXPECT_EQ(10, writable_size); + EXPECT_EQ(0, buffer.ReadableBytes()); + + const SimpleBuffer buffer2; + EXPECT_EQ(0, buffer2.ReadableBytes()); +} + +TEST_F(SimpleBufferTest, TestBasicWR) { + SimpleBuffer buffer; + + EXPECT_EQ(4, buffer.Write(ibuf, 4)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(4, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(4, buffer.ReadableBytes()); + EXPECT_EQ("1234", buffer.GetReadableRegion()); + int bytes_written = 4; + EXPECT_TRUE(!buffer.Empty()); + + char* readable_ptr = nullptr; + int readable_size = 0; + buffer.GetReadablePtr(&readable_ptr, &readable_size); + char* writeable_ptr = nullptr; + int writable_size = 0; + buffer.GetWritablePtr(&writeable_ptr, &writable_size); + + EXPECT_EQ(storage(buffer), readable_ptr); + EXPECT_EQ(4, readable_size); + EXPECT_EQ(storage(buffer) + 4, writeable_ptr); + EXPECT_EQ(6, writable_size); + + char obuf[ABSL_ARRAYSIZE(ibuf)]; + int bytes_read = 0; + EXPECT_EQ(4, buffer.Read(obuf + bytes_read, 
40)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(0, buffer.ReadableBytes()); + EXPECT_EQ("", buffer.GetReadableRegion()); + bytes_read += 4; + EXPECT_TRUE(buffer.Empty()); + buffer.GetReadablePtr(&readable_ptr, &readable_size); + buffer.GetWritablePtr(&writeable_ptr, &writable_size); + EXPECT_EQ(storage(buffer), readable_ptr); + EXPECT_EQ(0, readable_size); + EXPECT_EQ(storage(buffer), writeable_ptr); + EXPECT_EQ(10, writable_size); + + EXPECT_EQ(bytes_written, bytes_read); + for (int i = 0; i < bytes_read; ++i) { + EXPECT_EQ(obuf[i], ibuf[i]); + } + + // More R/W tests. + EXPECT_EQ(10, buffer.Write(ibuf + bytes_written, 10)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(10, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(10, buffer.ReadableBytes()); + bytes_written += 10; + + EXPECT_TRUE(!buffer.Empty()); + + EXPECT_EQ(6, buffer.Read(obuf + bytes_read, 6)); + EXPECT_EQ(6, read_idx(buffer)); + EXPECT_EQ(10, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(4, buffer.ReadableBytes()); + bytes_read += 6; + + EXPECT_TRUE(!buffer.Empty()); + + EXPECT_EQ(4, buffer.Read(obuf + bytes_read, 7)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(0, buffer.ReadableBytes()); + bytes_read += 4; + + EXPECT_TRUE(buffer.Empty()); + + EXPECT_EQ(bytes_written, bytes_read); + for (int i = 0; i < bytes_read; ++i) { + EXPECT_EQ(obuf[i], ibuf[i]); + } +} + +TEST_F(SimpleBufferTest, TestReserve) { + SimpleBuffer buffer; + + // Reserve by expanding the buffer. 
+ const int initial_size = storage_size(buffer); + buffer.Reserve(initial_size + 1); + EXPECT_EQ(2 * initial_size, storage_size(buffer)); + + buffer.AdvanceWritablePtr(initial_size); + buffer.AdvanceReadablePtr(initial_size - 2); + EXPECT_EQ(initial_size, write_idx(buffer)); + EXPECT_EQ(2 * initial_size, storage_size(buffer)); + + // Reserve by moving data around. `storage_size` does not change. + buffer.Reserve(initial_size + 1); + EXPECT_EQ(2, write_idx(buffer)); + EXPECT_EQ(2 * initial_size, storage_size(buffer)); +} + +TEST_F(SimpleBufferTest, TestExtend) { + SimpleBuffer buffer; + + // Test a write which should not extend the buffer. + EXPECT_EQ(7, buffer.Write(ibuf, 7)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(7, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(7, buffer.ReadableBytes()); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(7, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(7, buffer.ReadableBytes()); + int bytes_written = 7; + + // Test a write which should extend the buffer. 
+ EXPECT_EQ(4, buffer.Write(ibuf + bytes_written, 4)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(11, write_idx(buffer)); + EXPECT_EQ(20, storage_size(buffer)); + EXPECT_EQ(11, buffer.ReadableBytes()); + bytes_written += 4; + + char obuf[ABSL_ARRAYSIZE(ibuf)]; + EXPECT_EQ(11, buffer.Read(obuf, 11)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + EXPECT_EQ(20, storage_size(buffer)); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + EXPECT_EQ(0, buffer.ReadableBytes()); + + const int bytes_read = 11; + EXPECT_EQ(bytes_written, bytes_read); + for (int i = 0; i < bytes_read; ++i) { + EXPECT_EQ(obuf[i], ibuf[i]); + } +} + +TEST_F(SimpleBufferTest, TestClear) { + SimpleBuffer buffer; + + buffer.Clear(); + EXPECT_EQ(0, read_idx(buffer)); + EXPECT_EQ(0, write_idx(buffer)); + EXPECT_EQ(10, storage_size(buffer)); + EXPECT_EQ(0, buffer.ReadableBytes()); + EXPECT_EQ(10, storage_size(buffer)); +} + +TEST_F(SimpleBufferTest, TestLongWrite) { + SimpleBuffer buffer; + + std::string s1 = "HTTP/1.1 500 Service Unavailable"; + buffer.Write(s1.data(), s1.size()); + buffer.Write("\r\n", 2); + std::string key = "Connection"; + std::string value = "close"; + buffer.Write(key.data(), key.size()); + buffer.Write(": ", 2); + buffer.Write(value.data(), value.size()); + buffer.Write("\r\n", 2); + buffer.Write("\r\n", 2); + std::string message = + "<html><head>\n" + "<meta http-equiv=\"content-type\"" + " content=\"text/html;charset=us-ascii\">\n" + "<style><!--\n" + "body {font-family: arial,sans-serif}\n" + "div.nav {margin-top: 1ex}\n" + "div.nav A {font-size: 10pt; font-family: arial,sans-serif}\n" + "span.nav {font-size: 10pt; font-family: arial,sans-serif;" + " font-weight: bold}\n" + "div.nav A,span.big {font-size: 12pt; color: #0000cc}\n" + "div.nav A {font-size: 10pt; color: black}\n" + "A.l:link {color: #6f6f6f}\n" + "A.u:link {color: green}\n" + "//--></style>\n" + "</head>\n" + "<body text=#000000 bgcolor=#ffffff>\n" + "<table 
border=0 cellpadding=2 cellspacing=0 width=100%>" + "<tr><td rowspan=3 width=1% nowrap>\n" + "<b>" + "<font face=times color=#0039b6 size=10>G</font>" + "<font face=times color=#c41200 size=10>o</font>" + "<font face=times color=#f3c518 size=10>o</font>" + "<font face=times color=#0039b6 size=10>g</font>" + "<font face=times color=#30a72f size=10>l</font>" + "<font face=times color=#c41200 size=10>e</font>" + " </b>\n" + "<td> </td></tr>\n" + "<tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff>" + " <b>Error</b></td></tr>\n" + "<tr><td> </td></tr></table>\n" + "<blockquote>\n" + "<H1> Internal Server Error</H1>\n" + " This server was unable to complete the request\n" + "<p></blockquote>\n" + "<table width=100% cellpadding=0 cellspacing=0>" + "<tr><td bgcolor=#3366cc><img alt=\"\" width=1 height=4></td></tr>" + "</table>" + "</body></html>\n"; + buffer.Write(message.data(), message.size()); + const std::string correct_result = + "HTTP/1.1 500 Service Unavailable\r\n" + "Connection: close\r\n" + "\r\n" + "<html><head>\n" + "<meta http-equiv=\"content-type\"" + " content=\"text/html;charset=us-ascii\">\n" + "<style><!--\n" + "body {font-family: arial,sans-serif}\n" + "div.nav {margin-top: 1ex}\n" + "div.nav A {font-size: 10pt; font-family: arial,sans-serif}\n" + "span.nav {font-size: 10pt; font-family: arial,sans-serif;" + " font-weight: bold}\n" + "div.nav A,span.big {font-size: 12pt; color: #0000cc}\n" + "div.nav A {font-size: 10pt; color: black}\n" + "A.l:link {color: #6f6f6f}\n" + "A.u:link {color: green}\n" + "//--></style>\n" + "</head>\n" + "<body text=#000000 bgcolor=#ffffff>\n" + "<table border=0 cellpadding=2 cellspacing=0 width=100%>" + "<tr><td rowspan=3 width=1% nowrap>\n" + "<b>" + "<font face=times color=#0039b6 size=10>G</font>" + "<font face=times color=#c41200 size=10>o</font>" + "<font face=times color=#f3c518 size=10>o</font>" + "<font face=times color=#0039b6 size=10>g</font>" + "<font face=times color=#30a72f size=10>l</font>" + 
"<font face=times color=#c41200 size=10>e</font>" + " </b>\n" + "<td> </td></tr>\n" + "<tr><td bgcolor=#3366cc><font face=arial,sans-serif color=#ffffff>" + " <b>Error</b></td></tr>\n" + "<tr><td> </td></tr></table>\n" + "<blockquote>\n" + "<H1> Internal Server Error</H1>\n" + " This server was unable to complete the request\n" + "<p></blockquote>\n" + "<table width=100% cellpadding=0 cellspacing=0>" + "<tr><td bgcolor=#3366cc><img alt=\"\" width=1 height=4></td></tr>" + "</table>" + "</body></html>\n"; + EXPECT_EQ(correct_result, buffer.GetReadableRegion()); +} + +TEST_F(SimpleBufferTest, ReleaseAsSlice) { + SimpleBuffer buffer; + + buffer.WriteString("abc"); + QuicheMemSlice slice = buffer.ReleaseAsSlice(); + EXPECT_EQ("abc", slice.AsStringView()); + + char* readable_ptr = nullptr; + int readable_size = 0; + buffer.GetReadablePtr(&readable_ptr, &readable_size); + EXPECT_EQ(0, readable_size); + + buffer.WriteString("def"); + slice = buffer.ReleaseAsSlice(); + buffer.GetReadablePtr(&readable_ptr, &readable_size); + EXPECT_EQ(0, readable_size); + EXPECT_EQ("def", slice.AsStringView()); +} + +TEST_F(SimpleBufferTest, EmptyBufferReleaseAsSlice) { + SimpleBuffer buffer; + char* readable_ptr = nullptr; + int readable_size = 0; + + QuicheMemSlice slice = buffer.ReleaseAsSlice(); + buffer.GetReadablePtr(&readable_ptr, &readable_size); + EXPECT_EQ(0, readable_size); + EXPECT_TRUE(slice.empty()); +} + +} // namespace + +} // namespace test + +} // namespace quiche
diff --git a/quiche/common/balsa/standard_header_map.cc b/quiche/common/balsa/standard_header_map.cc new file mode 100644 index 0000000..0cdab01 --- /dev/null +++ b/quiche/common/balsa/standard_header_map.cc
@@ -0,0 +1,143 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "quiche/common/balsa/standard_header_map.h" + +namespace quiche { + +const StandardHttpHeaderNameSet& GetStandardHeaderSet() { + static const StandardHttpHeaderNameSet* const header_map = + new StandardHttpHeaderNameSet({ + {"Accept"}, + {"Accept-Charset"}, + {"Accept-CH"}, + {"Accept-CH-Lifetime"}, + {"Accept-Encoding"}, + {"Accept-Language"}, + {"Accept-Ranges"}, + {"Access-Control-Allow-Credentials"}, + {"Access-Control-Allow-Headers"}, + {"Access-Control-Allow-Methods"}, + {"Access-Control-Allow-Origin"}, + {"Access-Control-Expose-Headers"}, + {"Access-Control-Max-Age"}, + {"Access-Control-Request-Headers"}, + {"Access-Control-Request-Method"}, + {"Age"}, + {"Allow"}, + {"Authorization"}, + {"Cache-Control"}, + {"Connection"}, + {"Content-Disposition"}, + {"Content-Encoding"}, + {"Content-Language"}, + {"Content-Length"}, + {"Content-Location"}, + {"Content-Range"}, + {"Content-Security-Policy"}, + {"Content-Security-Policy-Report-Only"}, + {"X-Content-Security-Policy"}, + {"X-Content-Security-Policy-Report-Only"}, + {"X-WebKit-CSP"}, + {"X-WebKit-CSP-Report-Only"}, + {"Content-Type"}, + {"Content-MD5"}, + {"X-Content-Type-Options"}, + {"Cookie"}, + {"Cookie2"}, + {"Cross-Origin-Resource-Policy"}, + {"Cross-Origin-Opener-Policy"}, + {"Date"}, + {"DAV"}, + {"Depth"}, + {"Destination"}, + {"DNT"}, + {"DPR"}, + {"Early-Data"}, + {"ETag"}, + {"Expect"}, + {"Expires"}, + {"Follow-Only-When-Prerender-Shown"}, + {"Forwarded"}, + {"From"}, + {"Host"}, + {"HTTP2-Settings"}, + {"If"}, + {"If-Match"}, + {"If-Modified-Since"}, + {"If-None-Match"}, + {"If-Range"}, + {"If-Unmodified-Since"}, + {"Keep-Alive"}, + {"Label"}, + {"Last-Modified"}, + {"Link"}, + {"Location"}, + {"Lock-Token"}, + {"Max-Forwards"}, + {"MS-Author-Via"}, + {"Origin"}, + {"Overwrite"}, + {"P3P"}, + 
{"Ping-From"}, + {"Ping-To"}, + {"Pragma"}, + {"Proxy-Connection"}, + {"Proxy-Authenticate"}, + {"Public-Key-Pins"}, + {"Public-Key-Pins-Report-Only"}, + {"Range"}, + {"Referer"}, + {"Referrer-Policy"}, + {"Refresh"}, + {"Report-To"}, + {"Retry-After"}, + {"Sec-Fetch-Dest"}, + {"Sec-Fetch-Mode"}, + {"Sec-Fetch-Site"}, + {"Sec-Fetch-User"}, + {"Sec-Metadata"}, + {"Sec-Token-Binding"}, + {"Sec-Provided-Token-Binding-ID"}, + {"Sec-Referred-Token-Binding-ID"}, + {"Sec-WebSocket-Accept"}, + {"Sec-WebSocket-Extensions"}, + {"Sec-WebSocket-Key"}, + {"Sec-WebSocket-Protocol"}, + {"Sec-WebSocket-Version"}, + {"Server"}, + {"Server-Timing"}, + {"Service-Worker"}, + {"Service-Worker-Allowed"}, + {"Service-Worker-Navigation-Preload"}, + {"Set-Cookie"}, + {"Set-Cookie2"}, + {"Status-URI"}, + {"Strict-Transport-Security"}, + {"SourceMap"}, + {"Timeout"}, + {"Timing-Allow-Origin"}, + {"Tk"}, + {"Trailer"}, + {"Trailers"}, + {"Transfer-Encoding"}, + {"TE"}, + {"Upgrade"}, + {"Upgrade-Insecure-Requests"}, + {"User-Agent"}, + {"X-OperaMini-Phone-UA"}, + {"X-UCBrowser-UA"}, + {"X-UCBrowser-Device-UA"}, + {"X-Device-User-Agent"}, + {"Vary"}, + {"Via"}, + {"CDN-Loop"}, + {"Warning"}, + {"WWW-Authenticate"}, + }); + + return *header_map; +} + +} // namespace quiche
diff --git a/quiche/common/balsa/standard_header_map.h b/quiche/common/balsa/standard_header_map.h new file mode 100644 index 0000000..3e9dbb7 --- /dev/null +++ b/quiche/common/balsa/standard_header_map.h
@@ -0,0 +1,24 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_COMMON_BALSA_STANDARD_HEADER_MAP_H_ +#define QUICHE_COMMON_BALSA_STANDARD_HEADER_MAP_H_ + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/string_view.h" +#include "quiche/common/quiche_text_utils.h" + +namespace quiche { + +// This specifies an absl::flat_hash_set with case-insensitive lookup and +// hashing +using StandardHttpHeaderNameSet = + absl::flat_hash_set<absl::string_view, StringPieceCaseHash, + StringPieceCaseEqual>; + +const StandardHttpHeaderNameSet& GetStandardHeaderSet(); + +} // namespace quiche + +#endif // QUICHE_COMMON_BALSA_STANDARD_HEADER_MAP_H_