blob: 8e693cbfd072dee3f025156dfb2ca04ecd084a85 [file] [log] [blame]
// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "quiche/balsa/balsa_frame.h"
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
#include "quiche/balsa/balsa_enums.h"
#include "quiche/balsa/balsa_headers.h"
#include "quiche/balsa/balsa_visitor_interface.h"
#include "quiche/balsa/header_properties.h"
#include "quiche/common/platform/api/quiche_logging.h"
// When comparing characters (other than == and !=), cast to unsigned char
// to make sure values above 127 rank as expected, even on platforms where char
// is signed and thus such values are represented as negative numbers before the
// cast.
//
// Each macro evaluates both arguments exactly once and yields a bool.
#define CHAR_LT(a, b) \
(static_cast<unsigned char>(a) < static_cast<unsigned char>(b))
#define CHAR_LE(a, b) \
(static_cast<unsigned char>(a) <= static_cast<unsigned char>(b))
#define CHAR_GT(a, b) \
(static_cast<unsigned char>(a) > static_cast<unsigned char>(b))
#define CHAR_GE(a, b) \
(static_cast<unsigned char>(a) >= static_cast<unsigned char>(b))
// Debug-check flavour of CHAR_GE. It expands to QUICHE_DCHECK_GE so that a
// diagnostic message can be streamed into it with operator<<.
#define QUICHE_DCHECK_CHAR_GE(a, b) \
QUICHE_DCHECK_GE(static_cast<unsigned char>(a), static_cast<unsigned char>(b))
namespace quiche {
namespace {
// Response status code compared against the parsed response code in
// ProcessHeaders() to detect an interim "100 Continue" header block.
const size_t kContinueStatusCode = 100;
// Transfer-Encoding values recognised by ProcessTransferEncodingLine(); they
// are compared case-insensitively.
constexpr absl::string_view kChunked = "chunked";
// Header names that ProcessHeaderLines() looks for (case-insensitively)
// because they influence message framing.
constexpr absl::string_view kContentLength = "content-length";
constexpr absl::string_view kIdentity = "identity";
constexpr absl::string_view kTransferEncoding = "transfer-encoding";
} // namespace
// Returns the framer to the state in which it expects the start of a new
// message, and clears every attached headers/trailer object.
//
// The following members are deliberately NOT reset, because they persist
// across messages:
//   is_request_, request_was_head_, max_header_length_, visitor_.
void BalsaFrame::Reset() {
  // State machine and error bookkeeping.
  parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
  last_error_ = BalsaFrameEnums::BALSA_NO_ERROR;
  invalid_chars_.clear();

  // Character-level scanning flags.
  last_char_was_slash_r_ = false;
  saw_non_newline_char_ = false;
  start_was_space_ = true;
  chunk_length_character_extracted_ = false;
  allow_reading_until_close_for_request_ = false;
  term_chars_ = 0;

  // Body accounting.
  chunk_length_remaining_ = 0;
  content_length_remaining_ = 0;

  // Newline tracking and recorded header lines.
  last_slash_n_loc_ = nullptr;
  last_recorded_slash_n_loc_ = nullptr;
  last_slash_n_idx_ = 0;
  lines_.clear();

  // Trailer bookkeeping.
  trailer_lines_.clear();
  start_of_trailer_line_ = 0;
  trailer_length_ = 0;

  // Attached header containers are optional; clear the ones that are set.
  if (continue_headers_ != nullptr) {
    continue_headers_->Clear();
  }
  if (headers_ != nullptr) {
    headers_->Clear();
  }
  if (trailer_ != nullptr) {
    trailer_->Clear();
  }
}
namespace {
// Scans one "island" inside the line [begin, end), starting at `current`.
// An island is a (possibly empty) run of whitespace followed by a (possibly
// empty) run of non-whitespace, where "whitespace" means any character whose
// unsigned value is <= ' '.
//
// Outputs (both are offsets relative to `begin`):
//   *first_whitespace - where the whitespace run starts (== current - begin).
//   *first_nonwhite   - where the non-whitespace run starts.
//
// Returns a pointer to the first whitespace character after the island, or
// `end` when the island extends to the end of the line; that is,
// returnvalue == end || *returnvalue <= ' '. The non-whitespace run therefore
// occupies offsets [*first_nonwhite, returnvalue - begin).
inline const char* ParseOneIsland(const char* current, const char* begin,
                                  const char* end, size_t* first_whitespace,
                                  size_t* first_nonwhite) {
  const char* cursor = current;

  // Leading whitespace run.
  *first_whitespace = cursor - begin;
  for (; cursor < end && CHAR_LE(*cursor, ' '); ++cursor) {
  }

  // Non-whitespace run.
  *first_nonwhite = cursor - begin;
  for (; cursor < end && CHAR_GT(*cursor, ' '); ++cursor) {
  }
  return cursor;
}
}  // namespace
// Summary:
//   Parses the first line of either a request or response into up to three
//   whitespace-separated tokens and records their boundaries in `headers`
//   (whitespace_N_idx_ / non_whitespace_N_idx_, all relative to `begin`).
//   For responses the second token is additionally converted to an integer
//   and stored in headers->parsed_response_code_.
//
//   Note that in the case of a detected warning, error_code will be set
//   but the function will not return false.
//   This function does not write the data of the first line into the
//   header's buffer (that should already have been done elsewhere).
//
// Pre-conditions:
//   begin != end
//   *begin should be a character which is > ' '. This implies that there
//     is at least one non-whitespace character in [begin, end).
//   headers is a valid pointer to a BalsaHeaders object.
//   error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
//   The entire first line must exist in [begin, end).
//   Zero or one line terminator -may- exist in [begin, end), at its end.
//   [begin, end) should exist in the header's buffer.
//
// Side-effects:
//   headers will be modified.
//   error_code may be modified if either a warning or error is detected.
//
// Returns:
//   True if no error (as opposed to warning) is detected.
//   False if an error (as opposed to warning) is detected.
//
// If there is indeed non-whitespace in the line, then the following
// will take care of the leading-whitespace precondition for you:
//   while (*begin <= ' ') ++begin;
//   ParseHTTPFirstLine(begin, end, is_request, &headers, &error_code);
//
bool ParseHTTPFirstLine(const char* begin, const char* end, bool is_request,
                        BalsaHeaders* headers,
                        BalsaFrameEnums::ErrorCode* error_code) {
  // Exclude the line terminator: drop every trailing '\r' and '\n'.
  while (begin < end && (end[-1] == '\n' || end[-1] == '\r')) {
    --end;
  }

  const char* current =
      ParseOneIsland(begin, begin, end, &headers->whitespace_1_idx_,
                     &headers->non_whitespace_1_idx_);
  current = ParseOneIsland(current, begin, end, &headers->whitespace_2_idx_,
                           &headers->non_whitespace_2_idx_);
  current = ParseOneIsland(current, begin, end, &headers->whitespace_3_idx_,
                           &headers->non_whitespace_3_idx_);

  // Clean up any trailing whitespace that comes after the third island.
  // `last` is a one-past-the-end cursor, so only bytes inside [current, end)
  // are ever inspected. In particular *end is never read, which matters when
  // the caller passed a line without a terminator: `end` may then be the end
  // of the buffer.
  const char* last = end;
  while (last > current && CHAR_LE(last[-1], ' ')) {
    --last;
  }
  headers->whitespace_4_idx_ = last - begin;

  // Either the passed-in line is empty, or it starts with a non-whitespace
  // character.
  QUICHE_DCHECK(begin == end || static_cast<unsigned char>(*begin) > ' ');
  QUICHE_DCHECK_EQ(0u, headers->whitespace_1_idx_);
  QUICHE_DCHECK_EQ(0u, headers->non_whitespace_1_idx_);
  // If the line isn't empty, it has at least one non-whitespace character (see
  // first QUICHE_DCHECK), which will have been identified as a non-empty
  // [non_whitespace_1_idx_, whitespace_2_idx_).
  QUICHE_DCHECK(begin == end ||
                headers->non_whitespace_1_idx_ < headers->whitespace_2_idx_);

  if (headers->non_whitespace_2_idx_ == headers->whitespace_3_idx_) {
    // This error may be triggered if the second token is empty, OR there's no
    // WS after the first token; we don't bother to distinguish exactly which.
    //
    // The code is computed arithmetically and relies on the request-side
    // enumerator immediately following the response-side one:
    //   FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
    //   FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
    *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
        BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
        static_cast<int>(is_request));
    if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
      // Fatal for responses; for requests it is only a warning.
      return false;
    }
  }
  if (headers->whitespace_3_idx_ == headers->non_whitespace_3_idx_) {
    // No whitespace separates the second token from a third one. Only report
    // this if nothing has been reported yet, and never treat it as fatal.
    if (*error_code == BalsaFrameEnums::BALSA_NO_ERROR) {
      // Same arithmetic trick as above:
      //   FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response
      //   the enumerator that immediately follows it  for request
      *error_code = static_cast<BalsaFrameEnums::ErrorCode>(
          BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE +
          static_cast<int>(is_request));
    }
  }

  if (!is_request) {
    headers->parsed_response_code_ = 0;
    // If the response code is non-empty:
    if (headers->non_whitespace_2_idx_ < headers->whitespace_3_idx_) {
      // The span handed to SimpleAtoi runs up to the start of the third
      // token, so it may include the whitespace that follows the code.
      if (!absl::SimpleAtoi(
              absl::string_view(begin + headers->non_whitespace_2_idx_,
                                headers->non_whitespace_3_idx_ -
                                    headers->non_whitespace_2_idx_),
              &headers->parsed_response_code_)) {
        *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
        return false;
      }
    }
  }
  return true;
}
// begin - beginning of the firstline
// end - end of the firstline
//
// A precondition for this function is that there is non-whitespace between
// [begin, end). If this precondition is not met, the function will not perform
// as expected (and bad things may happen, and it will eat your first, second,
// and third unborn children!).
//
// Another precondition for this function is that [begin, end) includes
// at most one newline, which must be at the end of the line.
void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
BalsaFrameEnums::ErrorCode previous_error = last_error_;
if (!ParseHTTPFirstLine(begin, end, is_request_, headers_, &last_error_)) {
parse_state_ = BalsaFrameEnums::ERROR;
HandleError(last_error_);
return;
}
if (previous_error != last_error_) {
HandleWarning(last_error_);
}
const absl::string_view line_input(
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_);
const absl::string_view part1(
begin + headers_->non_whitespace_1_idx_,
headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_);
const absl::string_view part2(
begin + headers_->non_whitespace_2_idx_,
headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_);
const absl::string_view part3(
begin + headers_->non_whitespace_3_idx_,
headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
if (is_request_) {
visitor_->OnRequestFirstLineInput(line_input, part1, part2, part3);
if (part3.empty()) {
parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
}
return;
}
visitor_->OnResponseFirstLineInput(line_input, part1, part2, part3);
}
// Trims the whitespace surrounding the ':' of a header line and stores the
// resulting key end / value begin offsets in `current_header_line`.
//
//   'stream_begin' points to the first character of the headers buffer; all
//                  stored offsets are relative to it.
//   'line_begin'   points to the first character of the line.
//   'current'      points to a char which is ':'.
//   'line_end'     points just past the last character of the line that is
//                  still considered content; *line_end must be <= ' '.
//
// "Whitespace" here means any character whose unsigned value is <= ' '.
void BalsaFrame::CleanUpKeyValueWhitespace(
    const char* stream_begin, const char* line_begin, const char* current,
    const char* line_end, HeaderLineDescription* current_header_line) {
  const char* colon_loc = current;
  QUICHE_DCHECK_LT(colon_loc, line_end);
  QUICHE_DCHECK_EQ(':', *colon_loc);
  QUICHE_DCHECK_EQ(':', *current);
  QUICHE_DCHECK_CHAR_GE(' ', *line_end)
      << "\"" << std::string(line_begin, line_end) << "\"";

  // TODO(fenix): Investigate whether or not the bounds tests in the
  // loops here are redundant, and if so, remove them.

  // Key: walk left from the character before the colon over whitespace, but
  // never inspect anything before line_begin.
  for (--current; current > line_begin && CHAR_LE(*current, ' '); --current) {
  }
  // Convert "last key character" into a one-past-the-end position.
  if (current != colon_loc) {
    ++current;
  }
  current_header_line->key_end_idx = current - stream_begin;

  // Value: walk right from the character after the colon over whitespace.
  current = colon_loc;
  QUICHE_DCHECK_EQ(':', *current);
  for (++current; current < line_end && CHAR_LE(*current, ' '); ++current) {
  }
  current_header_line->value_begin_idx = current - stream_begin;

  QUICHE_DCHECK_GE(current_header_line->key_end_idx,
                   current_header_line->first_char_idx);
  QUICHE_DCHECK_GE(current_header_line->value_begin_idx,
                   current_header_line->key_end_idx);
  QUICHE_DCHECK_GE(current_header_line->last_char_idx,
                   current_header_line->value_begin_idx);
}
// Splits the recorded header (or trailer) lines into key/value pairs.
//
// `lines` holds (begin, end) offsets into the buffer returned by
// headers->OriginalHeaderStreamBegin(). For every logical line (a line plus
// any continuation lines that follow it) a HeaderLineDescription is appended
// to headers->header_lines_, and, when a colon is found, visitor_->OnHeader()
// is invoked with the trimmed key and value.
//
// Parameters:
//   lines      - must be non-empty; its last entry is the terminating blank
//                line and is never parsed.
//   is_trailer - selects trailer error codes and makes lines[0] the first
//                line to parse (for headers lines[0] is the first line of the
//                message, so parsing starts at lines[1]).
//   headers    - receives the parsed line descriptions.
//
// Returns false if an error was reported through HandleError(); true
// otherwise. A line without a colon is an error only when
// http_validation_policy().require_header_colon() is set; otherwise a warning
// is issued, the whole line stays recorded as a key, and OnHeader() is not
// called for it.
bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
bool is_trailer,
BalsaHeaders* headers) {
QUICHE_DCHECK(!lines.empty());
const char* stream_begin = headers->OriginalHeaderStreamBegin();
// The last line is always just a newline (and is uninteresting).
const Lines::size_type lines_size_m1 = lines.size() - 1;
// For a trailer, there is no first line, so lines[0] is the first header.
// For real headers, the first line takes lines[0], so real header starts
// at index 1.
int first_header_idx = (is_trailer ? 0 : 1);
const char* current = stream_begin + lines[first_header_idx].first;
// This code is a bit more subtle than it may appear at first glance.
// This code looks for a colon in the current line... but it also looks
// beyond the current line. If there is no colon in the current line, then
// for each subsequent line (until the colon which -has- been found is
// associated with a line), no searching for a colon will be performed. In
// this way, we minimize the amount of bytes we have scanned for a colon.
for (Lines::size_type i = first_header_idx; i < lines_size_m1;) {
const char* line_begin = stream_begin + lines[i].first;
// Here we handle possible continuations. Note that we do not replace
// the '\n' in the line before a continuation (at least, as of now),
// which implies that any code which looks for a value must deal with
// "\r\n", etc -within- the line (and not just at the end of it).
for (++i; i < lines_size_m1; ++i) {
const char c = *(stream_begin + lines[i].first);
if (CHAR_GT(c, ' ')) {
// Not a continuation, so stop. Note that if the 'original' i = 1,
// and the next line is not a continuation, we'll end up with i = 2
// when we break. This handles the incrementing of i for the outer
// loop.
break;
}
// Space and tab are valid starts to continuation lines.
// https://tools.ietf.org/html/rfc7230#section-3.2.4 says that a proxy
// can choose to reject or normalize continuation lines.
if ((c != ' ' && c != '\t') ||
http_validation_policy().disallow_header_continuation_lines()) {
HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
: BalsaFrameEnums::INVALID_HEADER_FORMAT);
return false;
}
// If disallow_header_continuation_lines() is false, we neither reject nor
// normalize continuation lines, in violation of RFC7230.
}
// lines[i - 1] is the last physical line belonging to this logical line.
const char* line_end = stream_begin + lines[i - 1].second;
QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
// We cleanup the whitespace at the end of the line before doing anything
// else of interest as it allows us to do nothing when irregularly formatted
// headers are parsed (e.g. those with only keys, only values, or no colon).
//
// We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
--line_end;
QUICHE_DCHECK_EQ('\n', *line_end)
<< "\"" << std::string(line_begin, line_end) << "\"";
while (CHAR_LE(*line_end, ' ') && line_end > line_begin) {
--line_end;
}
// Step back to one past the last non-whitespace character.
++line_end;
QUICHE_DCHECK_CHAR_GE(' ', *line_end);
QUICHE_DCHECK_LT(line_begin, line_end);
// We use '0' for the block idx, because we're always writing to the first
// block from the framer (we do this because the framer requires that the
// entire header sequence be in a contiguous buffer).
headers->header_lines_.push_back(HeaderLineDescription(
line_begin - stream_begin, line_end - stream_begin,
line_end - stream_begin, line_end - stream_begin, 0));
// `current` is the colon-search cursor carried over from earlier lines. If
// it already sits at or past this line's end, this line holds no colon.
if (current >= line_end) {
if (http_validation_policy().require_header_colon()) {
HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
: BalsaFrameEnums::HEADER_MISSING_COLON);
return false;
}
HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
: BalsaFrameEnums::HEADER_MISSING_COLON);
// Then the next colon will not be found within this header line-- time
// to try again with another header-line.
continue;
}
if (current < line_begin) {
// When this condition is true, the last detected colon was part of a
// previous line. We reset to the beginning of the line as we don't care
// about the presence of any colon before the beginning of the current
// line.
current = line_begin;
}
// Scan for the colon, rejecting characters that are not allowed in a key.
for (; current < line_end; ++current) {
if (*current == ':') {
break;
}
if (header_properties::IsInvalidHeaderKeyChar(*current)) {
// Generally invalid characters were found earlier.
HandleError(is_trailer
? BalsaFrameEnums::INVALID_TRAILER_NAME_CHARACTER
: BalsaFrameEnums::INVALID_HEADER_NAME_CHARACTER);
return false;
}
}
if (current == line_end) {
// There was no colon in the line. The arguments we passed into the
// construction for the HeaderLineDescription object should be OK-- it
// assumes that the entire content is 'key' by default (which is true, as
// there was no colon, there can be no value). Note that this is a
// construct which is technically not allowed by the spec.
// In strict mode, we do treat this invalid value-less key as an error.
if (http_validation_policy().require_header_colon()) {
HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
: BalsaFrameEnums::HEADER_MISSING_COLON);
return false;
}
HandleWarning(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
: BalsaFrameEnums::HEADER_MISSING_COLON);
continue;
}
QUICHE_DCHECK_EQ(*current, ':');
QUICHE_DCHECK_LE(current - stream_begin, line_end - stream_begin);
QUICHE_DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
HeaderLineDescription& current_header_line = headers->header_lines_.back();
current_header_line.key_end_idx = current - stream_begin;
current_header_line.value_begin_idx = current_header_line.key_end_idx;
// Both indices are provisional here; CleanUpKeyValueWhitespace() overwrites
// key_end_idx and value_begin_idx with the whitespace-trimmed positions.
if (current < line_end) {
++current_header_line.key_end_idx;
CleanUpKeyValueWhitespace(stream_begin, line_begin, current, line_end,
&current_header_line);
}
const absl::string_view key(
stream_begin + current_header_line.first_char_idx,
current_header_line.key_end_idx - current_header_line.first_char_idx);
const absl::string_view value(
stream_begin + current_header_line.value_begin_idx,
current_header_line.last_char_idx -
current_header_line.value_begin_idx);
visitor_->OnHeader(key, value);
}
return true;
}
// Records `error_code` as the most recent error and forwards it to the
// visitor's warning handler. Unlike HandleError(), the parse state is left
// unchanged, so parsing continues.
void BalsaFrame::HandleWarning(BalsaFrameEnums::ErrorCode error_code) {
  // Store before notifying, so the recorded code is already up to date when
  // the visitor callback runs.
  last_error_ = error_code;
  visitor_->HandleWarning(last_error_);
}
// Records `error_code` as the most recent error, moves the framer into the
// ERROR parse state, and then forwards the code to the visitor's error
// handler.
void BalsaFrame::HandleError(BalsaFrameEnums::ErrorCode error_code) {
  // Both members are updated before the visitor is notified.
  parse_state_ = BalsaFrameEnums::ERROR;
  last_error_ = error_code;
  visitor_->HandleError(last_error_);
}
// Parses the value of header line `line_idx` (in headers_->header_lines_) as
// an unsigned decimal content length.
//
// Parameters:
//   line_idx - index of the content-length line in headers_->header_lines_.
//   length   - output. Left untouched when the value is empty; otherwise it
//              is zeroed and then accumulates the digits parsed so far, so on
//              failure it holds the value of the digits that preceded the
//              offending character.
//
// Returns:
//   VALID_CONTENT_LENGTH    - the whole value consisted of ASCII digits.
//   INVALID_CONTENT_LENGTH  - the value is empty or has a non-digit character.
//   CONTENT_LENGTH_OVERFLOW - the value does not fit in size_t.
BalsaHeadersEnums::ContentLengthStatus BalsaFrame::ProcessContentLengthLine(
    HeaderLines::size_type line_idx, size_t* length) {
  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
  const char* const stream_begin = headers_->OriginalHeaderStreamBegin();
  const char* const value_end = stream_begin + header_line.last_char_idx;
  const char* cursor = stream_begin + header_line.value_begin_idx;

  if (cursor >= value_end) {
    // There is no non-whitespace value data.
    QUICHE_DVLOG(1) << "invalid content-length -- no non-whitespace value data";
    return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
  }

  constexpr size_t kMax = std::numeric_limits<size_t>::max();
  constexpr size_t kMaxDiv10 = kMax / 10;

  *length = 0;
  for (; cursor < value_end; ++cursor) {
    const char ch = *cursor;
    if (ch < '0' || ch > '9') {
      // bad! content-length found, and couldn't parse all of it!
      QUICHE_DVLOG(1)
          << "invalid content-length - non numeric character detected";
      return BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
    }
    const size_t digit = ch - '0';
    // May wrap when *length > kMaxDiv10; that case is rejected first below,
    // before the product is used for anything meaningful.
    const size_t length_x_10 = *length * 10;
    const bool multiply_overflows = *length > kMaxDiv10;
    if (multiply_overflows || (kMax - length_x_10) < digit) {
      QUICHE_DVLOG(1) << "content-length overflow";
      return BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
    }
    *length = length_x_10 + digit;
  }

  QUICHE_DVLOG(1) << "content_length parsed: " << *length;
  return BalsaHeadersEnums::VALID_CONTENT_LENGTH;
}
// Interprets the value of the transfer-encoding header at `line_idx` (in
// headers_->header_lines_).
//
// The value is matched case-insensitively against exactly two encodings:
//   "chunked"  -> headers_->transfer_encoding_is_chunked_ becomes true.
//   "identity" -> headers_->transfer_encoding_is_chunked_ becomes false.
// Anything else is reported as UNKNOWN_TRANSFER_ENCODING through
// HandleError(), and the flag is left as it was.
void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
  const char* const value_start =
      headers_->OriginalHeaderStreamBegin() + header_line.value_begin_idx;
  const absl::string_view transfer_encoding(
      value_start, header_line.last_char_idx - header_line.value_begin_idx);

  const bool is_chunked = absl::EqualsIgnoreCase(transfer_encoding, kChunked);
  if (is_chunked || absl::EqualsIgnoreCase(transfer_encoding, kIdentity)) {
    headers_->transfer_encoding_is_chunked_ = is_chunked;
    return;
  }
  HandleError(BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING);
}
// Scans every byte covered by `lines` for characters that
// header_properties::IsInvalidHeaderChar() rejects, and counts each
// occurrence in invalid_chars_ (keyed by the offending character).
//
// The scanned range runs from the start of the first line to the end of the
// last line. The first line's offset is added explicitly because it is
// non-zero in the case of a trailer.
//
// Returns true if at least one invalid character was found.
bool BalsaFrame::CheckHeaderLinesForInvalidChars(const Lines& lines,
                                                 const BalsaHeaders* headers) {
  const char* const scan_begin =
      headers->OriginalHeaderStreamBegin() + lines.front().first;
  const char* const scan_end =
      headers->OriginalHeaderStreamBegin() + lines.back().second;

  bool found_invalid = false;
  for (const char* p = scan_begin; p < scan_end; ++p) {
    if (!header_properties::IsInvalidHeaderChar(*p)) {
      continue;
    }
    found_invalid = true;
    ++invalid_chars_[*p];
  }
  return found_invalid;
}
// Turns the recorded `lines` into key/value header lines on `headers` and
// extracts the headers that affect framing.
//
// Steps, in order:
//   1. For requests with invalid-character tracking enabled, scan the lines
//      for invalid characters (error or warning depending on
//      invalid_chars_error_enabled()).
//   2. Split the lines into key/value pairs via
//      FindColonsAndParseIntoKeyValue().
//   3. Reject lines whose key is empty or starts with a space.
//   4. For headers only (not trailers): record content-length and
//      transfer-encoding, enforcing the validation policy for duplicates and
//      for the presence of both.
//
// Errors are reported through HandleError(), which puts the framer in the
// ERROR state; callers check parse_state_ afterwards. `lines` must be
// non-empty.
void BalsaFrame::ProcessHeaderLines(const Lines& lines, bool is_trailer,
BalsaHeaders* headers) {
QUICHE_DCHECK(!lines.empty());
QUICHE_DVLOG(1) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
if (is_request() && track_invalid_chars()) {
if (CheckHeaderLinesForInvalidChars(lines, headers)) {
if (invalid_chars_error_enabled()) {
HandleError(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
return;
}
HandleWarning(BalsaFrameEnums::INVALID_HEADER_CHARACTER);
}
}
// There is no need to attempt to process headers (resp. trailers)
// if no header (resp. trailer) lines exist.
//
// The last line of the message, which is an empty line, is never a header
// (resp. trailer) line. Furthermore, the first line of the message is not
// a header line. Therefore there are at least two (resp. one) lines in the
// message which are not header (resp. trailer) lines.
//
// Thus, we test to see if we have more than two (resp. one) lines total
// before attempting to parse any header (resp. trailer) lines.
if (lines.size() <= (is_trailer ? 1 : 2)) {
return;
}
// Both indices are stored as (line index + 1) so that 0 can mean "not seen".
HeaderLines::size_type content_length_idx = 0;
HeaderLines::size_type transfer_encoding_idx = 0;
const char* stream_begin = headers->OriginalHeaderStreamBegin();
// Parse the rest of the header or trailer data into key-value pairs.
if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
return;
}
// At this point, we've parsed all of the headers/trailers. Time to look
// for those headers which we require for framing or for format errors.
const HeaderLines::size_type lines_size = headers->header_lines_.size();
for (HeaderLines::size_type i = 0; i < lines_size; ++i) {
const HeaderLineDescription& line = headers->header_lines_[i];
const absl::string_view key(stream_begin + line.first_char_idx,
line.key_end_idx - line.first_char_idx);
QUICHE_DVLOG(2) << "[" << i << "]: " << key << " key_len: " << key.length();
// Some headers (content-length and transfer-encoding below) are special,
// as they change the way that the message is framed, and so the framer is
// required to search for them. However, first check for a formatting
// error, and skip special header treatment on trailer lines (when
// is_trailer is true).
if (key.empty() || key[0] == ' ') {
parse_state_ = BalsaFrameEnums::ERROR;
HandleError(is_trailer ? BalsaFrameEnums::INVALID_TRAILER_FORMAT
: BalsaFrameEnums::INVALID_HEADER_FORMAT);
return;
}
if (is_trailer) {
continue;
}
if (absl::EqualsIgnoreCase(key, kContentLength)) {
size_t length = 0;
BalsaHeadersEnums::ContentLengthStatus content_length_status =
ProcessContentLengthLine(i, &length);
if (content_length_idx == 0) {
// First content-length header: record it as the reference value.
content_length_idx = i + 1;
headers->content_length_status_ = content_length_status;
headers->content_length_ = length;
content_length_remaining_ = length;
continue;
}
// A repeated content-length header is an error if its parse status differs
// from the first one, or if both are valid and either the policy forbids
// repeats or the values disagree.
if ((headers->content_length_status_ != content_length_status) ||
((headers->content_length_status_ ==
BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
(http_validation_policy().disallow_multiple_content_length() ||
length != headers->content_length_))) {
HandleError(BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS);
return;
}
continue;
}
if (absl::EqualsIgnoreCase(key, kTransferEncoding)) {
if (transfer_encoding_idx != 0) {
HandleError(BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS);
return;
}
transfer_encoding_idx = i + 1;
}
}
if (!is_trailer) {
if (http_validation_policy()
.disallow_transfer_encoding_with_content_length() &&
content_length_idx != 0 && transfer_encoding_idx != 0) {
HandleError(BalsaFrameEnums::BOTH_TRANSFER_ENCODING_AND_CONTENT_LENGTH);
return;
}
// NOTE(review): transfer_encoding_is_chunked_ is read here, before
// ProcessTransferEncodingLine() below gets a chance to set it for this
// message. If nothing else sets the flag earlier, this branch does not fire
// for a chunked message that also carries a content-length -- confirm
// whether the ordering is intentional before changing it.
if (headers->transfer_encoding_is_chunked_) {
headers->content_length_ = 0;
headers->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
content_length_remaining_ = 0;
}
if (transfer_encoding_idx != 0) {
// Undo the +1 bias applied when the index was recorded.
ProcessTransferEncodingLine(transfer_encoding_idx - 1);
}
}
}
// Chooses the parse state that follows a fully parsed header block, based on
// what the headers say about the message body:
//   * responses to HEAD requests, and responses whose status code cannot
//     carry a body (rfc2616 section 4.3)     -> MESSAGE_FULLY_READ
//   * chunked transfer encoding              -> READING_CHUNK_LENGTH
//   * valid content-length of zero           -> MESSAGE_FULLY_READ
//   * valid non-zero content-length          -> READING_CONTENT
//   * unparsable / overflowing content-length -> error
//   * no content-length at all               -> see NO_CONTENT_LENGTH below
void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
  parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;

  if (!is_request_) {
    int response_code = headers_->parsed_response_code_;
    if (request_was_head_ ||
        !BalsaHeaders::ResponseCanHaveBody(response_code)) {
      // There is no body.
      return;
    }
  }

  if (headers_->transfer_encoding_is_chunked_) {
    // When both "Transfer-Encoding: chunked" and "Content-length:" are
    // present, chunked wins, as specified in rfc2616 section 4.4.3.
    parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
    return;
  }

  // Errors parsing content-length definitely can cause
  // protocol errors/warnings.
  switch (headers_->content_length_status_) {
    case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
      // A parsed length of zero means the message is already complete;
      // anything else means the body still has to be consumed.
      parse_state_ = (headers_->content_length_ == 0)
                         ? BalsaFrameEnums::MESSAGE_FULLY_READ
                         : BalsaFrameEnums::READING_CONTENT;
      break;

    case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
    case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
      // The content length could not be parsed in full; flag an error and
      // stop.
      HandleError(BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH);
      break;

    case BalsaHeadersEnums::NO_CONTENT_LENGTH: {
      // We can have: no transfer-encoding, no content length, and no
      // connection: close...
      // Unfortunately, this case doesn't seem to be covered in the spec.
      // We'll assume that the safest thing to do here is what the google
      // binaries before 2008 already do, which is to assume that
      // everything until the connection is closed is body.
      if (is_request_) {
        const absl::string_view method = headers_->request_method();
        // POSTs and PUTs should have a detectable body length. If they
        // do not we consider it an error.
        const bool expects_body = (method == "POST" || method == "PUT");
        if (!expects_body) {
          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
          break;
        }
        if (!allow_reading_until_close_for_request_) {
          HandleError(BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH);
          break;
        }
      }
      parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
      HandleWarning(BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH);
      break;
    }

      // The COV_NF_... statements here provide hints to the apparatus
      // which computes coverage reports/ratios that this code is never
      // intended to be executed, and should technically be impossible.
      // COV_NF_START
    default:
      QUICHE_LOG(FATAL) << "Saw a content_length_status: "
                        << headers_->content_length_status_
                        << " which is unknown.";
      // COV_NF_END
  }
}
// Consumes header bytes from [message_start, message_start + message_length)
// and returns how many of those bytes were consumed.
//
// The function can be called repeatedly with successive pieces of input:
// bytes are appended to headers_ through WriteFromFramer(), line boundaries
// are accumulated in lines_, and last_slash_n_idx_ / last_char_was_slash_r_ /
// saw_non_newline_char_ carry the scanning state between calls.
//
// Once the blank line that terminates the header block is seen, the header
// lines are processed, the follow-up parse state is assigned, and the visitor
// is notified (ProcessHeaders, HeaderDone and, if the message has no body,
// MessageDone). When continue_headers_ is set and the response code is 100,
// the headers are moved into *continue_headers_, the framer is Reset(), and
// parsing resumes with the bytes that follow.
//
// On error the function returns early with parse_state_ == ERROR.
size_t BalsaFrame::ProcessHeaders(const char* message_start,
size_t message_length) {
const char* const original_message_start = message_start;
const char* const message_end = message_start + message_length;
const char* message_current = message_start;
// Start of the input bytes that have not yet been copied into headers_.
const char* checkpoint = message_start;
if (message_length == 0) {
return message_current - original_message_start;
}
while (message_current < message_end) {
// Number of header bytes already buffered; used to translate positions in
// this input into offsets within the header stream.
size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
// Yes, we could use strchr (assuming null termination), or
// memchr, but as it turns out that is slower than this tight loop
// for the input that we see.
if (!saw_non_newline_char_) {
// Skip any '\r' / '\n' that precede the first line. Any other character
// <= ' ' in that position is an error.
do {
const char c = *message_current;
if (c != '\r' && c != '\n') {
if (CHAR_LE(c, ' ')) {
HandleError(BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST);
return message_current - original_message_start;
}
break;
}
++message_current;
if (message_current == message_end) {
return message_current - original_message_start;
}
} while (true);
saw_non_newline_char_ = true;
message_start = message_current;
checkpoint = message_current;
}
// Look for newlines; each one closes a line, and a newline that directly
// follows the previous line end (optionally with a '\r' in between) ends
// the header block.
while (message_current < message_end) {
if (*message_current != '\n') {
++message_current;
continue;
}
const size_t relative_idx = message_current - message_start;
// Offset, within the header stream, of the byte just past this '\n'.
const size_t message_current_idx = 1 + base_idx + relative_idx;
lines_.push_back(std::make_pair(last_slash_n_idx_, message_current_idx));
if (lines_.size() == 1) {
// This is the first line: buffer it right away and parse it.
headers_->WriteFromFramer(checkpoint, 1 + message_current - checkpoint);
checkpoint = message_current + 1;
const char* begin = headers_->OriginalHeaderStreamBegin();
QUICHE_DVLOG(1) << "First line "
<< std::string(begin, lines_[0].second);
QUICHE_DVLOG(1) << "is_request_: " << is_request_;
ProcessFirstLine(begin, begin + lines_[0].second);
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
break;
}
if (parse_state_ == BalsaFrameEnums::ERROR) {
return message_current - original_message_start;
}
}
const size_t chars_since_last_slash_n =
(message_current_idx - last_slash_n_idx_);
last_slash_n_idx_ = message_current_idx;
if (chars_since_last_slash_n > 2) {
// false positive.
++message_current;
continue;
}
// Either "\n\n" (distance 1) or "\n\r\n" (distance 2 with a '\r' before
// this '\n', possibly seen at the end of the previous call).
if ((chars_since_last_slash_n == 1) ||
(((message_current > message_start) &&
(*(message_current - 1) == '\r')) ||
(last_char_was_slash_r_))) {
break;
}
++message_current;
}
if (message_current == message_end) {
// Input exhausted without finding the end of the headers; the outer loop
// condition now fails and control reaches the code after the loop.
continue;
}
// Step past the '\n' that ended the header block.
++message_current;
QUICHE_DCHECK(message_current >= message_start);
if (message_current > message_start) {
headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
}
// Check if we have exceeded maximum headers length
// Although we check for this limit before and after we call this function
// we check it here as well to make sure that in case the visitor changed
// the max_header_length_ (for example after processing the first line)
// we handle it gracefully.
if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
return message_current - original_message_start;
}
// Since we know that we won't be writing any more bytes of the header,
// we tell that to the headers object. The headers object may make
// more efficient allocation decisions when this is signaled.
headers_->DoneWritingFromFramer();
visitor_->OnHeaderInput(headers_->GetReadablePtrFromHeaderStream());
// Ok, now that we've written everything into our header buffer, it is
// time to process the header lines (extract proper values for headers
// which are important for framing).
ProcessHeaderLines(lines_, false /*is_trailer*/, headers_);
if (parse_state_ == BalsaFrameEnums::ERROR) {
return message_current - original_message_start;
}
if (continue_headers_ != nullptr &&
headers_->parsed_response_code_ == kContinueStatusCode) {
// Save the headers from this 100 Continue response but reset everything
// else to prepare for the next set of headers.
BalsaHeaders saved_continue_headers = std::move(*headers_);
Reset();
*continue_headers_ = std::move(saved_continue_headers);
visitor_->ContinueHeaderDone();
checkpoint = message_start = message_current;
continue;
}
AssignParseStateAfterHeadersHaveBeenParsed();
if (parse_state_ == BalsaFrameEnums::ERROR) {
return message_current - original_message_start;
}
visitor_->ProcessHeaders(*headers_);
visitor_->HeaderDone();
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
visitor_->MessageDone();
}
return message_current - original_message_start;
}
// If we've gotten to here, it means that we've consumed all of the
// available input. We need to record whether or not the last character we
// saw was a '\r' so that a subsequent call to ProcessInput correctly finds
// a header framing that is split across the two calls.
// (message_length > 0 is guaranteed by the early return at the top, so
// message_end - 1 is a valid position.)
last_char_was_slash_r_ = (*(message_end - 1) == '\r');
QUICHE_DCHECK(message_current >= message_start);
if (message_current > message_start) {
headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
}
return message_current - original_message_start;
}
// Returns how many upcoming bytes are known to be body payload in the current
// parse state:
//   READING_CHUNK_DATA  - what is left of the current chunk.
//   READING_CONTENT     - what is left of the content-length body.
//   READING_UNTIL_CLOSE - unbounded (the maximum size_t value).
//   any other state     - 0.
size_t BalsaFrame::BytesSafeToSplice() const {
  if (parse_state_ == BalsaFrameEnums::READING_CHUNK_DATA) {
    return chunk_length_remaining_;
  }
  if (parse_state_ == BalsaFrameEnums::READING_UNTIL_CLOSE) {
    return std::numeric_limits<size_t>::max();
  }
  if (parse_state_ == BalsaFrameEnums::READING_CONTENT) {
    return content_length_remaining_;
  }
  return 0;
}
// Records that `bytes_spliced` bytes of body were handled outside of
// ProcessInput and updates the framing counters to match.
//
// Behavior by parse state:
//   * READING_UNTIL_CLOSE: nothing to account for.
//   * READING_CHUNK_DATA: subtracts from the remaining chunk length and moves
//     to READING_CHUNK_TERM once the chunk is exhausted.
//   * READING_CONTENT: subtracts from the remaining content length; when it
//     reaches zero the state becomes MESSAGE_FULLY_READ and the visitor's
//     MessageDone() is invoked.
//   * any other state: reports CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO.
// Splicing more than the remaining amount reports
// CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT and leaves the
// counters untouched.
void BalsaFrame::BytesSpliced(size_t bytes_spliced) {
  if (parse_state_ == BalsaFrameEnums::READING_UNTIL_CLOSE) {
    // Body length is unbounded in this state; no bookkeeping is needed.
    return;
  }

  if (parse_state_ == BalsaFrameEnums::READING_CHUNK_DATA) {
    if (bytes_spliced > chunk_length_remaining_) {
      HandleError(
          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
      return;
    }
    chunk_length_remaining_ -= bytes_spliced;
    if (chunk_length_remaining_ == 0) {
      parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
    }
    return;
  }

  if (parse_state_ == BalsaFrameEnums::READING_CONTENT) {
    if (bytes_spliced > content_length_remaining_) {
      HandleError(
          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT);
      return;
    }
    content_length_remaining_ -= bytes_spliced;
    if (content_length_remaining_ == 0) {
      parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
      visitor_->MessageDone();
    }
    return;
  }

  // Splicing is not permitted in any other state.
  HandleError(BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO);
}
// Feeds `size` bytes starting at `input` into the framer and returns the
// number of bytes consumed (always `current - input`).
//
// Overview of the control flow below:
//   * If headers_ is null, nothing is consumed and 0 is returned.
//   * In READING_HEADER_AND_FIRSTLINE, at most
//     (max_header_length_ - bytes already in the header stream) bytes are
//     handed to ProcessHeaders(); HEADERS_TOO_LONG is reported when the limit
//     is reached while the state is still READING_HEADER_AND_FIRSTLINE. The
//     function returns after this step without entering the body loop.
//   * In MESSAGE_FULLY_READ or ERROR, nothing is consumed.
//   * Otherwise the body state machine runs, returning when the input is
//     exhausted, the message is fully read, or an error has been reported.
//
// Visitor callbacks (OnRawBodyInput, OnChunkLength, OnChunkExtensionInput,
// OnBodyChunkInput, OnTrailerInput, ProcessTrailers, MessageDone) are issued
// from within the individual states.
size_t BalsaFrame::ProcessInput(const char* input, size_t size) {
const char* current = input;
// Start of the span that has not yet been reported through OnRawBodyInput /
// OnTrailerInput by the chunked-encoding states; advanced after such reports
// in READING_CHUNK_DATA and READING_LAST_CHUNK_TERM.
const char* on_entry = current;
const char* end = current + size;
QUICHE_DCHECK(headers_ != nullptr);
if (headers_ == nullptr) {
return 0;
}
if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
// Yes, we still have to check this here as the user can change the
// max_header_length amount!
// Also it is possible that we have reached the maximum allowed header size,
// and we have more to consume (remember we are still inside
// READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
if (header_length > max_header_length_ ||
(header_length == max_header_length_ && size > 0)) {
HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
return current - input;
}
// Never hand ProcessHeaders more bytes than the remaining header budget.
const size_t bytes_to_process =
std::min(max_header_length_ - header_length, size);
current += ProcessHeaders(input, bytes_to_process);
// If we are still reading headers check if we have crossed the headers
// limit. Note that we check for >= as opposed to >. This is because if
// header_length_after equals max_header_length_ and we are still in the
// parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
// sure that the headers limit will be crossed later on
if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
// Note that headers_ is valid only if we are still reading headers.
const size_t header_length_after =
headers_->GetReadableBytesFromHeaderStream();
if (header_length_after >= max_header_length_) {
HandleError(BalsaFrameEnums::HEADERS_TOO_LONG);
}
}
return current - input;
}
if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
parse_state_ == BalsaFrameEnums::ERROR) {
// Can do nothing more 'till we're reset.
return current - input;
}
QUICHE_DCHECK_LE(current, end);
if (current == end) {
return current - input;
}
// Body state machine. Each `continue` re-dispatches on the (possibly
// updated) parse_state_; every exit from the loop is a `return`.
while (true) {
switch (parse_state_) {
case BalsaFrameEnums::READING_CHUNK_LENGTH:
// In this state we read the chunk length.
// Note that once we hit a character which is not in:
// [0-9;A-Fa-f\n], we transition to a different state.
//
QUICHE_DCHECK_LE(current, end);
while (true) {
if (current == end) {
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
}
const char c = *current;
++current;
// Sentinels for the per-character classification below; hex digits map
// to their non-negative value instead.
static const signed char kBad = -1;
static const signed char kDelimiter = -2;
// valid cases:
// "09123\n" // -> 09123
// "09123\r\n" // -> 09123
// "09123 \n" // -> 09123
// "09123 \r\n" // -> 09123
// "09123 12312\n" // -> 09123
// "09123 12312\r\n" // -> 09123
// "09123; foo=bar\n" // -> 09123
// "09123; foo=bar\r\n" // -> 09123
// "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF
// "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF
// invalid cases:
// "[ \t]+[^\n]*\n"
// "FFFFFFFFFFFFFFFFF\r\n" (would overflow)
// "\r\n"
// "\n"
signed char addition = kBad;
// clang-format off
switch (c) {
case '0': addition = 0; break;
case '1': addition = 1; break;
case '2': addition = 2; break;
case '3': addition = 3; break;
case '4': addition = 4; break;
case '5': addition = 5; break;
case '6': addition = 6; break;
case '7': addition = 7; break;
case '8': addition = 8; break;
case '9': addition = 9; break;
case 'a': addition = 0xA; break;
case 'b': addition = 0xB; break;
case 'c': addition = 0xC; break;
case 'd': addition = 0xD; break;
case 'e': addition = 0xE; break;
case 'f': addition = 0xF; break;
case 'A': addition = 0xA; break;
case 'B': addition = 0xB; break;
case 'C': addition = 0xC; break;
case 'D': addition = 0xD; break;
case 'E': addition = 0xE; break;
case 'F': addition = 0xF; break;
case '\t':
case '\n':
case '\r':
case ' ':
case ';':
addition = kDelimiter;
break;
default:
// Leave addition == kBad
break;
}
// clang-format on
if (addition >= 0) {
// A hex digit: accumulate it as chunk_length_remaining_ * 16 + digit.
chunk_length_character_extracted_ = true;
size_t length_x_16 = chunk_length_remaining_ * 16;
const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
// The first test guards the multiplication by 16, the second guards the
// addition of the new digit.
if ((chunk_length_remaining_ > kMaxDiv16) ||
(std::numeric_limits<size_t>::max() - length_x_16) <
static_cast<size_t>(addition)) {
// overflow -- asked for a chunk-length greater than 2^64 - 1!!
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
HandleError(BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW);
return current - input;
}
chunk_length_remaining_ = length_x_16 + addition;
continue;
}
if (!chunk_length_character_extracted_ || addition == kBad) {
// ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
// characters were converted, or an unexpected character was
// seen.
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
HandleError(BalsaFrameEnums::INVALID_CHUNK_LENGTH);
return current - input;
}
// A delimiter following at least one hex digit ends the length.
break;
}
// The delimiter that ended the length was already stepped over above; back
// up so that READING_CHUNK_EXTENSION starts scanning at that delimiter.
--current;
parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
visitor_->OnChunkLength(chunk_length_remaining_);
continue;
case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
// TODO(phython): Convert this scanning to be 16 bytes at a time if
// there is data to be read.
const char* extensions_start = current;
size_t extensions_length = 0;
QUICHE_DCHECK_LE(current, end);
// Scan forward to the '\n' that ends the chunk-size line.
while (true) {
if (current == end) {
// Input ran out before the terminating '\n'. extensions_length is
// still 0 here unless a '\r' was seen earlier in this scan.
visitor_->OnChunkExtensionInput(
absl::string_view(extensions_start, extensions_length));
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
}
const char c = *current;
if (c == '\r' || c == '\n') {
// Recomputed at every '\r' or '\n'. When the line ends in "\r\n", the
// value computed at the '\n' excludes the preceding '\r'.
// NOTE(review): for a bare "\n" terminator the `- 1` appears to drop
// the last byte before the '\n' -- confirm this is intended.
extensions_length = (extensions_start == current)
? 0
: current - extensions_start - 1;
}
++current;
if (c == '\n') {
break;
}
}
// Prepare for parsing the next chunk's length.
chunk_length_character_extracted_ = false;
visitor_->OnChunkExtensionInput(
absl::string_view(extensions_start, extensions_length));
if (chunk_length_remaining_ != 0) {
parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
continue;
}
// A zero chunk length marks the last chunk.
// NOTE(review): presumably this call primes the header-framing detector
// with the '\n' just consumed so a following blank line is recognized in
// READING_LAST_CHUNK_TERM -- confirm against HeaderFramingFound().
HeaderFramingFound('\n');
parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
continue;
}
case BalsaFrameEnums::READING_CHUNK_DATA:
while (current < end) {
if (chunk_length_remaining_ == 0) {
break;
}
// read in the chunk
size_t bytes_remaining = end - current;
size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining)
? chunk_length_remaining_
: bytes_remaining;
const char* tmp_current = current + consumed_bytes;
// The raw callback covers everything since on_entry (which may include
// framing bytes consumed earlier in this call); the body callback covers
// only the chunk payload.
visitor_->OnRawBodyInput(
absl::string_view(on_entry, tmp_current - on_entry));
visitor_->OnBodyChunkInput(
absl::string_view(current, consumed_bytes));
on_entry = current = tmp_current;
chunk_length_remaining_ -= consumed_bytes;
}
if (chunk_length_remaining_ == 0) {
parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
continue;
}
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
case BalsaFrameEnums::READING_CHUNK_TERM:
// Skip everything up to and including the '\n' that follows the chunk
// data, then go back to reading the next chunk length.
QUICHE_DCHECK_LE(current, end);
while (true) {
if (current == end) {
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
}
const char c = *current;
++current;
if (c == '\n') {
break;
}
}
parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
continue;
case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
QUICHE_DCHECK_LE(current, end);
while (true) {
if (current == end) {
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
}
const char c = *current;
if (HeaderFramingFound(c) != 0) {
// If we've found a "\r\n\r\n", then the message
// is done.
++current;
parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
visitor_->MessageDone();
return current - input;
}
// If not, however, since the spec only suggests that the
// client SHOULD indicate the presence of trailers, we get to
// *test* that they did or didn't.
// If all of the bytes we've seen since:
// OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
// are either '\r', or '\n', then we can assume that we don't yet
// know if we need to parse headers, or if the next byte will make
// the HeaderFramingFound condition (above) true.
if (!HeaderFramingMayBeFound()) {
// `current` is deliberately not advanced here, so the byte just examined
// is read again as the first trailer byte in READING_TRAILER.
break;
}
// If HeaderFramingMayBeFound(), then we have seen only characters
// '\r' or '\n'.
++current;
// Lets try again! There is no state change here.
}
// If (!HeaderFramingMayBeFound()), then we know that we must be
// reading the first non CRLF character of a trailer.
parse_state_ = BalsaFrameEnums::READING_TRAILER;
visitor_->OnRawBodyInput(
absl::string_view(on_entry, current - on_entry));
// From here on, on_entry marks the start of trailer bytes seen in this
// call.
on_entry = current;
continue;
// TODO(yongfa): No leading whitespace is allowed before field-name per
// RFC2616. Leading whitespace will cause header parsing error too.
case BalsaFrameEnums::READING_TRAILER:
while (current < end) {
const char c = *current;
++current;
// trailer_length_ counts every trailer byte, whether or not trailer_ is
// set; the length limit and line bookkeeping below apply only when
// trailer_ is non-null.
++trailer_length_;
if (trailer_ != nullptr) {
// Reuse the header length limit for trailer, which is just a bunch
// of headers.
if (trailer_length_ > max_header_length_) {
// Un-consume the byte that pushed the trailer over the limit.
--current;
HandleError(BalsaFrameEnums::TRAILER_TOO_LONG);
return current - input;
}
if (LineFramingFound(c)) {
// Record the line as a (start offset, end offset) pair within the
// trailer and begin the next line at the current offset.
trailer_lines_.push_back(
std::make_pair(start_of_trailer_line_, trailer_length_));
start_of_trailer_line_ = trailer_length_;
}
}
if (HeaderFramingFound(c) != 0) {
// End of the trailer block: the message is complete.
parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
if (trailer_ != nullptr) {
trailer_->WriteFromFramer(on_entry, current - on_entry);
trailer_->DoneWritingFromFramer();
ProcessHeaderLines(trailer_lines_, true /*is_trailer*/, trailer_);
if (parse_state_ == BalsaFrameEnums::ERROR) {
return current - input;
}
visitor_->ProcessTrailers(*trailer_);
}
visitor_->OnTrailerInput(
absl::string_view(on_entry, current - on_entry));
visitor_->MessageDone();
return current - input;
}
}
// Input exhausted before the end of the trailer: buffer what was seen in
// this call (if a trailer object is set) and report it to the visitor.
if (trailer_ != nullptr) {
trailer_->WriteFromFramer(on_entry, current - on_entry);
}
visitor_->OnTrailerInput(
absl::string_view(on_entry, current - on_entry));
return current - input;
case BalsaFrameEnums::READING_UNTIL_CLOSE: {
// All remaining input is body; report it and consume it in one step.
const size_t bytes_remaining = end - current;
if (bytes_remaining > 0) {
visitor_->OnRawBodyInput(absl::string_view(current, bytes_remaining));
visitor_->OnBodyChunkInput(
absl::string_view(current, bytes_remaining));
current += bytes_remaining;
}
return current - input;
}
case BalsaFrameEnums::READING_CONTENT:
while ((content_length_remaining_ != 0u) && current < end) {
// read in the content
const size_t bytes_remaining = end - current;
const size_t consumed_bytes =
(content_length_remaining_ < bytes_remaining)
? content_length_remaining_
: bytes_remaining;
visitor_->OnRawBodyInput(absl::string_view(current, consumed_bytes));
visitor_->OnBodyChunkInput(
absl::string_view(current, consumed_bytes));
current += consumed_bytes;
content_length_remaining_ -= consumed_bytes;
}
// Any input beyond the declared content length is left unconsumed.
if (content_length_remaining_ == 0) {
parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
visitor_->MessageDone();
}
return current - input;
default:
// The state-machine should never be in a state that isn't handled
// above. This is a glaring logic error, and we should do something
// drastic to ensure that this gets looked-at and fixed.
QUICHE_LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE
<< " memory corruption?!"; // COV_NF_LINE
}
}
}
// Out-of-class definitions of BalsaFrame's static constant members; no
// initializers are given here.
// NOTE(review): presumably the values come from in-class initializers in
// balsa_frame.h, and these definitions exist so the constants can be
// ODR-used -- confirm against the header.
const int32_t BalsaFrame::kValidTerm1;
const int32_t BalsaFrame::kValidTerm1Mask;
const int32_t BalsaFrame::kValidTerm2;
const int32_t BalsaFrame::kValidTerm2Mask;
} // namespace quiche
// Remove the character-comparison helper macros defined at the top of this
// file so that they are no longer defined past this point.
#undef CHAR_LT
#undef CHAR_LE
#undef CHAR_GT
#undef CHAR_GE
#undef QUICHE_DCHECK_CHAR_GE