Introduce a BalsaHeaders API to fold continuation lines, and call it from BalsaFrame.

Protected by HTTP validation option sanitize_obs_fold_in_header_values.

PiperOrigin-RevId: 808694720
diff --git a/quiche/balsa/balsa_frame.cc b/quiche/balsa/balsa_frame.cc
index f1a80e5..d301ce1 100644
--- a/quiche/balsa/balsa_frame.cc
+++ b/quiche/balsa/balsa_frame.cc
@@ -458,7 +458,8 @@
 
 bool BalsaFrame::FindColonsAndParseIntoKeyValue(const Lines& lines,
                                                 bool is_trailer,
-                                                BalsaHeaders* headers) {
+                                                BalsaHeaders* headers,
+                                                bool* has_continuation_lines) {
   QUICHE_DCHECK(!lines.empty());
   const char* stream_begin = headers->OriginalHeaderStreamBegin();
   // The last line is always just a newline (and is uninteresting).
@@ -478,9 +479,9 @@
     const char* line_begin = stream_begin + lines[i].first;
 
     // Here we handle possible continuations.  Note that we do not replace
-    // the '\n' in the line before a continuation (at least, as of now),
-    // which implies that any code which looks for a value must deal with
-    // "\r\n", etc -within- the line (and not just at the end of it).
+    // the '\n' in the line before a continuation here; if obs-fold sanitizing
+    // is enabled, the line is flagged so that the caller can fold it later.
+    bool header_has_continuation_line = false;
     for (++i; i < lines_size_m1; ++i) {
       const char c = *(stream_begin + lines[i].first);
       if (CHAR_GT(c, ' ')) {
@@ -504,9 +505,10 @@
       // continuation) and continuation is allowed.
       HandleWarning(is_trailer ? BalsaFrameEnums::OBS_FOLD_IN_TRAILERS
                                : BalsaFrameEnums::OBS_FOLD_IN_HEADERS);
-
-      // If disallow_header_continuation_lines() is false, we neither reject nor
-      // normalize continuation lines, in violation of RFC7230.
+      if (http_validation_policy().sanitize_obs_fold_in_header_values) {
+        *has_continuation_lines = true;
+        header_has_continuation_line = true;
+      }
     }
     const char* line_end = stream_begin + lines[i - 1].second;
     QUICHE_DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
@@ -532,6 +534,8 @@
     headers->header_lines_.push_back(HeaderLineDescription(
         line_begin - stream_begin, line_end - stream_begin,
         line_end - stream_begin, line_end - stream_begin, 0));
+    headers->header_lines_.back().has_continuation_line =
+        header_has_continuation_line;
     if (current >= line_end) {
       if (http_validation_policy().require_header_colon) {
         HandleError(is_trailer ? BalsaFrameEnums::TRAILER_MISSING_COLON
@@ -738,10 +742,16 @@
   HeaderLines::size_type content_length_idx = 0;
   HeaderLines::size_type transfer_encoding_idx = 0;
   const char* stream_begin = headers->OriginalHeaderStreamBegin();
+  bool has_continuation_lines = false;
   // Parse the rest of the header or trailer data into key-value pairs.
-  if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers)) {
+  if (!FindColonsAndParseIntoKeyValue(lines, is_trailer, headers,
+                                      &has_continuation_lines)) {
     return;
   }
+  if (http_validation_policy().sanitize_obs_fold_in_header_values &&
+      has_continuation_lines) {
+    headers->FoldContinuationLines();
+  }
   // At this point, we've parsed all of the headers/trailers.  Time to look
   // for those headers which we require for framing or for format errors.
   const HeaderLines::size_type lines_size = headers->header_lines_.size();
@@ -954,11 +964,14 @@
           return message_current - original_message_start;
         }
       }
+      // Determine if this is the end of the headers.
       const size_t chars_since_last_slash_n =
           (message_current_idx - last_slash_n_idx_);
       last_slash_n_idx_ = message_current_idx;
       if (chars_since_last_slash_n > 2) {
-        // false positive.
+        // Optimization: if the line is longer than 2 characters, it must
+        // contain content and cannot be the terminating blank line. The loop
+        // continues to search for the next line.
         ++message_current;
         continue;
       }
diff --git a/quiche/balsa/balsa_frame.h b/quiche/balsa/balsa_frame.h
index 4902ab8..c61bcb3 100644
--- a/quiche/balsa/balsa_frame.h
+++ b/quiche/balsa/balsa_frame.h
@@ -259,7 +259,8 @@
 
   // Calls HandleError() and returns false on error.
   bool FindColonsAndParseIntoKeyValue(const Lines& lines, bool is_trailer,
-                                      BalsaHeaders* headers);
+                                      BalsaHeaders* headers,
+                                      bool* has_continuation_lines);
 
   void HandleError(BalsaFrameEnums::ErrorCode error_code);
   void HandleWarning(BalsaFrameEnums::ErrorCode error_code);
diff --git a/quiche/balsa/balsa_frame_test.cc b/quiche/balsa/balsa_frame_test.cc
index 9a958d4..a0dfcac 100644
--- a/quiche/balsa/balsa_frame_test.cc
+++ b/quiche/balsa/balsa_frame_test.cc
@@ -68,7 +68,9 @@
                                              const BalsaFrame::Lines& lines,
                                              bool is_trailer,
                                              BalsaHeaders* headers) {
-    balsa_frame->FindColonsAndParseIntoKeyValue(lines, is_trailer, headers);
+    bool has_continuation_lines = false;
+    balsa_frame->FindColonsAndParseIntoKeyValue(lines, is_trailer, headers,
+                                                &has_continuation_lines);
   }
 };
 
@@ -4891,6 +4893,45 @@
   EXPECT_EQ(BalsaFrameEnums::INVALID_HEADER_FORMAT, balsa_frame_.ErrorCode());
 }
 
+// With sanitize_obs_fold_in_header_values enabled, every obs-fold (RFC 7230
+// Section 3.2.4) in a header value must reach the visitor as plain spaces.
+TEST_F(HTTPBalsaFrameTest, ContinuationLinesSanitized) {
+  HttpValidationPolicy policy;
+  policy.sanitize_obs_fold_in_header_values = true;
+  balsa_frame_.set_http_validation_policy(policy);
+
+  const std::string request =
+      "GET / HTTP/1.1\r\n"
+      "key1: obs-\n fold\r\n"
+      "key2: obs-\r\n fold\r\n"
+      "key3: obs-\n\tfold\r\n"
+      "key4: obs-\r\n\tfold\r\n"
+      "key5: obs-\n   fold\r\n"
+      "key6: obs-\r\n   fold\r\n"
+      "key7: obs-\n \tfold\r\n"
+      "key8: obs-\r\n \tfold\r\n"
+      "\r\n";
+
+  // Each CR, LF, space and tab of a fold is replaced by exactly one space, so
+  // an expected value has as many spaces as its input had whitespace chars.
+  FakeHeaders expected_headers;
+  expected_headers.AddKeyValue("key1", "obs-  fold");
+  expected_headers.AddKeyValue("key2", "obs-   fold");
+  expected_headers.AddKeyValue("key3", "obs-  fold");
+  expected_headers.AddKeyValue("key4", "obs-   fold");
+  expected_headers.AddKeyValue("key5", "obs-    fold");
+  expected_headers.AddKeyValue("key6", "obs-     fold");
+  expected_headers.AddKeyValue("key7", "obs-   fold");
+  expected_headers.AddKeyValue("key8", "obs-    fold");
+  EXPECT_CALL(visitor_mock_, ProcessHeaders(expected_headers));
+  EXPECT_CALL(visitor_mock_,
+              HandleWarning(BalsaFrameEnums::OBS_FOLD_IN_HEADERS))
+      .Times(8);
+
+  EXPECT_EQ(request.size(),
+            balsa_frame_.ProcessInput(request.data(), request.size()));
+}
+
 TEST_F(HTTPBalsaFrameTest, NullAtBeginningOrEndOfValue) {
   balsa_frame_.set_invalid_chars_level(BalsaFrame::InvalidCharsLevel::kError);
 
diff --git a/quiche/balsa/balsa_headers.cc b/quiche/balsa/balsa_headers.cc
index 5f67152..ae4ce40 100644
--- a/quiche/balsa/balsa_headers.cc
+++ b/quiche/balsa/balsa_headers.cc
@@ -913,6 +913,43 @@
   return true;
 }
 
+// Rewrites, in place, the value of every header line flagged with
+// has_continuation_line so that each obs-fold becomes a run of spaces, then
+// clears the flag. Any '\n' inside such a value is assumed to start a
+// continuation (the parser would have split the line there otherwise), so the
+// '\n', a '\r' directly before it, and the spaces/tabs after it become ' '.
+//
+// See: https://tools.ietf.org/html/rfc7230#section-3.2.4
+void BalsaHeaders::FoldContinuationLines() {
+  for (HeaderLineDescription& line : header_lines_) {
+    if (line.skip || !line.has_continuation_line) {
+      continue;
+    }
+    const char* value_begin =
+        GetPtr(line.buffer_base_idx) + line.value_begin_idx;
+    const char* value_end = GetPtr(line.buffer_base_idx) + line.last_char_idx;
+    // True while scanning the whitespace that directly follows a '\n'.
+    bool in_fold = false;
+    for (char* p = const_cast<char*>(value_begin); p < value_end; ++p) {
+      if (*p == '\n') {
+        // Blank the line break, including the '\r' of a "\r\n" pair.
+        *p = ' ';
+        if (p != value_begin && *(p - 1) == '\r') {
+          *(p - 1) = ' ';
+        }
+        in_fold = true;
+      } else if (in_fold && (*p == '\t' || *p == ' ')) {
+        // Normalize the indentation of the continuation line to spaces.
+        *p = ' ';
+      } else {
+        // Any other character ends the fold (if one was in progress).
+        in_fold = false;
+      }
+    }
+    line.has_continuation_line = false;
+  }
+}
+
 void BalsaHeaders::DumpToPrefixedString(const char* spaces,
                                         std::string* str) const {
   const absl::string_view firstline = first_line();
diff --git a/quiche/balsa/balsa_headers.h b/quiche/balsa/balsa_headers.h
index 7b42e3e..5c40458 100644
--- a/quiche/balsa/balsa_headers.h
+++ b/quiche/balsa/balsa_headers.h
@@ -404,6 +404,7 @@
     size_t last_char_idx;
     BalsaBuffer::Blocks::size_type buffer_base_idx;
     bool skip;
+    bool has_continuation_line = false;
   };
 
   using HeaderTokenList = std::vector<absl::string_view>;
@@ -846,6 +847,8 @@
                                       const absl::string_view value)>
           fn) const override;
 
+  void FoldContinuationLines();
+
   void DumpToPrefixedString(const char* spaces, std::string* str) const;
 
   absl::string_view first_line() const {
diff --git a/quiche/balsa/http_validation_policy.h b/quiche/balsa/http_validation_policy.h
index a29373f..829a994 100644
--- a/quiche/balsa/http_validation_policy.h
+++ b/quiche/balsa/http_validation_policy.h
@@ -97,6 +97,10 @@
   // containing multiple consecutive spaces will be rejected.
   FirstLineValidationOption sanitize_firstline_spaces =
       FirstLineValidationOption::NONE;
+
+  // If true, the parser will replace obs-fold in header field values with one
+  // or more space characters.
+  bool sanitize_obs_fold_in_header_values = false;
 };
 
 }  // namespace quiche