Add HttpValidationPolicy controlling semicolon delimitation of chunk-exts

Balsa currently accepts requests where `chunk-size` contains illegal whitespace (SP or HTAB). For example, Balsa parses an `1 A` chunk-size as `1` and the chunk extension as ` A`. This is slightly incorrect.

There are two primary cases to consider:

1. `chunk-size` followed by `chunk-ext` + `CRLF` framing
2. `chunk-size` followed only by `CRLF` framing

For `chunk-size` without a `chunk-ext`, any trailing `SP/HTAB` is always illegal.

For `chunk-size` with a `chunk-ext`, there actually is a case where it’s valid to have whitespace after `chunk-size`. If that `chunk-ext` begins with `BWS`, it is perfectly legal to have `SP` or `HTAB` following the `chunk-size`. However, that is legal if and only if, the `BWS` is part of a `chunk-ext` and not superfluous before a `CRLF`.

Unfortunately, Balsa does not have any enforcement that `chunk-ext` contain a `;` which makes it difficult to differentiate between the `SP` / `HTAB` being `BWS` in a chunk-ext or erroneous trailing whitespace so we add enforcement that the `chunk-ext` contains a `;` and, if it is present, it is only preceded by `SP` or `HTAB`.

Protected by unused http validation policy.

PiperOrigin-RevId: 874812254
diff --git a/quiche/balsa/balsa_frame.cc b/quiche/balsa/balsa_frame.cc
index 65f63dc..2d08b8d 100644
--- a/quiche/balsa/balsa_frame.cc
+++ b/quiche/balsa/balsa_frame.cc
@@ -1237,10 +1237,10 @@
         continue;
 
       case BalsaFrameEnums::READING_CHUNK_EXTENSION: {
-        // TODO(phython): Convert this scanning to be 16 bytes at a time if
-        // there is data to be read.
         const char* extensions_start = current;
         size_t extensions_length = 0;
+        bool found_semicolon = false;
+        bool found_non_bws_before_semicolon = false;
         QUICHE_DCHECK_LE(current, end);
         while (true) {
           if (current == end) {
@@ -1251,6 +1251,12 @@
             return current - input;
           }
           const char c = *current;
+          if (c == ';') {
+            found_semicolon = true;
+          }
+          if (!found_semicolon && (c != ' ' && c != '\t')) {
+            found_non_bws_before_semicolon = true;
+          }
           if (!IsValidChunkExtensionCharacter(c, current, input, end)) {
             HandleError(BalsaFrameEnums::INVALID_CHUNK_EXTENSION);
             return current - input;
@@ -1270,6 +1276,14 @@
           }
         }
 
+        if (http_validation_policy_
+                .require_semicolon_delimited_chunk_extension &&
+            extensions_length > 0 &&
+            (!found_semicolon || found_non_bws_before_semicolon)) {
+          HandleError(BalsaFrameEnums::INVALID_CHUNK_EXTENSION);
+          return current - input;
+        }
+
         chunk_length_character_extracted_ = false;
         visitor_->OnChunkExtensionInput(
             absl::string_view(extensions_start, extensions_length));
diff --git a/quiche/balsa/balsa_frame_test.cc b/quiche/balsa/balsa_frame_test.cc
index 3bf7873..8967716 100644
--- a/quiche/balsa/balsa_frame_test.cc
+++ b/quiche/balsa/balsa_frame_test.cc
@@ -24,6 +24,7 @@
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_replace.h"
 #include "absl/strings/string_view.h"
+#include "absl/types/span.h"
 #include "quiche/balsa/balsa_enums.h"
 #include "quiche/balsa/balsa_headers.h"
 #include "quiche/balsa/balsa_visitor_interface.h"
@@ -47,6 +48,7 @@
 using ::testing::Pointee;
 using ::testing::Property;
 using ::testing::Range;
+using ::testing::Sequence;
 using ::testing::StrEq;
 using ::testing::StrictMock;
 
@@ -5101,6 +5103,207 @@
   EXPECT_EQ(BalsaFrameEnums::BALSA_NO_ERROR, balsa_frame_.ErrorCode());
 }
 
+TEST_F(HTTPBalsaFrameTest, MoreChunkExtensions) {
+  struct TestCase {
+    absl::string_view chunks;  // chunks to process.
+    absl::Span<const size_t> expected_chunk_sizes;
+    absl::Span<const absl::string_view> expected_chunk_extensions;
+    HttpValidationPolicy policy;
+    BalsaFrameEnums::ErrorCode expected_error;
+  };
+
+  HttpValidationPolicy strict_chunks_policy;
+  strict_chunks_policy.disallow_stray_data_after_chunk = true;
+  strict_chunks_policy.disallow_lone_lf_in_chunk_extension = true;
+  strict_chunks_policy.disallow_lone_cr_in_chunk_extension = true;
+  strict_chunks_policy.require_chunked_body_end_with_crlf_crlf = true;
+
+  HttpValidationPolicy strict_chunk_and_ext_validation;
+  strict_chunk_and_ext_validation.disallow_stray_data_after_chunk = true;
+  strict_chunk_and_ext_validation.disallow_lone_lf_in_chunk_extension = true;
+  strict_chunk_and_ext_validation.disallow_lone_cr_in_chunk_extension = true;
+  strict_chunk_and_ext_validation.require_chunked_body_end_with_crlf_crlf =
+      true;
+  strict_chunk_and_ext_validation.require_semicolon_delimited_chunk_extension =
+      true;
+
+  std::vector<TestCase> cases = {
+      // no body, just the last-chunk
+      {"0\r\n"
+       "\r\n",
+       {0},
+       {""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // no body, just last chunk with valid extension
+      {"0;chunked_extension=\"foobar\"quote\"\"\r\n"
+       "\r\n",
+       {0},
+       {";chunked_extension=\"foobar\"quote\"\""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Two valid chunks, last chunk has non-delimited extension
+      {"1;ext1\r\n"
+       "A\r\n"
+       "1;ext2\r\n"
+       "B\r\n"
+       "0 ext3\r\n"  // non-semicolon delimited extension
+       "\r\n",
+       {1, 1, 0},
+       {";ext1", ";ext2"},  // last-chunks's extension is rejected
+       strict_chunk_and_ext_validation,
+       BalsaFrameEnums::INVALID_CHUNK_EXTENSION},
+
+      // Two valid chunks, last chunk has non-delimited extension
+      {"1;ext1\r\n"
+       "A\r\n"
+       "1;ext2\r\n"
+       "B\r\n"
+       "0 ext3\r\n"  // non-semicolon delimited extension
+       "\r\n",
+       {1, 1, 0},
+       {";ext1", ";ext2", " ext3"},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Trailing WS before semicolon
+      {"1 \t;ext\r\n"  // BWS before semicolon is valid
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1, 0},
+       {" \t;ext", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Non BWS before semicolon
+      {"1 \tnon-bws;ext\r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1, 0},
+       {" \tnon-bws;ext", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Non BWS before semicolon
+      {"1 \tnon-bws;ext\r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1},
+       {},
+       strict_chunk_and_ext_validation,
+       BalsaFrameEnums::INVALID_CHUNK_EXTENSION},
+
+      // BWS with no extension
+      {"1 \t \r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1},
+       {},
+       strict_chunk_and_ext_validation,
+       BalsaFrameEnums::INVALID_CHUNK_EXTENSION},
+
+      // BWS with no extension is invalid
+      {"1 \t \r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1, 0},
+       {" \t ", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Trailing BWS after semicolon
+      {"1; \t ext\r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1, 0},
+       {"; \t ext", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Trailing illegal characters before semicolon
+      {"1ERROR;ext\r\n"  // not valid hex
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {},
+       {},
+       strict_chunks_policy,
+       BalsaFrameEnums::INVALID_CHUNK_LENGTH},
+
+      // Extension with semicolon & CRLF
+      {"1;ext=\";foo\"\r\n"
+       "Q\r\n"
+       "0\r\n"
+       "\r\n",
+       {1, 0},
+       {";ext=\";foo\"", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+
+      // Bare semicolon
+      {"A;\r\n"
+       "0123456789\r\n"
+       "0\r\n"
+       "\r\n",
+       {10, 0},
+       {";", ""},
+       strict_chunks_policy,
+       BalsaFrameEnums::BALSA_NO_ERROR},
+  };
+
+  for (const TestCase& test : cases) {
+    SCOPED_TRACE(absl::StrCat("Testing chunks: ", absl::CEscape(test.chunks)));
+    NiceMock<BalsaVisitorMock> visitor_mock;
+    BalsaFrame balsa_frame;
+    BalsaHeaders headers;
+    balsa_frame.set_is_request(true);
+    balsa_frame.set_balsa_headers(&headers);
+    balsa_frame.set_http_validation_policy(test.policy);
+    balsa_frame.set_balsa_visitor(&visitor_mock);
+
+    std::string message_headers =
+        "POST / HTTP/1.1\r\n"
+        "Transfer-Encoding:  chunked\r\n"
+        "\r\n";
+
+    ASSERT_EQ(balsa_frame.ProcessInput(message_headers.data(),
+                                       message_headers.size()),
+              message_headers.size());
+    ASSERT_FALSE(balsa_frame.Error());
+
+    Sequence size_sequence, extension_sequence;
+    for (size_t chunk_size : test.expected_chunk_sizes) {
+      EXPECT_CALL(visitor_mock, OnChunkLength(chunk_size))
+          .InSequence(size_sequence);
+    }
+
+    for (absl::string_view extension : test.expected_chunk_extensions) {
+      EXPECT_CALL(visitor_mock, OnChunkExtensionInput(extension))
+          .InSequence(extension_sequence);
+    }
+
+    size_t bytes_consumed =
+        balsa_frame.ProcessInput(test.chunks.data(), test.chunks.size());
+    EXPECT_EQ(balsa_frame.ErrorCode(), test.expected_error);
+
+    if (test.expected_error == BalsaFrameEnums::BALSA_NO_ERROR) {
+      EXPECT_EQ(bytes_consumed, test.chunks.size());
+      EXPECT_TRUE(balsa_frame.MessageFullyRead());
+      EXPECT_FALSE(balsa_frame.Error());
+    }
+
+    Mock::VerifyAndClearExpectations(&visitor_mock);
+  }
+}
+
 TEST_F(HTTPBalsaFrameTest, MostRestrictiveTest) {
   BalsaFrame balsa_frame;
   BalsaHeaders headers;
diff --git a/quiche/balsa/balsa_fuzz_util.cc b/quiche/balsa/balsa_fuzz_util.cc
index fc4ed71..58b15fa 100644
--- a/quiche/balsa/balsa_fuzz_util.cc
+++ b/quiche/balsa/balsa_fuzz_util.cc
@@ -23,7 +23,7 @@
       fuzztest::Arbitrary<bool>(), fuzztest::Arbitrary<bool>(),
       fuzztest::Arbitrary<bool>(), ArbitraryFirstLineValidationOption(),
       fuzztest::Arbitrary<bool>(), fuzztest::Arbitrary<bool>(),
-      fuzztest::Arbitrary<bool>());
+      fuzztest::Arbitrary<bool>(), fuzztest::Arbitrary<bool>());
 }
 
 fuzztest::Domain<HttpValidationPolicy::FirstLineValidationOption>
diff --git a/quiche/balsa/http_validation_policy.h b/quiche/balsa/http_validation_policy.h
index a197071..7df7b7d 100644
--- a/quiche/balsa/http_validation_policy.h
+++ b/quiche/balsa/http_validation_policy.h
@@ -116,6 +116,14 @@
   // https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
   bool disallow_invalid_request_methods = false;
 
+  // Chunk extensions are optional but, if they are present, they MUST be
+  // preceded by a semicolon and follow the grammar:
+  // chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+  // where BWS is SP or HTAB. See
+  // https://datatracker.ietf.org/doc/html/rfc9112#name-chunk-extensions for
+  // more information
+  bool require_semicolon_delimited_chunk_extension = false;
+
   template <typename Sink>
   friend void AbslStringify(Sink& sink, FirstLineValidationOption option) {
     switch (option) {
@@ -153,7 +161,8 @@
                  "sanitize_firstline_spaces=%v, "
                  "sanitize_obs_fold_in_header_values=%v, "
                  "disallow_stray_data_after_chunk=%v, "
-                 "disallow_invalid_request_methods=%v}",
+                 "disallow_invalid_request_methods=%v, "
+                 "require_semicolon_delimited_chunk_extension=%v}",
                  policy.disallow_header_continuation_lines,
                  policy.require_header_colon,
                  policy.disallow_multiple_content_length,
@@ -172,7 +181,8 @@
                  policy.sanitize_firstline_spaces,
                  policy.sanitize_obs_fold_in_header_values,
                  policy.disallow_stray_data_after_chunk,
-                 policy.disallow_invalid_request_methods);
+                 policy.disallow_invalid_request_methods,
+                 policy.require_semicolon_delimited_chunk_extension);
   }
 };
 
@@ -197,7 +207,8 @@
         quiche::HttpValidationPolicy::FirstLineValidationOption::SANITIZE,
     .sanitize_obs_fold_in_header_values = true,
     .disallow_stray_data_after_chunk = true,
-    .disallow_invalid_request_methods = true};
+    .disallow_invalid_request_methods = true,
+    .require_semicolon_delimited_chunk_extension = true};
 
 }  // namespace quiche