Add helper methods to check a string's compliance with RFC 9110, Section 5.6.2. https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2 shows the grammar for a `token` and gives an _inclusive_ listing of the allowed characters. Balsa does not currently have a helper which exactly matches this listing of characters. The closest thing we have is the _exclusive_ list `kInvalidHeaderKeyCharList`, which, over the set of US-ASCII characters, _almost_ matches the complement of this inclusive list; the one exception is `:`. We add the helpers here for use by future changes. Protected by adding helper methods only, for use by a future CL. PiperOrigin-RevId: 826236709
diff --git a/quiche/balsa/header_properties.cc b/quiche/balsa/header_properties.cc index b0e7891..03ebe56 100644 --- a/quiche/balsa/header_properties.cc +++ b/quiche/balsa/header_properties.cc
@@ -11,6 +11,16 @@ namespace { +// Inclusive list of the characters (`tchar`) allowed in HTTP `token`s. See +// https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2 +inline constexpr unsigned char kValidTokenCharList[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '#', '$', + '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~'}; + using MultivaluedHeadersSet = absl::flat_hash_set<absl::string_view, StringPieceCaseHash, StringPieceCaseEqual>; @@ -128,6 +138,14 @@ return invalidCharTable; } +constexpr std::array<bool, 256> buildValidTokenCharLookupTable() { + std::array<bool, 256> validTokenCharTable{}; + for (uint8_t c : kValidTokenCharList) { + validTokenCharTable[c] = true; + } + return validTokenCharTable; +} + } // anonymous namespace bool IsMultivaluedHeader(absl::string_view header) { @@ -157,6 +175,23 @@ return invalidCharTable[c]; } +bool IsValidTokenChar(uint8_t c) { + static constexpr std::array<bool, 256> validTokenCharTable = + buildValidTokenCharLookupTable(); + return validTokenCharTable[c]; +} + +bool IsValidToken(absl::string_view value) { + if (value.empty()) { + return false; + } + for (const char c : value) { + if (!IsValidTokenChar(static_cast<uint8_t>(c))) { + return false; + } + } + return true; +} bool HasInvalidHeaderChars(absl::string_view value) { for (const char c : value) { if (IsInvalidHeaderChar(c)) {
diff --git a/quiche/balsa/header_properties.h b/quiche/balsa/header_properties.h index 79588ea..a540e79 100644 --- a/quiche/balsa/header_properties.h +++ b/quiche/balsa/header_properties.h
@@ -73,6 +73,15 @@ QUICHE_EXPORT bool IsInvalidHeaderChar(uint8_t c); QUICHE_EXPORT bool HasInvalidHeaderChars(absl::string_view value); +// `IsValidTokenChar` returns true if `c` is an RFC 9110 `token` character +// (https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2), the grammar +// used for methods, parameter names, protocol names, content-codings, etc. +// `IsValidToken` returns true only if `value` is non-empty and every character +// in it is a token character. Unlike `IsInvalidHeaderKeyChar`, which treats `:` +// as valid in header keys, these helpers reject `:`. +QUICHE_EXPORT bool IsValidTokenChar(uint8_t c); +QUICHE_EXPORT bool IsValidToken(absl::string_view value); + // Returns true if `value` contains a character not allowed in the path // component of a URI. QUICHE_EXPORT bool HasInvalidPathChar(absl::string_view value);
diff --git a/quiche/balsa/header_properties_test.cc b/quiche/balsa/header_properties_test.cc index 980c419..ad983f1 100644 --- a/quiche/balsa/header_properties_test.cc +++ b/quiche/balsa/header_properties_test.cc
@@ -1,5 +1,9 @@ #include "quiche/balsa/header_properties.h" +#include <string> + +#include "absl/container/flat_hash_set.h" +#include "absl/strings/string_view.h" #include "quiche/common/platform/api/quiche_test.h" namespace quiche::header_properties::test { @@ -140,5 +144,63 @@ EXPECT_TRUE(HasInvalidQueryChar("query_with_angle<brackets>also_bad")); } +// Over US-ASCII, `IsValidToken` must agree with `!IsInvalidHeaderKeyChar` for +// every character except `:`, which is valid in header keys but not in tokens. +TEST(HeaderPropertiesTest, IsValidTokenVsIsInvalidHeaderKeyChar) { + absl::flat_hash_set<unsigned char> mismatch = {':'}; + for (int c = 0; c < 128; ++c) { + if (mismatch.contains(c)) { + continue; + } + + unsigned char u_c = static_cast<unsigned char>(c); + std::string s(1, u_c); + EXPECT_EQ(IsValidToken(s), !IsInvalidHeaderKeyChar(u_c)) + << "char: [" << u_c << "], int = [" << c << "]"; + } +} + +// Exercises multi-character tokens, the RFC 9110 delimiters, and empty input. +TEST(HeaderPropertiesTest, IsValidTokenEmptyAndMultiChar) { + EXPECT_TRUE(IsValidToken("a")); + EXPECT_TRUE(IsValidToken("GET")); + EXPECT_TRUE(IsValidToken("GET'")); + EXPECT_TRUE(IsValidToken("a-b-c")); + EXPECT_TRUE(IsValidToken("!#$%&'*+-.^_`|~")); + EXPECT_TRUE(IsValidToken("abcdefghijklmnopqrstuvwxyz0123456789")); + EXPECT_TRUE( + IsValidToken("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789" + "!#$%&'*+-.^_`|~")); + + EXPECT_FALSE(IsValidToken("G ET")); + EXPECT_FALSE(IsValidToken("G,ET")); + EXPECT_FALSE(IsValidToken("G\tET")); + EXPECT_FALSE(IsValidToken(absl::string_view("G\0ET", 3))); + EXPECT_FALSE(IsValidToken("GET\"")); + EXPECT_FALSE(IsValidToken("GET\x85")); + EXPECT_FALSE(IsValidToken("GET(")); + EXPECT_FALSE(IsValidToken("GET)")); + EXPECT_FALSE(IsValidToken("GET{")); + EXPECT_FALSE(IsValidToken("GET}")); + EXPECT_FALSE(IsValidToken("GET\r")); + EXPECT_FALSE(IsValidToken("GET@")); + EXPECT_FALSE(IsValidToken("GET[")); + EXPECT_FALSE(IsValidToken("GET\\")); + EXPECT_FALSE(IsValidToken("GET]")); + EXPECT_FALSE(IsValidToken("GET:")); + EXPECT_FALSE(IsValidToken("GET;")); + EXPECT_FALSE(IsValidToken("GET?")); + EXPECT_FALSE(IsValidToken("GET=")); + EXPECT_FALSE(IsValidToken("GET/")); +
 EXPECT_FALSE(IsValidToken("GET\n")); + EXPECT_FALSE(IsValidToken("GET<")); + EXPECT_FALSE(IsValidToken("GET>")); + EXPECT_FALSE(IsValidToken("GET,")); + EXPECT_FALSE(IsValidToken("GET\x7F")); + EXPECT_FALSE(IsValidToken("")); +} + } // namespace } // namespace quiche::header_properties::test