Add helper methods to check a string's RFC 9110, 5.6.2 compliance
https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2 gives the grammar for a `token` as an _inclusive_ list of allowed characters. Balsa does not currently have a helper that exactly matches this list. The closest thing we have is the _exclusive_ list `kInvalidHeaderKeyCharList`, which, over the set of US-ASCII characters, rejects every character outside this inclusive list except `:`. We add the helpers here for use by future changes.
Protected by the fact that this change only adds helper methods, which will be used by a future CL.
PiperOrigin-RevId: 826236709
diff --git a/quiche/balsa/header_properties.cc b/quiche/balsa/header_properties.cc
index b0e7891..03ebe56 100644
--- a/quiche/balsa/header_properties.cc
+++ b/quiche/balsa/header_properties.cc
@@ -11,6 +11,16 @@
namespace {
+// Inclusive list of every character that may appear in an HTTP `token`, per
+// https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
+inline constexpr unsigned char kValidTokenCharList[] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '#', '$',
+ '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~'};
+
using MultivaluedHeadersSet =
absl::flat_hash_set<absl::string_view, StringPieceCaseHash,
StringPieceCaseEqual>;
@@ -128,6 +138,14 @@
return invalidCharTable;
}
+// Builds a 256-entry table, indexed by byte value, whose entry is true exactly
+// for the bytes listed in kValidTokenCharList.
+constexpr std::array<bool, 256> buildValidTokenCharLookupTable() {
+  std::array<bool, 256> table{};
+  for (const uint8_t allowed : kValidTokenCharList) table[allowed] = true;
+  return table;
+}
+
} // anonymous namespace
bool IsMultivaluedHeader(absl::string_view header) {
@@ -157,6 +175,23 @@
return invalidCharTable[c];
}
+bool IsValidTokenChar(uint8_t c) {
+  // Built once at compile time; every uint8_t value is a valid index.
+  static constexpr auto kIsTokenChar = buildValidTokenCharLookupTable();
+  return kIsTokenChar[c];
+}
+
+// Returns true only if `value` is non-empty and every one of its characters
+// satisfies IsValidTokenChar(); the empty string is rejected.
+bool IsValidToken(absl::string_view value) {
+  // Advance past the leading run of token characters.
+  auto it = value.begin();
+  while (it != value.end() && IsValidTokenChar(static_cast<uint8_t>(*it))) {
+    ++it;
+  }
+  // Valid only when that run covers the whole of a non-empty string.
+  return !value.empty() && it == value.end();
+}
bool HasInvalidHeaderChars(absl::string_view value) {
for (const char c : value) {
if (IsInvalidHeaderChar(c)) {
diff --git a/quiche/balsa/header_properties.h b/quiche/balsa/header_properties.h
index 79588ea..a540e79 100644
--- a/quiche/balsa/header_properties.h
+++ b/quiche/balsa/header_properties.h
@@ -73,6 +73,15 @@
QUICHE_EXPORT bool IsInvalidHeaderChar(uint8_t c);
QUICHE_EXPORT bool HasInvalidHeaderChars(absl::string_view value);
+// `IsValidTokenChar` returns true if `c` is one of the ASCII characters allowed
+// in a `token` per https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2;
+// `IsValidToken` returns true if `value` is non-empty and consists solely of
+// such characters. Many lexical entries in the grammar are tokens: methods,
+// parameter names, protocol names, content-codings etc. This differs from
+// `IsInvalidHeaderKeyChar`, which considers `:` to be valid in header keys.
+QUICHE_EXPORT bool IsValidTokenChar(uint8_t c);
+QUICHE_EXPORT bool IsValidToken(absl::string_view value);
+
// Returns true if `value` contains a character not allowed in the path
// component of a URI.
QUICHE_EXPORT bool HasInvalidPathChar(absl::string_view value);
diff --git a/quiche/balsa/header_properties_test.cc b/quiche/balsa/header_properties_test.cc
index 980c419..ad983f1 100644
--- a/quiche/balsa/header_properties_test.cc
+++ b/quiche/balsa/header_properties_test.cc
@@ -1,5 +1,9 @@
#include "quiche/balsa/header_properties.h"
+#include <string>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/string_view.h"
#include "quiche/common/platform/api/quiche_test.h"
namespace quiche::header_properties::test {
@@ -140,5 +144,60 @@
EXPECT_TRUE(HasInvalidQueryChar("query_with_angle<brackets>also_bad"));
}
+TEST(HeaderPropertiesTest, IsValidTokenVsHasInvalidHeaderChars) {
+  // Despite the test name, the comparison is against IsInvalidHeaderKeyChar():
+  // over US-ASCII, a one-character string is a valid token exactly when that
+  // character is not an invalid header-key character. `:` is the sole
+  // exception and is skipped.
+  const absl::flat_hash_set<unsigned char> mismatch = {':'};
+  for (int c = 0; c < 128; ++c) {
+    const unsigned char u_c = static_cast<unsigned char>(c);
+    if (mismatch.contains(u_c)) continue;
+    EXPECT_EQ(IsValidToken(std::string(1, u_c)), !IsInvalidHeaderKeyChar(u_c))
+        << "char: [" << u_c << "], int = [" << c << "]";
+  }
+}
+
+TEST(HeaderPropertiesTest, IsValidTokenEmptyAndMultiChar) {
+  // Strings built only from token characters are accepted.
+  EXPECT_TRUE(IsValidToken("a"));
+  EXPECT_TRUE(IsValidToken("GET"));
+  EXPECT_TRUE(IsValidToken("GET'"));
+  EXPECT_TRUE(IsValidToken("a-b-c"));
+  EXPECT_TRUE(IsValidToken("!#$%&'*+-.^_`|~"));
+  EXPECT_TRUE(IsValidToken("abcdefghijklmnopqrstuvwxyz0123456789"));
+  EXPECT_TRUE(
+      IsValidToken("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                   "abcdefghijklmnopqrstuvwxyz"
+                   "0123456789"
+                   "!#$%&'*+-.^_`|~"));
+
+  // Rejected: whitespace, control, non-ASCII and delimiter bytes; empty string.
+  EXPECT_FALSE(IsValidToken("G ET"));
+  EXPECT_FALSE(IsValidToken("G,ET"));
+  EXPECT_FALSE(IsValidToken("G\tET"));
+  EXPECT_FALSE(IsValidToken(absl::string_view("G\0ET", 3)));
+  EXPECT_FALSE(IsValidToken("GET\""));
+  EXPECT_FALSE(IsValidToken("GET\x85"));
+  EXPECT_FALSE(IsValidToken("GET("));
+  EXPECT_FALSE(IsValidToken("GET)"));
+  EXPECT_FALSE(IsValidToken("GET{"));
+  EXPECT_FALSE(IsValidToken("GET}"));
+  EXPECT_FALSE(IsValidToken("GET@"));
+  EXPECT_FALSE(IsValidToken("GET["));
+  EXPECT_FALSE(IsValidToken("GET\\"));
+  EXPECT_FALSE(IsValidToken("GET]"));
+  EXPECT_FALSE(IsValidToken("GET:"));
+  EXPECT_FALSE(IsValidToken("GET;"));
+  EXPECT_FALSE(IsValidToken("GET?"));
+  EXPECT_FALSE(IsValidToken("GET="));
+  EXPECT_FALSE(IsValidToken("GET/"));
+  EXPECT_FALSE(IsValidToken("GET<"));
+  EXPECT_FALSE(IsValidToken("GET>"));
+  EXPECT_FALSE(IsValidToken("GET,"));
+  EXPECT_FALSE(IsValidToken("GET\x7F"));
+  EXPECT_FALSE(IsValidToken(""));
+}
+
} // namespace
} // namespace quiche::header_properties::test