| // Copyright 2020 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <algorithm> |
| #include <string> |
| |
| #include "base/strings/escape.h" |
| |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| |
| namespace gurl_base { |
| namespace { |
| |
| struct EscapeCase { |
| const char* input; |
| const char* output; |
| }; |
| |
| struct EscapeForHTMLCase { |
| const char* input; |
| const char* expected_output; |
| }; |
| |
| struct UnescapeURLCase { |
| const char* input; |
| UnescapeRule::Type rules; |
| const char* output; |
| }; |
| |
| struct UnescapeAndDecodeCase { |
| const char* input; |
| |
| // The expected output when run through UnescapeURL. |
| const char* url_unescaped; |
| |
| // The expected output when run through UnescapeQuery. |
| const char* query_unescaped; |
| |
| // The expected output when run through UnescapeAndDecodeURLComponent. |
| const wchar_t* decoded; |
| }; |
| |
| struct AdjustOffsetCase { |
| const char* input; |
| size_t input_offset; |
| size_t output_offset; |
| }; |
| |
| TEST(EscapeTest, EscapeTextForFormSubmission) { |
| const EscapeCase escape_cases[] = { |
| {"foo", "foo"}, {"foo bar", "foo+bar"}, {"foo++", "foo%2B%2B"}}; |
| for (const auto& escape_case : escape_cases) { |
| EXPECT_EQ(escape_case.output, |
| EscapeQueryParamValue(escape_case.input, true)); |
| } |
| |
| const EscapeCase escape_cases_no_plus[] = { |
| {"foo", "foo"}, {"foo bar", "foo%20bar"}, {"foo++", "foo%2B%2B"}}; |
| for (const auto& escape_case : escape_cases_no_plus) { |
| EXPECT_EQ(escape_case.output, |
| EscapeQueryParamValue(escape_case.input, false)); |
| } |
| |
| // Test all the values in we're supposed to be escaping. |
| const std::string no_escape( |
| "abcdefghijklmnopqrstuvwxyz" |
| "ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "0123456789" |
| "!'()*-._~"); |
| for (int i = 0; i < 256; ++i) { |
| std::string in; |
| in.push_back(i); |
| std::string out = EscapeQueryParamValue(in, true); |
| if (0 == i) { |
| EXPECT_EQ(out, std::string("%00")); |
| } else if (32 == i) { |
| // Spaces are plus escaped like web forms. |
| EXPECT_EQ(out, std::string("+")); |
| } else if (no_escape.find(in) == std::string::npos) { |
| // Check %hex escaping |
| std::string expected = StringPrintf("%%%02X", i); |
| EXPECT_EQ(expected, out); |
| } else { |
| // No change for things in the no_escape list. |
| EXPECT_EQ(out, in); |
| } |
| } |
| } |
| |
| TEST(EscapeTest, EscapePath) { |
| ASSERT_EQ( |
| // Most of the character space we care about, un-escaped |
| EscapePath("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" |
| "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[\\]^_`abcdefghijklmnopqrstuvwxyz" |
| "{|}~\x7f\x80\xff"), |
| // Escaped |
| "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" |
| "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" |
| "%7B%7C%7D~%7F%80%FF"); |
| } |
| |
| TEST(EscapeTest, EscapeUrlEncodedData) { |
| ASSERT_EQ( |
| // Most of the character space we care about, un-escaped |
| EscapeUrlEncodedData("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" |
| "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[\\]^_`abcdefghijklmnopqrstuvwxyz" |
| "{|}~\x7f\x80\xff", |
| true), |
| // Escaped |
| "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" |
| "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" |
| "%7B%7C%7D~%7F%80%FF"); |
| } |
| |
| TEST(EscapeTest, EscapeUrlEncodedDataSpace) { |
| ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); |
| ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); |
| } |
| |
| TEST(EscapeTest, EscapeForHTML) { |
| const EscapeForHTMLCase tests[] = { |
| {"hello", "hello"}, |
| {"<hello>", "<hello>"}, |
| {"don\'t mess with me", "don't mess with me"}, |
| }; |
| for (const auto& test : tests) { |
| std::string result = EscapeForHTML(std::string(test.input)); |
| EXPECT_EQ(std::string(test.expected_output), result); |
| } |
| } |
| |
| TEST(EscapeTest, UnescapeForHTML) { |
| const EscapeForHTMLCase tests[] = { |
| {"", ""}, |
| {"<hello>", "<hello>"}, |
| {"don't mess with me", "don\'t mess with me"}, |
| {"<>&"'", "<>&\"'"}, |
| {"& lt; & ; &; '", "& lt; & ; &; '"}, |
| {"&", "&"}, |
| {""", "\""}, |
| {"'", "'"}, |
| {"<", "<"}, |
| {">", ">"}, |
| {"& &", "& &"}, |
| }; |
| for (const auto& test : tests) { |
| std::u16string result = UnescapeForHTML(ASCIIToUTF16(test.input)); |
| EXPECT_EQ(ASCIIToUTF16(test.expected_output), result); |
| } |
| } |
| |
| TEST(EscapeTest, EscapeExternalHandlerValue) { |
| ASSERT_EQ( |
| // Escaped |
| "%02%0A%1D%20!%22#$%25&'()*+,-./0123456789:;" |
| "%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz" |
| "%7B%7C%7D~%7F%80%FF", |
| // Most of the character space we care about, un-escaped |
| EscapeExternalHandlerValue("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" |
| "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[\\]^_`abcdefghijklmnopqrstuvwxyz" |
| "{|}~\x7f\x80\xff")); |
| |
| ASSERT_EQ( |
| "!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_" |
| "abcdefghijklmnopqrstuvwxyz~", |
| EscapeExternalHandlerValue( |
| "!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_" |
| "abcdefghijklmnopqrstuvwxyz~")); |
| |
| ASSERT_EQ("%258k", EscapeExternalHandlerValue("%8k")); |
| ASSERT_EQ("a%25", EscapeExternalHandlerValue("a%")); |
| ASSERT_EQ("%25a", EscapeExternalHandlerValue("%a")); |
| ASSERT_EQ("a%258", EscapeExternalHandlerValue("a%8")); |
| ASSERT_EQ("%ab", EscapeExternalHandlerValue("%ab")); |
| ASSERT_EQ("%AB", EscapeExternalHandlerValue("%AB")); |
| |
| ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C", |
| EscapeExternalHandlerValue( |
| "http://example.com/path/sub?q=a|b|c&q=1|2|3#ref|")); |
| ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C", |
| EscapeExternalHandlerValue( |
| "http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C")); |
| ASSERT_EQ("http://[2001:db8:0:1]:80", |
| EscapeExternalHandlerValue("http://[2001:db8:0:1]:80")); |
| } |
| |
| TEST(EscapeTest, EscapeNonASCII) { |
| EXPECT_EQ("abc\n%2580%80", EscapeNonASCIIAndPercent("abc\n%80\x80")); |
| EXPECT_EQ("abc\n%80%80", EscapeNonASCII("abc\n%80\x80")); |
| } |
| |
| TEST(EscapeTest, DataURLWithAccentedCharacters) { |
| const std::string url = |
| "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3" |
| "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20"; |
| |
| OffsetAdjuster::Adjustments adjustments; |
| UnescapeAndDecodeUTF8URLComponentWithAdjustments(url, UnescapeRule::SPACES, |
| &adjustments); |
| } |
| |
| TEST(EscapeTest, UnescapeURLComponent) { |
| const UnescapeURLCase kUnescapeCases[] = { |
| {"", UnescapeRule::NORMAL, ""}, |
| {"%2", UnescapeRule::NORMAL, "%2"}, |
| {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, |
| {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, |
| {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::NONE, |
| "Some%20random text %25%2dOK"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, |
| "Some%20random text %25-OK"}, |
| {"Some%20random text %25%E1%A6", UnescapeRule::NORMAL, |
| "Some%20random text %25\xE1\xA6"}, |
| {"Some%20random text %25%E1%A6OK", UnescapeRule::NORMAL, |
| "Some%20random text %25\xE1\xA6OK"}, |
| {"Some%20random text %25%E1%A6%99OK", UnescapeRule::NORMAL, |
| "Some%20random text %25\xE1\xA6\x99OK"}, |
| |
| // BiDi Control characters should not be unescaped. |
| {"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL, |
| "Some%20random text %25%D8%9COK"}, |
| {"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%80%8EOK"}, |
| {"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%80%8FOK"}, |
| {"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%80%AAOK"}, |
| {"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%80%ABOK"}, |
| {"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%80%AEOK"}, |
| {"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%81%A6OK"}, |
| {"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL, |
| "Some%20random text %25%E2%81%A9OK"}, |
| |
| // Certain banned characters should not be unescaped. |
| // U+1F50F LOCK WITH INK PEN |
| {"Some%20random text %25%F0%9F%94%8FOK", UnescapeRule::NORMAL, |
| "Some%20random text %25%F0%9F%94%8FOK"}, |
| // U+1F510 CLOSED LOCK WITH KEY |
| {"Some%20random text %25%F0%9F%94%90OK", UnescapeRule::NORMAL, |
| "Some%20random text %25%F0%9F%94%90OK"}, |
| // U+1F512 LOCK |
| {"Some%20random text %25%F0%9F%94%92OK", UnescapeRule::NORMAL, |
| "Some%20random text %25%F0%9F%94%92OK"}, |
| // U+1F513 OPEN LOCK |
| {"Some%20random text %25%F0%9F%94%93OK", UnescapeRule::NORMAL, |
| "Some%20random text %25%F0%9F%94%93OK"}, |
| |
| // Spaces |
| {"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)", UnescapeRule::NORMAL, |
| "(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)"}, |
| {"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)", UnescapeRule::NORMAL, |
| "(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)"}, |
| {"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)", UnescapeRule::NORMAL, |
| "(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)"}, |
| {"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)", UnescapeRule::NORMAL, |
| "(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)"}, |
| {"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL, |
| "(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"}, |
| {"(%E2%A0%80)", UnescapeRule::NORMAL, "(%E2%A0%80)"}, |
| |
| // Default Ignorable and Formatting characters should not be unescaped. |
| {"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)", UnescapeRule::NORMAL, |
| "(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)"}, |
| {"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)", UnescapeRule::NORMAL, |
| "(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)"}, |
| {"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)", UnescapeRule::NORMAL, |
| "(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)"}, |
| {"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)", UnescapeRule::NORMAL, |
| "(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)"}, |
| {"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)", UnescapeRule::NORMAL, |
| "(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)"}, |
| {"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)", UnescapeRule::NORMAL, |
| "(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)"}, |
| {"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)", UnescapeRule::NORMAL, |
| "(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)"}, |
| {"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)", UnescapeRule::NORMAL, |
| "(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)"}, |
| {"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)", UnescapeRule::NORMAL, |
| "(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)"}, |
| {"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)", UnescapeRule::NORMAL, |
| "(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)"}, |
| {"(%F0%93%90%B0)(%F0%93%90%B8)", UnescapeRule::NORMAL, |
| "(%F0%93%90%B0)(%F0%93%90%B8)"}, |
| // General Punctuation - Deprecated (U+206A--206F) |
| {"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)", UnescapeRule::NORMAL, |
| "(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)"}, |
| // Variation selectors (U+FE00--FE0F) |
| {"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)", UnescapeRule::NORMAL, |
| "(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)"}, |
| // Shorthand format controls (U+1BCA0--1BCA3) |
| {"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)", UnescapeRule::NORMAL, |
| "(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)"}, |
| // Musical symbols beams and slurs (U+1D173--1D17A) |
| {"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)", UnescapeRule::NORMAL, |
| "(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)"}, |
| // Tags block (U+E0000--E007F), includes unassigned points |
| {"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)", UnescapeRule::NORMAL, |
| "(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)"}, |
| // Ideographic-specific variation selectors (U+E0100--E01EF) |
| {"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)", UnescapeRule::NORMAL, |
| "(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)"}, |
| |
| // Two spoofing characters in a row should not be unescaped. |
| {"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"}, |
| // Non-spoofing characters surrounded by spoofing characters should be |
| // unescaped. |
| {"%D8%9C%C2%A1%D8%9C%C2%A1", UnescapeRule::NORMAL, |
| "%D8%9C\xC2\xA1%D8%9C\xC2\xA1"}, |
| // Invalid UTF-8 characters surrounded by spoofing characters should be |
| // unescaped. |
| {"%D8%9C%85%D8%9C%85", UnescapeRule::NORMAL, "%D8%9C\x85%D8%9C\x85"}, |
| // Test with enough trail bytes to overflow the CBU8_MAX_LENGTH-byte |
| // buffer. The first two bytes are a spoofing character as well. |
| {"%D8%9C%9C%9C%9C%9C%9C%9C%9C%9C%9C", UnescapeRule::NORMAL, |
| "%D8%9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C"}, |
| |
| {"Some%20random text %25%2dOK", UnescapeRule::SPACES, |
| "Some random text %25-OK"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::PATH_SEPARATORS, |
| "Some%20random text %25-OK"}, |
| {"Some%20random text %25%2dOK", |
| UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS, |
| "Some%20random text %-OK"}, |
| {"Some%20random text %25%2dOK", |
| UnescapeRule::SPACES | |
| UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS, |
| "Some random text %-OK"}, |
| {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, |
| {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, |
| // Certain URL-sensitive characters should not be unescaped unless asked. |
| {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", |
| UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, |
| {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", |
| UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS, |
| "Hello%20%13%10world ## ?? == && %% ++"}, |
| // We can neither escape nor unescape '@' since some websites expect it to |
| // be preserved as either '@' or "%40". |
| // See http://b/996720 and http://crbug.com/23933 . |
| {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, |
| // Control characters. |
| {"%01%02%03%04%05%06%07%08%09 %25", |
| UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS, |
| "%01%02%03%04%05%06%07%08%09 %"}, |
| {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, |
| |
| // '/' and '\\' should only be unescaped by PATH_SEPARATORS. |
| {"%2F%5C", UnescapeRule::PATH_SEPARATORS, "/\\"}, |
| }; |
| |
| for (const auto unescape_case : kUnescapeCases) { |
| EXPECT_EQ(unescape_case.output, |
| UnescapeURLComponent(unescape_case.input, unescape_case.rules)); |
| } |
| |
| // Test NULL character unescaping, which can't be tested above since those are |
| // just char pointers. |
| std::string input("Null"); |
| input.push_back(0); // Also have a NULL in the input. |
| input.append("%00%39Test"); |
| |
| std::string expected = "Null"; |
| expected.push_back(0); |
| expected.append("%009Test"); |
| EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); |
| } |
| |
| TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponentWithAdjustments) { |
| const UnescapeAndDecodeCase unescape_cases[] = { |
| {"%", "%", "%", L"%"}, |
| {"+", "+", " ", L"+"}, |
| {"%2+", "%2+", "%2 ", L"%2+"}, |
| {"+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"}, |
| {"Don't escape anything", "Don't escape anything", |
| "Don't escape anything", L"Don't escape anything"}, |
| {"+Invalid %escape %2+", "+Invalid %escape %2+", " Invalid %escape %2 ", |
| L"+Invalid %escape %2+"}, |
| {"Some random text %25%2dOK", "Some random text %25-OK", |
| "Some random text %25-OK", L"Some random text %25-OK"}, |
| {"%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09", |
| "%01%02%03%04%05%06%07%08%09", L"%01%02%03%04%05%06%07%08%09"}, |
| {"%E4%BD%A0+%E5%A5%BD", "\xE4\xBD\xA0+\xE5\xA5\xBD", |
| "\xE4\xBD\xA0 \xE5\xA5\xBD", L"\x4f60+\x597d"}, |
| {"%ED%ED", // Invalid UTF-8. |
| "\xED\xED", "\xED\xED", L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. |
| }; |
| |
| for (const auto& unescape_case : unescape_cases) { |
| std::string unescaped = |
| UnescapeURLComponent(unescape_case.input, UnescapeRule::NORMAL); |
| EXPECT_EQ(std::string(unescape_case.url_unescaped), unescaped); |
| |
| unescaped = UnescapeURLComponent(unescape_case.input, |
| UnescapeRule::REPLACE_PLUS_WITH_SPACE); |
| EXPECT_EQ(std::string(unescape_case.query_unescaped), unescaped); |
| |
| // The adjustments argument is covered by the next test. |
| // |
| // TODO: Need to test unescape_spaces and unescape_percent. |
| std::u16string decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
| unescape_case.input, UnescapeRule::NORMAL, nullptr); |
| EXPECT_EQ(WideToUTF16(unescape_case.decoded), decoded); |
| } |
| } |
| |
| TEST(EscapeTest, AdjustOffset) { |
| const AdjustOffsetCase adjust_cases[] = { |
| {"", 0, 0}, |
| {"test", 0, 0}, |
| {"test", 2, 2}, |
| {"test", 4, 4}, |
| {"test", std::string::npos, std::string::npos}, |
| {"%2dtest", 6, 4}, |
| {"%2dtest", 3, 1}, |
| {"%2dtest", 2, std::string::npos}, |
| {"%2dtest", 1, std::string::npos}, |
| {"%2dtest", 0, 0}, |
| {"test%2d", 2, 2}, |
| {"test%2e", 2, 2}, |
| {"%E4%BD%A0+%E5%A5%BD", 9, 1}, |
| {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, |
| {"%E4%BD%A0+%E5%A5%BD", 0, 0}, |
| {"%E4%BD%A0+%E5%A5%BD", 10, 2}, |
| {"%E4%BD%A0+%E5%A5%BD", 19, 3}, |
| |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos}, |
| {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3}, |
| |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6}, |
| {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10}, |
| |
| {"%ED%B0%80+%E5%A5%BD", 6, 6}, // not convertible to UTF-8 |
| }; |
| |
| for (const auto& adjust_case : adjust_cases) { |
| size_t offset = adjust_case.input_offset; |
| OffsetAdjuster::Adjustments adjustments; |
| UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
| adjust_case.input, UnescapeRule::NORMAL, &adjustments); |
| OffsetAdjuster::AdjustOffset(adjustments, &offset); |
| EXPECT_EQ(adjust_case.output_offset, offset) |
| << "input=" << adjust_case.input |
| << " offset=" << adjust_case.input_offset; |
| } |
| } |
| |
| TEST(EscapeTest, UnescapeBinaryURLComponent) { |
| const UnescapeURLCase kTestCases[] = { |
| // Check that ASCII characters with special handling in |
| // UnescapeURLComponent() are still unescaped. |
| {"%09%20%25foo%2F", UnescapeRule::NORMAL, "\x09 %foo/"}, |
| |
| // UTF-8 Characters banned by UnescapeURLComponent() should also be |
| // unescaped. |
| {"Some random text %D8%9COK", UnescapeRule::NORMAL, |
| "Some random text \xD8\x9COK"}, |
| {"Some random text %F0%9F%94%8FOK", UnescapeRule::NORMAL, |
| "Some random text \xF0\x9F\x94\x8FOK"}, |
| |
| // As should invalid UTF-8 characters. |
| {"%A0%A0%E9%E9%A0%A0%A0%A0", UnescapeRule::NORMAL, |
| "\xA0\xA0\xE9\xE9\xA0\xA0\xA0\xA0"}, |
| |
| // And valid UTF-8 characters that are not banned by |
| // UnescapeURLComponent() should be unescaped, too! |
| {"%C2%A1%C2%A1", UnescapeRule::NORMAL, "\xC2\xA1\xC2\xA1"}, |
| |
| // '+' should be left alone by default |
| {"++%2B++", UnescapeRule::NORMAL, "+++++"}, |
| // But should magically be turned into a space if requested. |
| {"++%2B++", UnescapeRule::REPLACE_PLUS_WITH_SPACE, " + "}, |
| }; |
| |
| for (const auto& test_case : kTestCases) { |
| EXPECT_EQ(test_case.output, |
| UnescapeBinaryURLComponent(test_case.input, test_case.rules)); |
| } |
| |
| // Test NULL character unescaping, which can't be tested above since those are |
| // just char pointers. |
| std::string input("Null"); |
| input.push_back(0); // Also have a NULL in the input. |
| input.append("%00%39Test"); |
| |
| std::string expected("Null"); |
| expected.push_back(0); |
| expected.push_back(0); |
| expected.append("9Test"); |
| EXPECT_EQ(expected, UnescapeBinaryURLComponent(input)); |
| } |
| |
| TEST(EscapeTest, UnescapeBinaryURLComponentSafe) { |
| const struct TestCase { |
| const char* input; |
| // Expected output. Null if call is expected to fail when |
| // |fail_on_path_separators| is false. |
| const char* expected_output; |
| // Whether |input| has any escaped path separators. |
| bool has_path_separators; |
| } kTestCases[] = { |
| // Spaces, percents, and invalid UTF-8 characters are all successfully |
| // unescaped. |
| {"%20%25foo%81", " %foo\x81", false}, |
| |
| // Characters disallowed unconditionally. |
| {"foo%00", nullptr, false}, |
| {"foo%01", nullptr, false}, |
| {"foo%0A", nullptr, false}, |
| {"foo%0D", nullptr, false}, |
| |
| // Path separators. |
| {"foo%2F", "foo/", true}, |
| {"foo%5C", "foo\\", true}, |
| |
| // Characters that are considered invalid to escape are ignored if passed |
| // in unescaped. |
| {"foo\x01\r/\\", "foo\x01\r/\\", false}, |
| }; |
| |
| for (const auto& test_case : kTestCases) { |
| SCOPED_TRACE(test_case.input); |
| |
| std::string output = "foo"; |
| if (!test_case.expected_output) { |
| EXPECT_FALSE(UnescapeBinaryURLComponentSafe( |
| test_case.input, false /* fail_on_path_separators */, &output)); |
| EXPECT_TRUE(output.empty()); |
| EXPECT_FALSE(UnescapeBinaryURLComponentSafe( |
| test_case.input, true /* fail_on_path_separators */, &output)); |
| EXPECT_TRUE(output.empty()); |
| continue; |
| } |
| EXPECT_TRUE(UnescapeBinaryURLComponentSafe( |
| test_case.input, false /* fail_on_path_separators */, &output)); |
| EXPECT_EQ(test_case.expected_output, output); |
| if (test_case.has_path_separators) { |
| EXPECT_FALSE(UnescapeBinaryURLComponentSafe( |
| test_case.input, true /* fail_on_path_separators */, &output)); |
| EXPECT_TRUE(output.empty()); |
| } else { |
| output = "foo"; |
| EXPECT_TRUE(UnescapeBinaryURLComponentSafe( |
| test_case.input, true /* fail_on_path_separators */, &output)); |
| EXPECT_EQ(test_case.expected_output, output); |
| } |
| } |
| } |
| |
| TEST(EscapeTest, ContainsEncodedBytes) { |
| EXPECT_FALSE(ContainsEncodedBytes("abc/def", {'/', '\\'})); |
| EXPECT_FALSE(ContainsEncodedBytes("abc%2Fdef", {'%'})); |
| EXPECT_TRUE(ContainsEncodedBytes("abc%252Fdef", {'%'})); |
| EXPECT_TRUE(ContainsEncodedBytes("abc%2Fdef", {'/', '\\'})); |
| EXPECT_TRUE(ContainsEncodedBytes("abc%5Cdef", {'/', '\\'})); |
| EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'})); |
| |
| // Should be looking for byte values, not UTF-8 character values. |
| EXPECT_TRUE( |
| ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xc3')})); |
| EXPECT_FALSE( |
| ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xe9')})); |
| } |
| |
| } // namespace |
| } // namespace base |