Update googleurl to the latest version from Chromium. The version used is 51d0fb56d0f3cbb4be624aba1148411a1bde1554 from Fri Dec 4 00:14:56 2020 +0000.
diff --git a/AUTHORS b/AUTHORS index 160949c..a619990 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -199,6 +199,7 @@ Chris Szurgot <szurgotc@amazon.com> Chris Tserng <tserng@amazon.com> Chris Vasselli <clindsay@gmail.com> +Chris Ye <hawkoyates@gmail.com> Christophe Dumez <ch.dumez@samsung.com> Christopher Dale <chrelad@gmail.com> Chunbo Hua <chunbo.hua@intel.com> @@ -259,6 +260,7 @@ Deokjin Kim <deokjin81.kim@samsung.com> Derek Halman <d.halman@gmail.com> Devlin Cronin <rdevlin.cronin@gmail.com> +Dhi Aurrahman <dio@rockybars.com> Diana Suvorova <diana.suvorova@gmail.com> Diego Ferreiro Val <elfogris@gmail.com> Dillon Sellars <dill.sellars@gmail.com> @@ -441,6 +443,7 @@ James Willcox <jwillcox@litl.com> Jan Rucka <ruckajan10@gmail.com> Jan Sauer <jan@jansauer.de> +Janusz Majnert <jmajnert@gmail.com> Janwar Dinata <j.dinata@gmail.com> Jared Shumway <jaredshumway94@gmail.com> Jared Sohn <jared.sohn@gmail.com> @@ -634,6 +637,7 @@ Luke Zarko <lukezarko@gmail.com> Luoxi Pan <l.panpax@gmail.com> Maarten Lankhorst <m.b.lankhorst@gmail.com> +Maciej Pawlowski <m.pawlowski@eyeo.com> Magnus Danielsson <fuzzac@gmail.com> Mahesh Kulkarni <mahesh.kk@samsung.com> Mahesh Machavolu <mahesh.ma@samsung.com> @@ -787,6 +791,7 @@ PaweÅ‚ Hajdan jr <phajdan.jr@gmail.com> Piotr Zarycki <piotr.zarycki@gmail.com> Payal Pandey <payal.pandey@samsung.com> +Pedro Tôrres <t0rr3s.p3dr0@gmail.com> Peng Hu <penghu@tencent.com> Peng Jiang <leiyi.jp@gmail.com> Peng Xinchao <pxinchao@gmail.com> @@ -860,6 +865,7 @@ Robert Bradford <robert.bradford@intel.com> Robert Goldberg <goldberg@adobe.com> Robert Hogan <robhogan@gmail.com> +Robert O'Callahan <rocallahan@gmail.com> Robert Nagy <robert.nagy@gmail.com> Robert Sesek <rsesek@bluestatic.org> Roger Zanoni <rogerzanoni@gmail.com> @@ -970,6 +976,7 @@ Soren Dreijer <dreijerbit@gmail.com> Sreerenj Balachandran <sreerenj.balachandran@intel.com> Srirama Chandra Sekhar Mogali <srirama.m@samsung.com> +Stacy Kim <stacy.kim@ucla.edu> Staphany Park <stapark008@gmail.com> Stephan Hartmann <stha09@googlemail.com> Stephen Searles <stephen.searles@gmail.com> @@ 
-1021,6 +1028,7 @@ Thomas Butter <tbutter@gmail.com> Thomas Conti <tomc@amazon.com> Thomas Nguyen <haitung.nguyen@avast.com> +Thomas Phillips <tphillips@snapchat.com> Thomas White <im.toms.inbox@gmail.com> Tiago Vignatti <tiago.vignatti@intel.com> Tibor Dusnoki <tibor.dusnoki.91@gmail.com>
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index 0f960c1..fe3d499 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -11,6 +11,19 @@ #error "Only clang-cl is supported on Windows, see https://crbug.com/988071" #endif +// This is a wrapper around `__has_cpp_attribute`, which can be used to test for +// the presence of an attribute. In case the compiler does not support this +// macro it will simply evaluate to 0. +// +// References: +// https://wg21.link/sd6#testing-for-the-presence-of-an-attribute-__has_cpp_attribute +// https://wg21.link/cpp.cond#:__has_cpp_attribute +#if defined(__has_cpp_attribute) +#define HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +#define HAS_CPP_ATTRIBUTE(x) 0 +#endif + // Annotate a variable indicating it's ok if the variable is not used. // (Typically used to silence a compiler warning when the assignment // is important for some other reason.) @@ -99,6 +112,20 @@ #define WARN_UNUSED_RESULT #endif +// In case the compiler supports it NO_UNIQUE_ADDRESS evaluates to the C++20 +// attribute [[no_unique_address]]. This allows annotating data members so that +// they need not have an address distinct from all other non-static data members +// of its class. +// +// References: +// * https://en.cppreference.com/w/cpp/language/attributes/no_unique_address +// * https://wg21.link/dcl.attr.nouniqueaddr +#if HAS_CPP_ATTRIBUTE(no_unique_address) +#define NO_UNIQUE_ADDRESS [[no_unique_address]] +#else +#define NO_UNIQUE_ADDRESS +#endif + // Tell the compiler a function is using a printf-style format string. // |format_param| is the one-based index of the format string parameter; // |dots_param| is the one-based index of the "..." parameter.
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h index 649059f..646b733 100644 --- a/base/ranges/algorithm.h +++ b/base/ranges/algorithm.h
@@ -11,6 +11,8 @@ #include <type_traits> #include <utility> +#include "polyfills/base/check.h" +#include "base/compiler_specific.h" #include "base/functional/identity.h" #include "base/functional/invoke.h" #include "base/ranges/functional.h" @@ -150,14 +152,13 @@ namespace ranges { -// C++14 implementation of std::ranges::in_fun_result. Note the because C++14 -// lacks the `no_unique_address` attribute it is commented out. +// C++14 implementation of std::ranges::in_fun_result. // // Reference: https://wg21.link/algorithms.results#:~:text=in_fun_result template <typename I, typename F> struct in_fun_result { - /* [[no_unique_address]] */ I in; - /* [[no_unique_address]] */ F fun; + NO_UNIQUE_ADDRESS I in; + NO_UNIQUE_ADDRESS F fun; template <typename I2, typename F2, @@ -3732,6 +3733,8 @@ Comp comp = {}, Proj1 proj1 = {}, Proj2 proj2 = {}) { + GURL_DCHECK(ranges::is_sorted(first1, last1, comp, proj1)); + GURL_DCHECK(ranges::is_sorted(first2, last2, comp, proj2)); // Needs to opt-in to all permutations, since std::includes expects // comp(proj1(lhs), proj2(rhs)) and comp(proj2(lhs), proj1(rhs)) to compile. return std::includes(
diff --git a/base/stl_util.h b/base/stl_util.h index 49fd272..f3d86ef 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -25,6 +25,7 @@ #include "polyfills/base/check.h" #include "base/optional.h" +#include "base/ranges/algorithm.h" #include "base/template_util.h" namespace gurl_base { @@ -464,29 +465,11 @@ std::forward<Args>(args)...); } -// Returns true if the container is sorted. Requires constexpr std::begin/end, -// which exists for arrays in C++14. -// Note that std::is_sorted is constexpr beginning C++20 and this should be -// switched to use it when C++20 is supported. -template <typename Container> -constexpr bool STLIsSorted(const Container& cont) { - auto it = std::begin(cont); - const auto end = std::end(cont); - if (it == end) - return true; - - for (auto prev = it++; it != end; prev = it++) { - if (*it < *prev) - return false; - } - return true; -} - // Returns a new ResultType containing the difference of two sorted containers. template <typename ResultType, typename Arg1, typename Arg2> ResultType STLSetDifference(const Arg1& a1, const Arg2& a2) { - GURL_DCHECK(STLIsSorted(a1)); - GURL_DCHECK(STLIsSorted(a2)); + GURL_DCHECK(ranges::is_sorted(a1)); + GURL_DCHECK(ranges::is_sorted(a2)); ResultType difference; std::set_difference(a1.begin(), a1.end(), a2.begin(), a2.end(), @@ -497,8 +480,8 @@ // Returns a new ResultType containing the union of two sorted containers. template <typename ResultType, typename Arg1, typename Arg2> ResultType STLSetUnion(const Arg1& a1, const Arg2& a2) { - GURL_DCHECK(STLIsSorted(a1)); - GURL_DCHECK(STLIsSorted(a2)); + GURL_DCHECK(ranges::is_sorted(a1)); + GURL_DCHECK(ranges::is_sorted(a2)); ResultType result; std::set_union(a1.begin(), a1.end(), a2.begin(), a2.end(), @@ -510,8 +493,8 @@ // containers. 
template <typename ResultType, typename Arg1, typename Arg2> ResultType STLSetIntersection(const Arg1& a1, const Arg2& a2) { - GURL_DCHECK(STLIsSorted(a1)); - GURL_DCHECK(STLIsSorted(a2)); + GURL_DCHECK(ranges::is_sorted(a1)); + GURL_DCHECK(ranges::is_sorted(a2)); ResultType result; std::set_intersection(a1.begin(), a1.end(), a2.begin(), a2.end(), @@ -519,16 +502,6 @@ return result; } -// Returns true if the sorted container |a1| contains all elements of the sorted -// container |a2|. -template <typename Arg1, typename Arg2> -bool STLIncludes(const Arg1& a1, const Arg2& a2) { - GURL_DCHECK(STLIsSorted(a1)); - GURL_DCHECK(STLIsSorted(a2)); - return std::includes(a1.begin(), a1.end(), - a2.begin(), a2.end()); -} - // Erase/EraseIf are based on C++20's uniform container erasure API: // - https://eel.is/c++draft/libraryindex#:erase // - https://eel.is/c++draft/libraryindex#:erase_if
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index 1030416..f0f72cd 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -480,22 +480,24 @@ const char* ref; const char* expected; } replace_cases[] = { - {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, - NULL, "/", "", "", "http://www.google.com/"}, + {"http://www.google.com/foo/bar.html?foo#bar", nullptr, nullptr, nullptr, + nullptr, nullptr, "/", "", "", "http://www.google.com/"}, {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, - {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", - "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"}, + {"file:///C:/foo/bar.txt", "http", nullptr, nullptr, "www.google.com", + "99", "/foo", "search", "ref", + "http://www.google.com:99/foo?search#ref"}, #ifdef WIN32 {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"}, #endif - {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, - NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"}, + {"filesystem:http://www.google.com/foo/bar.html?foo#bar", nullptr, + nullptr, nullptr, nullptr, nullptr, "/", "", "", + "filesystem:http://www.google.com/foo/"}, // Lengthen the URL instead of shortening it, to test creation of // inner_url. 
- {"filesystem:http://www.google.com/foo/", NULL, NULL, NULL, NULL, NULL, - "bar.html", "foo", "bar", + {"filesystem:http://www.google.com/foo/", nullptr, nullptr, nullptr, + nullptr, nullptr, "bar.html", "foo", "bar", "filesystem:http://www.google.com/foo/bar.html?foo#bar"}, }; @@ -554,13 +556,17 @@ const char* expected; const char* inner_expected; } cases[] = { - {"http://www.google.com", "/", NULL}, - {"http://www.google.com/", "/", NULL}, - {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL}, - {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL}, - {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"}, + {"http://www.google.com", "/", nullptr}, + {"http://www.google.com/", "/", nullptr}, + {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", + nullptr}, + {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr}, + {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", + nullptr}, + {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", + "/foo/bar.html?query", "/temporary"}, + {"filesystem:http://www.google.com/temporary/foo/bar.html?query", + "/foo/bar.html?query", "/temporary"}, }; for (size_t i = 0; i < gurl_base::size(cases); i++) {
diff --git a/url/origin.h b/url/origin.h index f4c1bf8..8cb3ef0 100644 --- a/url/origin.h +++ b/url/origin.h
@@ -56,8 +56,6 @@ } // namespace mojo namespace net { -class NetworkIsolationKey; -class OpaqueNonTransientNetworkIsolationKeyTest; class SchemefulSite; } // namespace net @@ -299,11 +297,9 @@ private: friend class blink::SecurityOrigin; - friend class net::NetworkIsolationKey; // SchemefulSite needs access to the serialization/deserialization logic which // includes the nonce. friend class net::SchemefulSite; - friend class net::OpaqueNonTransientNetworkIsolationKeyTest; friend class OriginTest; friend struct mojo::UrlOriginAdapter; friend struct ipc_fuzzer::FuzzTraits<Origin>;
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index c541cae..265761b 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc
@@ -242,9 +242,6 @@ result.append(host_); } - if (port_ == 0) - return result; - // Omit the port component if the port matches with the default port // defined for the scheme, if any. int default_port = DefaultPortForScheme(scheme_.data(),
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc index 3e7bf65..a4cbdb3 100644 --- a/url/scheme_host_port_unittest.cc +++ b/url/scheme_host_port_unittest.cc
@@ -224,6 +224,7 @@ {"https://example.com:123/", "https://example.com:123"}, {"file:///etc/passwd", "file://"}, {"file://example.com/etc/passwd", "file://example.com"}, + {"https://example.com:0/", "https://example.com:0"}, }; for (const auto& test : cases) {
diff --git a/url/url_canon.h b/url/url_canon.h index c50cceb..84b3549 100644 --- a/url/url_canon.h +++ b/url/url_canon.h
@@ -500,7 +500,7 @@ Component* out_port); // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED -// if the scheme is unknown. +// if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port COMPONENT_EXPORT(URL) int DefaultPortForScheme(const char* scheme, int scheme_len);
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc index 6a94f50..005877a 100644 --- a/url/url_canon_stdurl.cc +++ b/url/url_canon_stdurl.cc
@@ -108,9 +108,11 @@ } // namespace - // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED // if the scheme is unknown. +// +// Please keep blink::DefaultPortForProtocol and url::DefaultPortForScheme in +// sync. int DefaultPortForScheme(const char* scheme, int scheme_len) { int default_port = PORT_UNSPECIFIED; switch (scheme_len) {
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index 8f709be..55ef089 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -144,26 +144,26 @@ const char* output; } utf_cases[] = { // Valid canonical input should get passed through & escaped. - {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"}, + {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"}, // Test a character that takes > 16 bits (U+10300 = old italic letter A) - {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"}, + {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"}, // Non-shortest-form UTF-8 characters are invalid. The bad bytes should // each be replaced with the invalid character (EF BF DB in UTF-8). - {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, - "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"}, + {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", nullptr, false, + "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"}, // Invalid UTF-8 sequences should be marked as invalid (the first // sequence is truncated). - {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"}, + {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"}, // Character going off the end. - {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"}, + {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"}, // ...same with low surrogates with no high surrogate. - {nullptr, L"\xdc00", false, "%EF%BF%BD"}, + {nullptr, L"\xdc00", false, "%EF%BF%BD"}, // Test a UTF-8 encoded surrogate value is marked as invalid. // ED A0 80 = U+D800 - {"\xed\xa0\x80", NULL, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, + {"\xed\xa0\x80", nullptr, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, // ...even when paired. 
- {"\xed\xa0\x80\xed\xb0\x80", nullptr, false, - "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, + {"\xed\xa0\x80\xed\xb0\x80", nullptr, false, + "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, }; std::string out_str; @@ -1068,89 +1068,100 @@ TEST(URLCanonTest, Path) { DualComponentCase path_cases[] = { - // ----- path collapsing tests ----- - {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, - {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, - {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, - {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, + // ----- path collapsing tests ----- + {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, + {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, + {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, + {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, // double dots followed by a slash or the end of the string count - {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, - {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, + {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, + {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, // don't count double dots when they aren't followed by a slash - {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, + {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, // some in the middle - {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true}, - {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", Component(0, 2), true}, + {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), + true}, + {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", + Component(0, 2), true}, // we should not be able to go above the root - {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, - {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true}, + {"/foo/../../..", 
L"/foo/../../..", "/", Component(0, 1), true}, + {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), + true}, // escaped dots should be unescaped and treated the same as dots - {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, - {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, - {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", Component(0, 6), true}, + {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, + {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, + {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", + "/..bar", Component(0, 6), true}, // Multiple slashes in a row should be preserved and treated like empty // directory names. - {"////../..", L"////../..", "//", Component(0, 2), true}, + {"////../..", L"////../..", "//", Component(0, 2), true}, - // ----- escaping tests ----- - {"/foo", L"/foo", "/foo", Component(0, 4), true}, + // ----- escaping tests ----- + {"/foo", L"/foo", "/foo", Component(0, 4), true}, // Valid escape sequence - {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, + {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, // Invalid escape sequence we should pass through unchanged. - {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, - {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, + {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, + {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, // Invalid escape sequence: bad characters should be treated the same as // the sourrounding text, not as escaped (in this case, UTF-8). 
- {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, - {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", Component(0, 16), true}, - {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), true}, + {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, + {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), + true}, + {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", + Component(0, 22), true}, // Regular characters that are escaped should be unescaped - {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, + {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, // Funny characters that are unescaped should be escaped - {"/foo\x09\x91%91", NULL, "/foo%09%91%91", Component(0, 13), true}, - {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, + {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, + {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, + {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, // Some characters should be passed through unchanged regardless of esc. - {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), true}, + {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), + true}, // Characters that are properly escaped should not have the case changed // of hex letters. 
- {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), true}, + {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), + true}, // Funny characters that are unescaped should be escaped - {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, + {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, // Backslashes should get converted to forward slashes - {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, + {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, // Hashes found in paths (possibly only when the caller explicitly sets // the path on an already-parsed URL) should be escaped. - {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, + {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, // %7f should be allowed and %3D should not be unescaped (these were wrong // in a previous version). - {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, + {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", + "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, // @ should be passed through unchanged (escaped or unescaped). - {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, + {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, // Nested escape sequences should result in escaping the leading '%' if // unescaping would result in a new escape sequence. - {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, - {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, - {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, + {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, + {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, + {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, // Make sure truncated "nested" escapes don't result in reading off the // string end. 
- {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, + {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, // Don't unescape the leading '%' if unescaping doesn't result in a valid // new escape sequence. - {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, - {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, + {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, + {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, // Don't erroneously downcast a UTF-16 charater in a way that makes it // look like part of an escape sequence. - {NULL, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, + {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, - // ----- encoding tests ----- + // ----- encoding tests ----- // Basic conversions - {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", Component(0, 37), true}, + {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", + L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + Component(0, 37), true}, // Invalid unicode characters should fail. We only do validation on // UTF-16 input, so this doesn't happen on 8-bit. 
- {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", Component(0, 13), true}, - {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, + {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, + {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, }; for (size_t i = 0; i < gurl_base::size(path_cases); i++) { @@ -1293,8 +1304,8 @@ // Escaping should be preserved unchanged, even invalid ones {"%41%a", L"%41%a", "#%41%a", Component(1, 5), true}, // Invalid UTF-8/16 input should be flagged and the input made valid - {"\xc2", NULL, "#%EF%BF%BD", Component(1, 9), true}, - {NULL, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true}, + {"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true}, + {nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true}, // Test a Unicode invalid character. {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true}, // Refs can have # signs and we should preserve them. @@ -1437,14 +1448,22 @@ TEST(URLCanonTest, ReplaceStandardURL) { ReplaceCase replace_cases[] = { // Common case of truncating the path. 
- {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"}, + {"http://www.google.com/foo?bar=baz#ref", nullptr, nullptr, nullptr, + nullptr, nullptr, "/", kDeleteComp, kDeleteComp, + "http://www.google.com/"}, // Replace everything - {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"}, + {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", + "host.com", "99", "/path", "query", "ref", + "https://me:pw@host.com:99/path?query#ref"}, // Replace nothing - {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"}, + {"http://a:b@google.com:22/foo?baz@cat", nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, + "http://a:b@google.com:22/foo?baz@cat"}, // Replace scheme with filesystem. The result is garbage, but you asked // for it. 
- {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"}, + {"http://a:b@google.com:22/foo?baz@cat", "filesystem", nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, + "filesystem://a:b@google.com:22/foo?baz@cat"}, }; for (size_t i = 0; i < gurl_base::size(replace_cases); i++) { @@ -1514,24 +1533,37 @@ TEST(URLCanonTest, ReplaceFileURL) { ReplaceCase replace_cases[] = { // Replace everything - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, + {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, "filer", nullptr, + "/foo", "b", "c", "file://filer/foo?b#c"}, // Replace nothing - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, - {"file:///Y:", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:"}, - {"file:///Y:/", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:/"}, - {"file:///./Y", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y"}, - {"file:///./Y:", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:"}, + {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"}, + {"file:///Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "file:///Y:"}, + {"file:///Y:/", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "file:///Y:/"}, + {"file:///./Y", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "file:///Y"}, + {"file:///./Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "file:///Y:"}, // Clear non-path components (common) - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"}, + {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, kDeleteComp, 
kDeleteComp, "file:///C:/gaba"}, // Replace path with something that doesn't begin with a slash and make // sure it gets added properly. - {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"}, - {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, + {"file:///C:/gaba", nullptr, nullptr, nullptr, nullptr, nullptr, + "interesting/", nullptr, nullptr, "file:///interesting/"}, + {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, "filer", + nullptr, "/foo", "b", "c", "file://filer/foo?b#c"}, + {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, "file:///home/gaba?query#ref"}, + {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, kDeleteComp, kDeleteComp, "file:///home/gaba"}, + {"file:///home/gaba", nullptr, nullptr, nullptr, nullptr, nullptr, + "interesting/", nullptr, nullptr, "file:///interesting/"}, // Replace scheme -- shouldn't do anything. - {"file:///C:/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, + {"file:///C:/gaba?query#ref", "http", nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"}, }; for (size_t i = 0; i < gurl_base::size(replace_cases); i++) { @@ -1565,38 +1597,41 @@ TEST(URLCanonTest, ReplaceFileSystemURL) { ReplaceCase replace_cases[] = { // Replace everything in the outer URL. 
- {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"}, + {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr, + nullptr, nullptr, "/foo", "b", "c", + "filesystem:file:///temporary/foo?b#c"}, // Replace nothing - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"}, + {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, + "filesystem:file:///temporary/gaba?query#ref"}, // Clear non-path components (common) - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, NULL, kDeleteComp, kDeleteComp, + {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, kDeleteComp, kDeleteComp, "filesystem:file:///temporary/gaba"}, // Replace path with something that doesn't begin with a slash and make // sure it gets added properly. - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, "interesting/", NULL, NULL, + {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr, + nullptr, nullptr, "interesting/", nullptr, nullptr, "filesystem:file:///temporary/interesting/?query#ref"}, // Replace scheme -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, + {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "filesystem:http://bar.com/t/gaba?query#ref"}, // Replace username -- shouldn't do anything except canonicalize. 
- {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, - NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"}, + {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, "u2", nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, + "filesystem:http://bar.com/t/gaba?query#ref"}, // Replace password -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", - NULL, NULL, NULL, NULL, NULL, + {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, nullptr, + "pw2", nullptr, nullptr, nullptr, nullptr, nullptr, "filesystem:http://bar.com/t/gaba?query#ref"}, // Replace host -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", NULL, NULL, NULL, - "foo.com", NULL, NULL, NULL, NULL, + {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", nullptr, nullptr, + nullptr, "foo.com", nullptr, nullptr, nullptr, nullptr, "filesystem:http://bar.com/t/gaba?query#ref"}, // Replace port -- shouldn't do anything except canonicalize. 
- {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, - NULL, "41", NULL, NULL, NULL, + {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", nullptr, nullptr, + nullptr, nullptr, "41", nullptr, nullptr, nullptr, "filesystem:http://bar.com:40/t/gaba?query#ref"}, }; @@ -1630,13 +1665,18 @@ TEST(URLCanonTest, ReplacePathURL) { ReplaceCase replace_cases[] = { // Replace everything - {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"}, + {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr, + "alert('foo?');", nullptr, nullptr, "javascript:alert('foo?');"}, // Replace nothing - {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"}, + {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "data:foo"}, // Replace one or the other - {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"}, + {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, "javascript:foo"}, + {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, "bar", nullptr, + nullptr, "data:bar"}, + {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp, + nullptr, nullptr, "data:"}, }; for (size_t i = 0; i < gurl_base::size(replace_cases); i++) {
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc index a1c38c2..b67b550 100644 --- a/url/url_parse_unittest.cc +++ b/url/url_parse_unittest.cc
@@ -492,21 +492,21 @@ const char* input; const char* expected; } file_cases[] = { - {"http://www.google.com", NULL}, - {"http://www.google.com/", ""}, - {"http://www.google.com/search", "search"}, - {"http://www.google.com/search/", ""}, - {"http://www.google.com/foo/bar.html?baz=22", "bar.html"}, - {"http://www.google.com/foo/bar.html#ref", "bar.html"}, - {"http://www.google.com/search/;param", ""}, - {"http://www.google.com/foo/bar.html;param#ref", "bar.html"}, - {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"}, - {"http://www.google.com/foo/bar.html?query#ref", "bar.html"}, - {"http://www.google.com/foo;/bar.html", "bar.html"}, - {"http://www.google.com/foo;/", ""}, - {"http://www.google.com/foo;", "foo"}, - {"http://www.google.com/;", ""}, - {"http://www.google.com/foo;bar;html", "foo"}, + {"http://www.google.com", nullptr}, + {"http://www.google.com/", ""}, + {"http://www.google.com/search", "search"}, + {"http://www.google.com/search/", ""}, + {"http://www.google.com/foo/bar.html?baz=22", "bar.html"}, + {"http://www.google.com/foo/bar.html#ref", "bar.html"}, + {"http://www.google.com/search/;param", ""}, + {"http://www.google.com/foo/bar.html;param#ref", "bar.html"}, + {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"}, + {"http://www.google.com/foo/bar.html?query#ref", "bar.html"}, + {"http://www.google.com/foo;/bar.html", "bar.html"}, + {"http://www.google.com/foo;/", ""}, + {"http://www.google.com/foo;", "foo"}, + {"http://www.google.com/;", ""}, + {"http://www.google.com/foo;bar;html", "foo"}, }; for (size_t i = 0; i < gurl_base::size(file_cases); i++) {