url/gurl_unittest.cc - googleurl - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "url/gurl.h"

 #include <stddef.h>

 #include "base/strings/string_number_conversions.h"
 #include "base/strings/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "url/gurl_abstract_tests.h"
 #include "url/origin.h"
 #include "url/url_canon.h"
 #include "url/url_test_utils.h"

 namespace url {

 namespace {

 // Returns the canonicalized string for the given URL string for the
 // GURLTest.Types test.
 std::string TypesTestCase(const char* src) {
   GURL gurl(src);
   return gurl.possibly_invalid_spec();
 }

 }  // namespace

 // Different types of URLs should be handled differently, and handed off to
 // different canonicalizers.
 TEST(GURLTest, Types) {
   // URLs with unknown schemes should be treated as path URLs, even when they
   // have things like "://".
   EXPECT_EQ("something:///HOSTNAME.com/",
             TypesTestCase("something:///HOSTNAME.com/"));

   // Conversely, URLs with known schemes should always trigger standard URL
   // handling.
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));

 #ifdef WIN32
   // URLs that look like Windows absolute path specs.
   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
   EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
 #endif
 }

 // Test the basic creation and querying of components in a GURL. We assume that
 // the parser is already tested and works, so we are mostly interested if the
 // object does the right thing with the results.
 TEST(GURLTest, Components) {
   GURL empty_url(u"");
   EXPECT_TRUE(empty_url.is_empty());
   EXPECT_FALSE(empty_url.is_valid());

   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
   EXPECT_FALSE(url.is_empty());
   EXPECT_TRUE(url.is_valid());
   EXPECT_TRUE(url.SchemeIs("http"));
   EXPECT_FALSE(url.SchemeIsFile());

   // This is the narrow version of the URL, which should match the wide input.
   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());

   EXPECT_EQ("http", url.scheme());
   EXPECT_EQ("user", url.username());
   EXPECT_EQ("pass", url.password());
   EXPECT_EQ("google.com", url.host());
   EXPECT_EQ("99", url.port());
   EXPECT_EQ(99, url.IntPort());
   EXPECT_EQ("/foo;bar", url.path());
   EXPECT_EQ("q=a", url.query());
   EXPECT_EQ("ref", url.ref());

   // Test parsing userinfo with special characters.
   GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345");
   EXPECT_TRUE(url_special_pass.is_valid());
   // GURL canonicalizes some delimiters.
   EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
   EXPECT_EQ("google.com", url_special_pass.host());
   EXPECT_EQ("12345", url_special_pass.port());
 }

 TEST(GURLTest, Empty) {
   GURL url;
   EXPECT_FALSE(url.is_valid());
   EXPECT_EQ("", url.spec());

   EXPECT_EQ("", url.scheme());
   EXPECT_EQ("", url.username());
   EXPECT_EQ("", url.password());
   EXPECT_EQ("", url.host());
   EXPECT_EQ("", url.port());
   EXPECT_EQ(PORT_UNSPECIFIED, url.IntPort());
   EXPECT_EQ("", url.path());
   EXPECT_EQ("", url.query());
   EXPECT_EQ("", url.ref());
 }

 TEST(GURLTest, Copy) {
   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");

   GURL url2(url);
   EXPECT_TRUE(url2.is_valid());

   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
   EXPECT_EQ("http", url2.scheme());
   EXPECT_EQ("user", url2.username());
   EXPECT_EQ("pass", url2.password());
   EXPECT_EQ("google.com", url2.host());
   EXPECT_EQ("99", url2.port());
   EXPECT_EQ(99, url2.IntPort());
   EXPECT_EQ("/foo;bar", url2.path());
   EXPECT_EQ("q=a", url2.query());
   EXPECT_EQ("ref", url2.ref());

   // Copying of invalid URL should be invalid
   GURL invalid;
   GURL invalid2(invalid);
   EXPECT_FALSE(invalid2.is_valid());
   EXPECT_EQ("", invalid2.spec());
   EXPECT_EQ("", invalid2.scheme());
   EXPECT_EQ("", invalid2.username());
   EXPECT_EQ("", invalid2.password());
   EXPECT_EQ("", invalid2.host());
   EXPECT_EQ("", invalid2.port());
   EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
   EXPECT_EQ("", invalid2.path());
   EXPECT_EQ("", invalid2.query());
   EXPECT_EQ("", invalid2.ref());
 }

 TEST(GURLTest, Assign) {
   GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");

   GURL url2;
   url2 = url;
   EXPECT_TRUE(url2.is_valid());

   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
   EXPECT_EQ("http", url2.scheme());
   EXPECT_EQ("user", url2.username());
   EXPECT_EQ("pass", url2.password());
   EXPECT_EQ("google.com", url2.host());
   EXPECT_EQ("99", url2.port());
   EXPECT_EQ(99, url2.IntPort());
   EXPECT_EQ("/foo;bar", url2.path());
   EXPECT_EQ("q=a", url2.query());
   EXPECT_EQ("ref", url2.ref());

   // Assignment of invalid URL should be invalid
   GURL invalid;
   GURL invalid2;
   invalid2 = invalid;
   EXPECT_FALSE(invalid2.is_valid());
   EXPECT_EQ("", invalid2.spec());
   EXPECT_EQ("", invalid2.scheme());
   EXPECT_EQ("", invalid2.username());
   EXPECT_EQ("", invalid2.password());
   EXPECT_EQ("", invalid2.host());
   EXPECT_EQ("", invalid2.port());
   EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
   EXPECT_EQ("", invalid2.path());
   EXPECT_EQ("", invalid2.query());
   EXPECT_EQ("", invalid2.ref());
 }

 // This is a regression test for http://crbug.com/309975.
 TEST(GURLTest, SelfAssign) {
   GURL a("filesystem:http://example.com/temporary/");
   // This should not crash.
   a = *&a;  // The *& defeats Clang's -Wself-assign warning.
 }

 TEST(GURLTest, CopyFileSystem) {
   GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");

   GURL url2(url);
   EXPECT_TRUE(url2.is_valid());

   EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
   EXPECT_EQ("filesystem", url2.scheme());
   EXPECT_EQ("", url2.username());
   EXPECT_EQ("", url2.password());
   EXPECT_EQ("", url2.host());
   EXPECT_EQ("", url2.port());
   EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort());
   EXPECT_EQ("/foo;bar", url2.path());
   EXPECT_EQ("q=a", url2.query());
   EXPECT_EQ("ref", url2.ref());

   const GURL* inner = url2.inner_url();
   ASSERT_TRUE(inner);
   EXPECT_EQ("https", inner->scheme());
   EXPECT_EQ("", inner->username());
   EXPECT_EQ("", inner->password());
   EXPECT_EQ("google.com", inner->host());
   EXPECT_EQ("99", inner->port());
   EXPECT_EQ(99, inner->IntPort());
   EXPECT_EQ("/t", inner->path());
   EXPECT_EQ("", inner->query());
   EXPECT_EQ("", inner->ref());
 }

 TEST(GURLTest, IsValid) {
   const char* valid_cases[] = {
       "http://google.com",
       "unknown://google.com",
       "http://user:pass@google.com",
       "http://google.com:12345",
       "http://google.com:0",  // 0 is a valid port
       "http://google.com/path",
       "http://google.com//path",
       "http://google.com?k=v#fragment",
       "http://user:pass@google.com:12345/path?k=v#fragment",
       "http:/path",
       "http:path",
   };
   for (size_t i = 0; i < std::size(valid_cases); i++) {
     EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
         << "Case: " << valid_cases[i];
   }

   const char* invalid_cases[] = {
       "http://?k=v",
       "http:://google.com",
       "http//google.com",
       "http://google.com:12three45",
       "file://server:123",  // file: URLs cannot have a port
       "file://server:0",
       "://google.com",
       "path",
   };
   for (size_t i = 0; i < std::size(invalid_cases); i++) {
     EXPECT_FALSE(GURL(invalid_cases[i]).is_valid())
         << "Case: " << invalid_cases[i];
   }
 }

 TEST(GURLTest, ExtraSlashesBeforeAuthority) {
   // According to RFC3986, the hierarchical part for URI with an authority
   // must use only two slashes; GURL intentionally just ignores extra slashes
   // if there are more than 2, and parses the following part as an authority.
   GURL url("http:///host");
   EXPECT_EQ("host", url.host());
   EXPECT_EQ("/", url.path());
 }

 // Given invalid URLs, we should still get most of the components.
 TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
   constexpr struct InvalidURLTestExpectations {
     const char* url;
     const char* spec;
     const char* scheme;
     const char* host;
     const char* port;
     const char* path;
     // Extend as needed...
   } expectations[] = {
       {
           "http:google.com:foo",
           "http://google.com:foo/",
           "http",
           "google.com",
           "foo",
           "/",
       },
       {
           "https:google.com:foo",
           "https://google.com:foo/",
           "https",
           "google.com",
           "foo",
           "/",
       },
   };

   for (const auto& e : expectations) {
     const GURL url(e.url);
     EXPECT_FALSE(url.is_valid());
     EXPECT_EQ(e.spec, url.possibly_invalid_spec());
     EXPECT_EQ(e.scheme, url.scheme());
     EXPECT_EQ("", url.username());
     EXPECT_EQ("", url.password());
     EXPECT_EQ(e.host, url.host());
     EXPECT_EQ(e.port, url.port());
     EXPECT_EQ(PORT_INVALID, url.IntPort());
     EXPECT_EQ(e.path, url.path());
     EXPECT_EQ("", url.query());
     EXPECT_EQ("", url.ref());
   }
 }

 TEST(GURLTest, Resolve) {
   // The tricky cases for relative URL resolving are tested in the
   // canonicalizer unit test. Here, we just test that the GURL integration
   // works properly.
   struct ResolveCase {
     const char* base;
     const char* relative;
     bool expected_valid;
     const char* expected;
   } resolve_cases[] = {
       {"http://www.google.com/", "foo.html", true,
        "http://www.google.com/foo.html"},
       {"http://www.google.com/foo/", "bar", true,
        "http://www.google.com/foo/bar"},
       {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
       {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
       {"http://www.google.com/", "http://images.google.com/foo.html", true,
        "http://images.google.com/foo.html"},
       {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
        true, "http://images.google.com/foo.html"},
       {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
        true, "http://www.google.com/hello/world.html?a#b"},
       {"http://www.google.com/foo#bar", "#com", true,
        "http://www.google.com/foo#com"},
       {"http://www.google.com/", "Https:images.google.com", true,
        "https://images.google.com/"},
       // A non-standard base can be replaced with a standard absolute URL.
       {"data:blahblah", "http://google.com/", true, "http://google.com/"},
       {"data:blahblah", "http:google.com", true, "http://google.com/"},
       {"data:blahblah", "https:google.com", true, "https://google.com/"},
       // Filesystem URLs have different paths to test.
       {"filesystem:http://www.google.com/type/", "foo.html", true,
        "filesystem:http://www.google.com/type/foo.html"},
       {"filesystem:http://www.google.com/type/", "../foo.html", true,
        "filesystem:http://www.google.com/type/foo.html"},
       // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
       // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
       {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
        "file:///some/dir/10.0.0.7:8080/foo.html"},
       {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
       {"file:///some/dir/", "x1://host", true, "x1://host"},
       {"file:///some/dir/", "X1://host", true, "x1://host"},
       {"file:///some/dir/", "x.://host", true, "x.://host"},
       {"file:///some/dir/", "x+://host", true, "x+://host"},
       {"file:///some/dir/", "x-://host", true, "x-://host"},
       {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
       {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
   };

   for (size_t i = 0; i < std::size(resolve_cases); i++) {
     // 8-bit code path.
     GURL input(resolve_cases[i].base);
     GURL output = input.Resolve(resolve_cases[i].relative);
     EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
     EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);

     // Wide code path.
     GURL inputw(gurl_base::UTF8ToUTF16(resolve_cases[i].base));
     GURL outputw =
         input.Resolve(gurl_base::UTF8ToUTF16(resolve_cases[i].relative));
     EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
     EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
     EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL);
   }
 }

 TEST(GURLTest, GetOrigin) {
   struct TestCase {
     const char* input;
     const char* expected;
   } cases[] = {
       {"http://www.google.com", "http://www.google.com/"},
       {"javascript:window.alert(\"hello,world\");", ""},
       {"http://user:pass@www.google.com:21/blah#baz",
        "http://www.google.com:21/"},
       {"http://user@www.google.com", "http://www.google.com/"},
       {"http://:pass@www.google.com", "http://www.google.com/"},
       {"http://:@www.google.com", "http://www.google.com/"},
       {"filesystem:http://www.google.com/temp/foo?q#b",
        "http://www.google.com/"},
       {"filesystem:http://user:pass@google.com:21/blah#baz",
        "http://google.com:21/"},
       {"blob:null/guid-goes-here", ""},
       {"blob:http://origin/guid-goes-here", "" /* should be http://origin/ */},
   };
   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     GURL origin = url.DeprecatedGetOriginAsURL();
     EXPECT_EQ(cases[i].expected, origin.spec());
   }
 }

 TEST(GURLTest, GetAsReferrer) {
   struct TestCase {
     const char* input;
     const char* expected;
   } cases[] = {
     {"http://www.google.com", "http://www.google.com/"},
     {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"},
     {"http://user@www.google.com", "http://www.google.com/"},
     {"http://:pass@www.google.com", "http://www.google.com/"},
     {"http://:@www.google.com", "http://www.google.com/"},
     {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"},
     {"not a url", ""},
     {"unknown-scheme://foo.html", ""},
     {"file:///tmp/test.html", ""},
     {"https://www.google.com", "https://www.google.com/"},
   };
   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     GURL origin = url.GetAsReferrer();
     EXPECT_EQ(cases[i].expected, origin.spec());
   }
 }

 TEST(GURLTest, GetWithEmptyPath) {
   struct TestCase {
     const char* input;
     const char* expected;
   } cases[] = {
     {"http://www.google.com", "http://www.google.com/"},
     {"javascript:window.alert(\"hello, world\");", ""},
     {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
     {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
     {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
   };

   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     GURL empty_path = url.GetWithEmptyPath();
     EXPECT_EQ(cases[i].expected, empty_path.spec());
   }
 }

 TEST(GURLTest, GetWithoutFilename) {
   struct TestCase {
     const char* input;
     const char* expected;
   } cases[] = {
     // Common Standard URLs.
     {"https://www.google.com",                    "https://www.google.com/"},
     {"https://www.google.com/",                   "https://www.google.com/"},
     {"https://www.google.com/maps.htm",           "https://www.google.com/"},
     {"https://www.google.com/maps/",              "https://www.google.com/maps/"},
     {"https://www.google.com/index.html",         "https://www.google.com/"},
     {"https://www.google.com/index.html?q=maps",  "https://www.google.com/"},
     {"https://www.google.com/index.html#maps/",   "https://www.google.com/"},
     {"https://foo:bar@www.google.com/maps.htm",   "https://foo:bar@www.google.com/"},
     {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"},
     {"https://www.google.com/maps/au/north",      "https://www.google.com/maps/au/"},
     {"https://www.google.com/maps/au/north/",     "https://www.google.com/maps/au/north/"},
     {"https://www.google.com/maps/au/index.html?q=maps#fragment/",     "https://www.google.com/maps/au/"},
     {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"},
     {"https://www.google.com/maps/au/north/?q=maps#fragment",          "https://www.google.com/maps/au/north/"},
     {"https://www.google.com/maps/au/north?q=maps#fragment",           "https://www.google.com/maps/au/"},
     // Less common standard URLs.
     {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
     {"file:///temporary/bar.html?baz=22","file:///temporary/"},
     {"ftp://foo/test/index.html",        "ftp://foo/test/"},
     {"gopher://foo/test/index.html",     "gopher://foo/test/"},
     {"ws://foo/test/index.html",         "ws://foo/test/"},
     // Non-standard, hierarchical URLs.
     {"chrome://foo/bar.html", "chrome://foo/"},
     {"httpa://foo/test/index.html", "httpa://foo/test/"},
     // Non-standard, non-hierarchical URLs.
     {"blob:https://foo.bar/test/index.html", ""},
     {"about:blank", ""},
     {"data:foobar", ""},
     {"scheme:opaque_data", ""},
     // Invalid URLs.
     {"foobar", ""},
   };

   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     GURL without_filename = url.GetWithoutFilename();
     EXPECT_EQ(cases[i].expected, without_filename.spec()) << i;
   }
 }

 TEST(GURLTest, Replacements) {
   // The URL canonicalizer replacement test will handle most of these case.
   // The most important thing to do here is to check that the proper
   // canonicalizer gets called based on the scheme of the input.
   struct ReplaceCase {
     using ApplyReplacementsFunc = GURL(const GURL&);

     const char* base;
     ApplyReplacementsFunc* apply_replacements;
     const char* expected;
   } replace_cases[] = {
       {.base = "http://www.google.com/foo/bar.html?foo#bar",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetPathStr("/");
              replacements.ClearQuery();
              replacements.ClearRef();
              return url.ReplaceComponents(replacements);
            },
        .expected = "http://www.google.com/"},
       {.base = "http://www.google.com/foo/bar.html?foo#bar",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetSchemeStr("javascript");
              replacements.ClearUsername();
              replacements.ClearPassword();
              replacements.ClearHost();
              replacements.ClearPort();
              replacements.SetPathStr("window.open('foo');");
              replacements.ClearQuery();
              replacements.ClearRef();
              return url.ReplaceComponents(replacements);
            },
        .expected = "javascript:window.open('foo');"},
       {.base = "file:///C:/foo/bar.txt",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetSchemeStr("http");
              replacements.SetHostStr("www.google.com");
              replacements.SetPortStr("99");
              replacements.SetPathStr("/foo");
              replacements.SetQueryStr("search");
              replacements.SetRefStr("ref");
              return url.ReplaceComponents(replacements);
            },
        .expected = "http://www.google.com:99/foo?search#ref"},
 #ifdef WIN32
       {.base = "http://www.google.com/foo/bar.html?foo#bar",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetSchemeStr("file");
              replacements.ClearUsername();
              replacements.ClearPassword();
              replacements.ClearHost();
              replacements.ClearPort();
              replacements.SetPathStr("c:\\");
              replacements.ClearQuery();
              replacements.ClearRef();
              return url.ReplaceComponents(replacements);
            },
        .expected = "file:///C:/"},
 #endif
       {.base = "filesystem:http://www.google.com/foo/bar.html?foo#bar",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetPathStr("/");
              replacements.ClearQuery();
              replacements.ClearRef();
              return url.ReplaceComponents(replacements);
            },
        .expected = "filesystem:http://www.google.com/foo/"},
       // Lengthen the URL instead of shortening it, to test creation of
       // inner_url.
       {.base = "filesystem:http://www.google.com/foo/",
        .apply_replacements =
            +[](const GURL& url) {
              GURL::Replacements replacements;
              replacements.SetPathStr("bar.html");
              replacements.SetQueryStr("foo");
              replacements.SetRefStr("bar");
              return url.ReplaceComponents(replacements);
            },
        .expected = "filesystem:http://www.google.com/foo/bar.html?foo#bar"},
   };

   for (const ReplaceCase& c : replace_cases) {
     GURL output = c.apply_replacements(GURL(c.base));

     EXPECT_EQ(c.expected, output.spec());

     EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
     if (output.SchemeIsFileSystem()) {
       // TODO(mmenke): inner_url()->spec() is currently the same as the spec()
       // for the GURL itself.  This should be fixed.
       // See https://crbug.com/619596
       EXPECT_EQ(c.expected, output.inner_url()->spec());
     }
   }
 }

 TEST(GURLTest, ClearFragmentOnDataUrl) {
   // http://crbug.com/291747 - a data URL may legitimately have trailing
   // whitespace in the spec after the ref is cleared. Test this does not trigger
   // the Parsed importing validation GURL_DCHECK in GURL.
   GURL url(" data: one # two ");
   EXPECT_TRUE(url.is_valid());

   // By default the trailing whitespace will have been stripped.
   EXPECT_EQ("data: one #%20two", url.spec());

   // Clear the URL's ref and observe the trailing whitespace.
   GURL::Replacements repl;
   repl.ClearRef();
   GURL url_no_ref = url.ReplaceComponents(repl);
   EXPECT_TRUE(url_no_ref.is_valid());
   EXPECT_EQ("data: one ", url_no_ref.spec());

   // Importing a parsed URL via this constructor overload will retain trailing
   // whitespace.
   GURL import_url(url_no_ref.spec(),
                   url_no_ref.parsed_for_possibly_invalid_spec(),
                   url_no_ref.is_valid());
   EXPECT_TRUE(import_url.is_valid());
   EXPECT_EQ(url_no_ref, import_url);
   EXPECT_EQ("data: one ", import_url.spec());
   EXPECT_EQ(" one ", import_url.path());

   // For completeness, test that re-parsing the same URL rather than importing
   // it trims the trailing whitespace.
   GURL reparsed_url(url_no_ref.spec());
   EXPECT_TRUE(reparsed_url.is_valid());
   EXPECT_EQ("data: one", reparsed_url.spec());
 }

 TEST(GURLTest, PathForRequest) {
   struct TestCase {
     const char* input;
     const char* expected;
     const char* inner_expected;
   } cases[] = {
       {"http://www.google.com", "/", nullptr},
       {"http://www.google.com/", "/", nullptr},
       {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22",
        nullptr},
       {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr},
       {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query",
        nullptr},
       {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref",
        "/foo/bar.html?query", "/temporary"},
       {"filesystem:http://www.google.com/temporary/foo/bar.html?query",
        "/foo/bar.html?query", "/temporary"},
   };

   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     EXPECT_EQ(cases[i].expected, url.PathForRequest());
     EXPECT_EQ(cases[i].expected, url.PathForRequestPiece());
     EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL);
     if (url.inner_url() && cases[i].inner_expected) {
       EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest());
       EXPECT_EQ(cases[i].inner_expected,
                 url.inner_url()->PathForRequestPiece());
     }
   }
 }

 TEST(GURLTest, EffectiveIntPort) {
   struct PortTest {
     const char* spec;
     int expected_int_port;
   } port_tests[] = {
     // http
     {"http://www.google.com/", 80},
     {"http://www.google.com:80/", 80},
     {"http://www.google.com:443/", 443},

     // https
     {"https://www.google.com/", 443},
     {"https://www.google.com:443/", 443},
     {"https://www.google.com:80/", 80},

     // ftp
     {"ftp://www.google.com/", 21},
     {"ftp://www.google.com:21/", 21},
     {"ftp://www.google.com:80/", 80},

     // file - no port
     {"file://www.google.com/", PORT_UNSPECIFIED},
     {"file://www.google.com:443/", PORT_UNSPECIFIED},

     // data - no port
     {"data:www.google.com:90", PORT_UNSPECIFIED},
     {"data:www.google.com", PORT_UNSPECIFIED},

     // filesystem - no port
     {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED},
     {"filesystem:file:///t/foo", PORT_UNSPECIFIED},
   };

   for (size_t i = 0; i < std::size(port_tests); i++) {
     GURL url(port_tests[i].spec);
     EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
   }
 }

 TEST(GURLTest, IPAddress) {
   struct IPTest {
     const char* spec;
     bool expected_ip;
   } ip_tests[] = {
     {"http://www.google.com/", false},
     {"http://192.168.9.1/", true},
     {"http://192.168.9.1.2/", false},
     {"http://192.168.m.1/", false},
     {"http://2001:db8::1/", false},
     {"http://[2001:db8::1]/", true},
     {"", false},
     {"some random input!", false},
   };

   for (size_t i = 0; i < std::size(ip_tests); i++) {
     GURL url(ip_tests[i].spec);
     EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
   }
 }

 TEST(GURLTest, HostNoBrackets) {
   struct TestCase {
     const char* input;
     const char* expected_host;
     const char* expected_plainhost;
   } cases[] = {
     {"http://www.google.com", "www.google.com", "www.google.com"},
     {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
     {"http://[::]/", "[::]", "::"},

     // Don't require a valid URL, but don't crash either.
     {"http://[]/", "[]", ""},
     {"http://[x]/", "[x]", "x"},
     {"http://[x/", "[x", "[x"},
     {"http://x]/", "x]", "x]"},
     {"http://[/", "[", "["},
     {"http://]/", "]", "]"},
     {"", "", ""},
   };
   for (size_t i = 0; i < std::size(cases); i++) {
     GURL url(cases[i].input);
     EXPECT_EQ(cases[i].expected_host, url.host());
     EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
     EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBracketsPiece());
   }
 }

 TEST(GURLTest, DomainIs) {
   GURL url_1("http://google.com/foo");
   EXPECT_TRUE(url_1.DomainIs("google.com"));

   // Subdomain and port are ignored.
   GURL url_2("http://www.google.com:99/foo");
   EXPECT_TRUE(url_2.DomainIs("google.com"));

   // Different top-level domain.
   GURL url_3("http://www.google.com.cn/foo");
   EXPECT_FALSE(url_3.DomainIs("google.com"));

   // Different host name.
   GURL url_4("http://www.iamnotgoogle.com/foo");
   EXPECT_FALSE(url_4.DomainIs("google.com"));

   // The input must be lower-cased otherwise DomainIs returns false.
   GURL url_5("http://www.google.com/foo");
   EXPECT_FALSE(url_5.DomainIs("Google.com"));

   // If the URL is invalid, DomainIs returns false.
   GURL invalid_url("google.com");
   EXPECT_FALSE(invalid_url.is_valid());
   EXPECT_FALSE(invalid_url.DomainIs("google.com"));

   GURL url_with_escape_chars("https://www.,.test");
   EXPECT_TRUE(url_with_escape_chars.is_valid());
   EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test");
   EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test"));
 }

 TEST(GURLTest, DomainIsTerminatingDotBehavior) {
   // If the host part ends with a dot, it matches input domains
   // with or without a dot.
   GURL url_with_dot("http://www.google.com./foo");
   EXPECT_TRUE(url_with_dot.DomainIs("google.com"));
   EXPECT_TRUE(url_with_dot.DomainIs("google.com."));
   EXPECT_TRUE(url_with_dot.DomainIs(".com"));
   EXPECT_TRUE(url_with_dot.DomainIs(".com."));

   // But, if the host name doesn't end with a dot and the input
   // domain does, then it's considered to not match.
   GURL url_without_dot("http://google.com/foo");
   EXPECT_FALSE(url_without_dot.DomainIs("google.com."));

   // If the URL ends with two dots, it doesn't match.
   GURL url_with_two_dots("http://www.google.com../foo");
   EXPECT_FALSE(url_with_two_dots.DomainIs("google.com"));
 }

 TEST(GURLTest, DomainIsWithFilesystemScheme) {
   GURL url_1("filesystem:http://www.google.com:99/foo/");
   EXPECT_TRUE(url_1.DomainIs("google.com"));

   GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/");
   EXPECT_FALSE(url_2.DomainIs("google.com"));
 }

 // Newlines should be stripped from inputs.
 TEST(GURLTest, Newlines) {
   // Constructor.
   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
   EXPECT_FALSE(
       url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

   // Relative path resolver.
   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
   EXPECT_FALSE(
       url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

   // Constructor.
   GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n ");
   EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec());
   EXPECT_TRUE(
       url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

   // Relative path resolver.
   GURL url_4 = url_1.Resolve(" \n /fo\to<\r ");
   EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec());
   EXPECT_TRUE(
       url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

   // Note that newlines are NOT stripped from ReplaceComponents.
 }

 TEST(GURLTest, IsStandard) {
   GURL a("http:foo/bar");
   EXPECT_TRUE(a.IsStandard());

   GURL b("foo:bar/baz");
   EXPECT_FALSE(b.IsStandard());

   GURL c("foo://bar/baz");
   EXPECT_FALSE(c.IsStandard());

   GURL d("cid:bar@baz");
   EXPECT_FALSE(d.IsStandard());
 }

 TEST(GURLTest, SchemeIsHTTPOrHTTPS) {
   EXPECT_TRUE(GURL("http://bar/").SchemeIsHTTPOrHTTPS());
   EXPECT_TRUE(GURL("HTTPS://BAR").SchemeIsHTTPOrHTTPS());
   EXPECT_FALSE(GURL("ftp://bar/").SchemeIsHTTPOrHTTPS());
 }

 TEST(GURLTest, SchemeIsWSOrWSS) {
   EXPECT_TRUE(GURL("WS://BAR/").SchemeIsWSOrWSS());
   EXPECT_TRUE(GURL("wss://bar/").SchemeIsWSOrWSS());
   EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
 }

 TEST(GURLTest, SchemeIsCryptographic) {
   EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic());
   EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic());
   EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic());

   EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic());
   EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic());
   EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic());

   EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic());
   EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
 }

 TEST(GURLTest, SchemeIsCryptographicStatic) {
   EXPECT_TRUE(GURL::SchemeIsCryptographic("https"));
   EXPECT_TRUE(GURL::SchemeIsCryptographic("wss"));
   EXPECT_FALSE(GURL::SchemeIsCryptographic("http"));
   EXPECT_FALSE(GURL::SchemeIsCryptographic("ws"));
   EXPECT_FALSE(GURL::SchemeIsCryptographic("ftp"));
 }

 TEST(GURLTest, SchemeIsBlob) {
   EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
   EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
   EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
 }

 TEST(GURLTest, SchemeIsLocal) {
   EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsLocal());
   EXPECT_TRUE(GURL("blob://bar/").SchemeIsLocal());
   EXPECT_TRUE(GURL("DATA:TEXT/HTML,BAR").SchemeIsLocal());
   EXPECT_TRUE(GURL("data:text/html,bar").SchemeIsLocal());
   EXPECT_TRUE(GURL("ABOUT:BAR").SchemeIsLocal());
   EXPECT_TRUE(GURL("about:bar").SchemeIsLocal());
   EXPECT_TRUE(GURL("FILESYSTEM:HTTP://FOO.EXAMPLE/BAR").SchemeIsLocal());
   EXPECT_TRUE(GURL("filesystem:http://foo.example/bar").SchemeIsLocal());

   EXPECT_FALSE(GURL("http://bar/").SchemeIsLocal());
   EXPECT_FALSE(GURL("file:///bar").SchemeIsLocal());
 }

 // Tests that the 'content' of the URL is properly extracted. This can be
 // complex in cases such as multiple schemes (view-source:http:) or for
 // javascript URLs. See GURL::GetContent for more details.
 TEST(GURLTest, ContentForNonStandardURLs) {
   struct TestCase {
     const char* url;
     const char* expected;
   } cases[] = {
       {"null", ""},
       {"not-a-standard-scheme:this is arbitrary content",
        "this is arbitrary content"},

       // When there are multiple schemes, only the first is excluded from the
       // content. Note also that for e.g. 'http://', the '//' is part of the
       // content not the scheme.
       {"view-source:http://example.com/path", "http://example.com/path"},
       {"blob:http://example.com/GUID", "http://example.com/GUID"},
       {"blob://http://example.com/GUID", "//http://example.com/GUID"},
       {"blob:http://user:password@example.com/GUID",
        "http://user:password@example.com/GUID"},

       // The octothorpe character ('#') marks the end of the URL content, and
       // the start of the fragment. It should not be included in the content.
       {"http://www.example.com/GUID#ref", "www.example.com/GUID"},
       {"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"},
       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
        "text/html,Question?%3Cdiv%20style=%22color:%20"},

       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
       {"filesystem:http://example.com/path", "/"},

       // Javascript URLs include '#' symbols in their content.
       {"javascript:#", "#"},
       {"javascript:alert('#');", "alert('#');"},
   };

   for (const auto& test : cases) {
     GURL url(test.url);
     EXPECT_EQ(test.expected, url.GetContent()) << test.url;
   }
 }

 // Tests that the URL path is properly extracted for unusual URLs. This can be
 // complex in cases such as multiple schemes (view-source:http:) or when
 // octothorpes ('#') are involved.
 TEST(GURLTest, PathForNonStandardURLs) {
   struct TestCase {
     const char* url;
     const char* expected;
   } cases[] = {
       {"null", ""},
       {"not-a-standard-scheme:this is arbitrary content",
        "this is arbitrary content"},
       {"view-source:http://example.com/path", "http://example.com/path"},
       {"blob:http://example.com/GUID", "http://example.com/GUID"},
       {"blob://http://example.com/GUID", "//http://example.com/GUID"},
       {"blob:http://user:password@example.com/GUID",
        "http://user:password@example.com/GUID"},

       {"http://www.example.com/GUID#ref", "/GUID"},
       {"http://me:secret@example.com/GUID/#ref", "/GUID/"},
       {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
        "text/html,Question"},

       // TODO(mkwst): This seems like a bug. https://crbug.com/513600
       {"filesystem:http://example.com/path", "/"},
   };

   for (const auto& test : cases) {
     GURL url(test.url);
     EXPECT_EQ(test.expected, url.path()) << test.url;
   }
 }

 TEST(GURLTest, EqualsIgnoringRef) {
   const struct {
     const char* url_a;
     const char* url_b;
     bool are_equals;
   } kTestCases[] = {
       // No ref.
       {"http://a.com", "http://a.com", true},
       {"http://a.com", "http://b.com", false},

       // Same Ref.
       {"http://a.com#foo", "http://a.com#foo", true},
       {"http://a.com#foo", "http://b.com#foo", false},

       // Different Refs.
       {"http://a.com#foo", "http://a.com#bar", true},
       {"http://a.com#foo", "http://b.com#bar", false},

       // One has a ref, the other doesn't.
       {"http://a.com#foo", "http://a.com", true},
       {"http://a.com#foo", "http://b.com", false},

       // Empty refs.
       {"http://a.com#", "http://a.com#", true},
       {"http://a.com#", "http://a.com", true},

       // URLs that differ only by their last character.
       {"http://aaa", "http://aab", false},
       {"http://aaa#foo", "http://aab#foo", false},

       // Different size of the part before the ref.
       {"http://123#a", "http://123456#a", false},

       // Blob URLs
       {"blob:http://a.com#foo", "blob:http://a.com#foo", true},
       {"blob:http://a.com#foo", "blob:http://a.com#bar", true},
       {"blob:http://a.com#foo", "blob:http://b.com#bar", false},

       // Filesystem URLs
       {"filesystem:http://a.com#foo", "filesystem:http://a.com#foo", true},
       {"filesystem:http://a.com#foo", "filesystem:http://a.com#bar", true},
       {"filesystem:http://a.com#foo", "filesystem:http://b.com#bar", false},

       // Data URLs
       {"data:text/html,a#foo", "data:text/html,a#bar", true},
       {"data:text/html,a#foo", "data:text/html,a#foo", true},
       {"data:text/html,a#foo", "data:text/html,b#foo", false},
   };

   for (const auto& test_case : kTestCases) {
     SCOPED_TRACE(testing::Message()
                  << std::endl
                  << "url_a = " << test_case.url_a << std::endl
                  << "url_b = " << test_case.url_b << std::endl);
     // A versus B.
     EXPECT_EQ(test_case.are_equals,
               GURL(test_case.url_a).EqualsIgnoringRef(GURL(test_case.url_b)));
     // B versus A.
     EXPECT_EQ(test_case.are_equals,
               GURL(test_case.url_b).EqualsIgnoringRef(GURL(test_case.url_a)));
   }
 }

 TEST(GURLTest, DebugAlias) {
   GURL url("https://foo.com/bar");
   DEBUG_ALIAS_FOR_GURL(url_debug_alias, url);
   EXPECT_STREQ("https://foo.com/bar", url_debug_alias);
 }

 TEST(GURLTest, InvalidHost) {
   // This contains an invalid percent escape (%T%) and also a valid
   // percent escape that's not 7-bit ascii (%ae), so that the unescaped
   // host contains both an invalid percent escape and invalid UTF-8.
   GURL url("http://%T%Ae");

   EXPECT_FALSE(url.is_valid());
   EXPECT_TRUE(url.SchemeIs(url::kHttpScheme));

   // The invalid percent escape becomes an escaped percent sign (%25), and the
   // invalid UTF-8 character becomes REPLACEMENT CHARACTER' (U+FFFD) encoded as
   // UTF-8.
   EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD");
 }

 TEST(GURLTest, PortZero) {
   GURL port_zero_url("http://127.0.0.1:0/blah");

   // https://url.spec.whatwg.org/#port-state says that the port 1) consists of
   // ASCII digits (this excludes negative numbers) and 2) cannot be greater than
   // 2^16-1.  This means that port=0 should be valid.
   EXPECT_TRUE(port_zero_url.is_valid());
   EXPECT_EQ("0", port_zero_url.port());
   EXPECT_EQ("127.0.0.1", port_zero_url.host());
   EXPECT_EQ("http", port_zero_url.scheme());

   // https://crbug.com/1065532: SchemeHostPort would previously incorrectly
   // consider port=0 to be invalid.
   SchemeHostPort scheme_host_port(port_zero_url);
   EXPECT_TRUE(scheme_host_port.IsValid());
   EXPECT_EQ(port_zero_url.scheme(), scheme_host_port.scheme());
   EXPECT_EQ(port_zero_url.host(), scheme_host_port.host());
   EXPECT_EQ(port_zero_url.port(),
             gurl_base::NumberToString(scheme_host_port.port()));

   // https://crbug.com/1065532: The SchemeHostPort problem above would lead to
   // bizarre results below - resolved origin would incorrectly be returned as an
   // opaque origin derived from |another_origin|.
   url::Origin another_origin = url::Origin::Create(GURL("http://other.com"));
   url::Origin resolved_origin =
       url::Origin::Resolve(port_zero_url, another_origin);
   EXPECT_FALSE(resolved_origin.opaque());
   EXPECT_EQ(port_zero_url.scheme(), resolved_origin.scheme());
   EXPECT_EQ(port_zero_url.host(), resolved_origin.host());
   EXPECT_EQ(port_zero_url.port(), gurl_base::NumberToString(resolved_origin.port()));

   // port=0 and default HTTP port are different.
   GURL default_port("http://127.0.0.1/foo");
   EXPECT_EQ(0, SchemeHostPort(port_zero_url).port());
   EXPECT_EQ(80, SchemeHostPort(default_port).port());
   url::Origin default_port_origin = url::Origin::Create(default_port);
   EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin));
 }

 class GURLTestTraits {
  public:
   using UrlType = GURL;

   static UrlType CreateUrlFromString(gurl_base::StringPiece s) { return GURL(s); }
   static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
   static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }

   // Only static members.
   GURLTestTraits() = delete;
 };

 INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);

 }  // namespace url