Update googleurl to the latest version from Chromium
The Chromium revision used is 222c2227273db27cb5724809c986923147fab4bd,
dated Tue Nov 2 18:13:07 2021 +0000.
Change-Id: I9a0f35bb724137f99bc3b76439055fa6cc80aef4
diff --git a/AUTHORS b/AUTHORS
index 1aa2922..012e246 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -18,6 +18,7 @@
Aaryaman Vasishta <jem456.vasishta@gmail.com>
Abdu Ameen <abdu.ameen000@gmail.com>
Abhijeet Kandalkar <abhijeet.k@samsung.com>
+Abhinav Vij <abhinav.vij@samsung.com>
Abhishek Agarwal <abhishek.a21@samsung.com>
Abhishek Kanike <abhishek.ka@samsung.com>
Abhishek Singh <abhi.rathore@samsung.com>
@@ -31,6 +32,7 @@
Adam Yi <i@adamyi.com>
Addanki Gandhi Kishor <kishor.ag@samsung.com>
Adenilson Cavalcanti <a.cavalcanti@samsung.com>
+Aditi Singh <a20.singh@samsung.com>
Aditya Bhargava <heuristicist@gmail.com>
Adrian Belgun <adrian.belgun@intel.com>
Adrian Ratiu <adrian.ratiu@collabora.corp-partner.google.com>
@@ -42,6 +44,7 @@
Akos Kiss <akiss@inf.u-szeged.hu>
Aku Kotkavuo <a.kotkavuo@partner.samsung.com>
Aldo Culquicondor <alculquicondor@gmail.com>
+Alec Petridis <alecthechop@gmail.com>
Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com>
Aleksei Gurianov <gurianov@gmail.com>
Alex Chronopoulos <achronop@gmail.com>
@@ -89,6 +92,7 @@
Andrei Borza <andrei.borza@gmail.com>
Andrei Parvu <andrei.prv@gmail.com>
Andrei Parvu <parvu@adobe.com>
+Andreu Botella <andreu@andreubotella.com>
Andrew Boyarshin <andrew.boyarshin@gmail.com>
Andrew Brampton <me@bramp.net>
Andrew Brindamour <abrindamour@bluejeans.com>
@@ -103,6 +107,7 @@
Ankur Verma <ankur1.verma@samsung.com>
Anna Henningsen <anna@addaleax.net>
Anne Kao <annekao94@gmail.com>
+Anshul Jain <anshul.jain@samsung.com>
Anssi Hannula <anssi.hannula@iki.fi>
Anthony Halliday <anth.halliday12@gmail.com>
Anton Bershanskiy <bershanskiy@pm.me>
@@ -127,7 +132,7 @@
Asami Doi <d0iasm.pub@gmail.com>
Ashish Kumar Gupta <guptaag@amazon.com>
Ashlin Joseph <ashlin.j@samsung.com>
-Ashutosh <coder.commando@gmail.com>
+Ashutosh <codingtosh@gmail.com>
Asish Singh <asish.singh@samsung.com>
Attila Dusnoki <dati91@gmail.com>
Avinaash Doreswamy <avi.nitk@samsung.com>
@@ -204,6 +209,7 @@
Chaobin Zhang <zhchbin@gmail.com>
Charles Vaughn <cvaughn@gmail.com>
Cheng Zhao <zcbenz@gmail.com>
+Cheng Yu <yuzichengcode@gmail.com>
Choongwoo Han <cwhan.tunz@gmail.com>
Chris Greene <cwgreene@amazon.com>
Chris Harrelson <chrishtr@gmail.com>
@@ -271,12 +277,14 @@
Deepak Mohan <hop2deep@gmail.com>
Deepak Sharma <deepak.sharma@amd.com>
Deepak Singla <deepak.s@samsung.com>
+Deniz Eren Evrendilek <devrendilek@gmail.com>
Deokjin Kim <deokjin81.kim@samsung.com>
Derek Halman <d.halman@gmail.com>
Devlin Cronin <rdevlin.cronin@gmail.com>
Dhi Aurrahman <dio@rockybars.com>
Di Wu <meetwudi@gmail.com>
Diana Suvorova <diana.suvorova@gmail.com>
+Diego Fernández Santos <agujaydedal@gmail.com>
Diego Ferreiro Val <elfogris@gmail.com>
Dillon Sellars <dill.sellars@gmail.com>
Divya Bansal <divya.bansal@samsung.com>
@@ -309,6 +317,7 @@
Eero Häkkinen <eero.hakkinen@intel.com>
Egor Starkov <egor.starkov@samsung.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Ehsan Akhgari <ehsan@mightyapp.com>
Elan Ruusamäe <elan.ruusamae@gmail.com>
Ergun Erdogmus <erdogmusergun@gmail.com>
Eric Ahn <byungwook.ahn@gmail.com>
@@ -337,6 +346,7 @@
Fernando Jiménez Moreno <ferjmoreno@gmail.com>
Finbar Crago <finbar.crago@gmail.com>
François Beaufort <beaufort.francois@gmail.com>
+François Devatine <devatine@verizonmedia.com>
Francois Kritzinger <francoisk777@gmail.com>
Francois Marier <francois@brave.com>
Francois Rauch <leopardb@gmail.com>
@@ -416,7 +426,9 @@
Hwanseung Lee <hs1217.lee@samsung.com>
Hyemi Shin <hyemi.sin@samsung.com>
HyeockJin Kim <kherootz@gmail.com>
+Hyojeong Kim <42.4.hyojekim@gmail.com>
Hyungchan Kim <inlinechan@gmail.com>
+Hyungun Kim <khw3754@gmail.com>
Hyungwook Lee <hyungwook.lee@navercorp.com>
Hyungwook Lee <withlhw@gmail.com>
HyunJi Kim <hjkim3323@gmail.com>
@@ -451,6 +463,7 @@
Jaeseok Yoon <yjaeseok@gmail.com>
Jaewon Choi <jaewon.james.choi@gmail.com>
Jaeyong Bae <jdragon.bae@gmail.com>
+Jagdish Chourasia <jagdish.c@samsung.com>
Jaime Soriano Pastor <jsorianopastor@gmail.com>
Jake Helfert <jake@helfert.us>
Jake Hendy <me@jakehendy.com>
@@ -477,6 +490,7 @@
Jeado Ko <haibane84@gmail.com>
Jeffrey C <jeffreyca16@gmail.com>
Jeffrey Yeung <jeffrey.yeung@poly.com>
+Jeong A Shin <jeonga@khu.ac.kr>
Jeongeun Kim <je_julie.kim@samsung.com>
Jeongmin Kim <kimwjdalsl@gmail.com>
Jeongwoo Park <jwoo.park@navercorp.com>
@@ -558,6 +572,7 @@
Joyer Huang <collger@gmail.com>
Juan Cruz Viotti <jv@jviotti.com>
Juan Jose Lopez Jaimez <jj.lopezjaimez@gmail.com>
+Juba Borgohain <chromiumjuba@gmail.com>
Juhui Lee <juhui24.lee@samsung.com>
Julian Geppert <spctstr@gmail.com>
Julien Brianceau <jbriance@cisco.com>
@@ -565,6 +580,7 @@
Julien Racle <jracle@logitech.com>
Jun Fang <jun_fang@foxitsoftware.com>
Jun Jiang <jun.a.jiang@intel.com>
+Jungchang Park <valley84265@gmail.com>
Junchao Han <junchao.han@intel.com>
Junghoon Lee <sjh836@gmail.com>
Junghyuk Yoo <wjdgurdl272@gmail.com>
@@ -595,6 +611,7 @@
Kaustubh Atrawalkar <kaustubh.ra@gmail.com>
Ke He <ke.he@intel.com>
Keene Pan <keenepan@linpus.com>
+Keiichiro Nagashima <n4ag3a2sh1i@gmail.com>
Keita Suzuki <keitasuzuki.park@gmail.com>
Keita Yoshimoto <y073k3@gmail.com>
Keith Chen <keitchen@amazon.com>
@@ -603,6 +620,7 @@
Kenneth Zhou <knthzh@gmail.com>
Kenny Levinsen <kl@kl.wtf>
Keonho Kim <keonho07.kim@samsung.com>
+Ketan Atri <ketan.atri@samsung.com>
Ketan Goyal <ketan.goyal@samsung.com>
Kevin Gibbons <bakkot@gmail.com>
Kevin Lee Helpingstine <sig11@reprehensible.net>
@@ -693,6 +711,7 @@
Marc des Garets <marc.desgarets@googlemail.com>
Marcin Wiacek <marcin@mwiacek.com>
Marco Rodrigues <gothicx@gmail.com>
+Marcos Caceres <marcos@marcosc.com>
Mariam Ali <alimariam@noogler.google.com>
Mario Pistrich <m.pistrich@gmail.com>
Mario Sanchez Prada <mario.prada@samsung.com>
@@ -736,6 +755,7 @@
Md. Hasanur Rashid <hasanur.r@samsung.com>
Md Jobed Hossain <jobed.h@samsung.com>
Md Sami Uddin <md.sami@samsung.com>
+Micha Hanselmann <micha.hanselmann@gmail.com>
Michael Cirone <mikecirone@gmail.com>
Michael Constant <mconst@gmail.com>
Michael Forney <mforney@mforney.org>
@@ -760,6 +780,7 @@
Milton Chiang <milton.chiang@mediatek.com>
Milutin Smiljanic <msmiljanic.gm@gmail.com>
Minchul Kang <tegongkang@gmail.com>
+Mingeun Park <mindal99546@gmail.com>
Minggang Wang <minggang.wang@intel.com>
Mingmin Xie <melvinxie@gmail.com>
Minjeong Kim <deoxyribonucleicacid150@gmail.com>
@@ -795,6 +816,7 @@
Naveen Kumar S G <naveensg@samsung.com>
Nayan Kumar K <qtc746@motorola.com>
Nayeem Hasan <nayeemhasan.nh01@gmail.com>
+Nayeon Kim <skdus3373@gmail.com>
Neal Gompa <ngompa13@gmail.com>
Ned Williamson <nedwilliamson@gmail.com>
Nedeljko Babic <nedeljko.babic@imgtec.com>
@@ -888,6 +910,7 @@
Raghavendra Ghatage <r.ghatage@samsung.com>
Raghu Ram Nagaraj <r.nagaraj@samsung.com>
Rahul Gupta <rahul.g@samsung.com>
+Rahul Yadav <rahul.yadav@samsung.com>
Rajesh Mahindra <rmahindra@uber.com>
Rajneesh Rana <rajneesh.r@samsung.com>
Raman Tenneti <raman.tenneti@gmail.com>
@@ -906,7 +929,7 @@
Réda Housni Alaoui <alaoui.rda@gmail.com>
Refael Ackermann <refack@gmail.com>
Rémi Arnaud <jsremi@gmail.com>
-Renata Hodovan <rhodovan.u-szeged@partner.samsung.com>
+Renata Hodovan <hodovan.renata@gmail.com>
Rene Bolldorf <rb@radix.io>
Rene Ladan <r.c.ladan@gmail.com>
Richard Baranyi <lordprotector@gmail.com>
@@ -931,6 +954,7 @@
Rosen Dash <rosen.dash@gmail.com>
Ross Kirsling <rkirsling@gmail.com>
Ross Wollman <ross.wollman@gmail.com>
+Ruan Beihong <ruanbeihong@gmail.com>
ruben <chromium@hybridsource.org>
Ruben Bridgewater <ruben@bridgewater.de>
Ruben Terrazas <rubentopo@gmail.com>
@@ -1019,6 +1043,7 @@
Shreyas Gopal <shreyas.g@samsung.com>
Shreyas VA <v.a.shreyas@gmail.com>
Shubham Agrawal <shubag@amazon.com>
+Shubham Gupta <shubh.gupta@samsung.com>
Siba Samal <siba.samal@samsung.com>
Siddharth Bagai <b.siddharth@samsung.com>
Siddharth Shankar <funkysidd@gmail.com>
@@ -1079,7 +1104,10 @@
Taeheon Kim <skyrabbits1@gmail.com>
Taeho Nam <thn7440@gmail.com>
Taehoon Lee <taylor.hoon@gmail.com>
+Taeseong Yu <yugeeklab@gmail.com>
+Taeyeon Kim <ssg9732@gmail.com>
Tae Shin <taeshindev@gmail.com>
+Takaaki Suzuki <takaakisuzuki.14@gmail.com>
Takashi Fujita <tgfjt.mail@gmail.com>
Takeshi Kurosawa <taken.spc@gmail.com>
Tanay Chowdhury <tanay.c@samsung.com>
@@ -1241,6 +1269,7 @@
Zoltan Kuscsik <zoltan.kuscsik@linaro.org>
Zsolt Borbely <zsborbely.u-szeged@partner.samsung.com>
方觉 (Fang Jue) <fangjue23303@gmail.com>
+迷渡 <justjavac@gmail.com>
# Please DO NOT APPEND here. See comments at the top of the file.
# END individuals section.
@@ -1296,6 +1325,8 @@
Pengutronix e.K. <*@pengutronix.de>
Rakuten Kobo Inc. <*@kobo.com>
Rakuten Kobo Inc. <*@rakuten.com>
+Red Hat Inc. <*@redhat.com>
+Semihalf <*@semihalf.com>
Seznam.cz, a.s. <*@firma.seznam.cz>
Slack Technologies Inc. <*@slack-corp.com>
Spotify AB <*@spotify.com>
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index 6651220..58a7d0d 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -51,7 +51,7 @@
// Annotate a function indicating it should not be inlined.
// Use like:
// NOINLINE void DoStuff() { ... }
-#if defined(COMPILER_GCC)
+#if defined(COMPILER_GCC) || defined(__clang__)
#define NOINLINE __attribute__((noinline))
#elif defined(COMPILER_MSVC)
#define NOINLINE __declspec(noinline)
diff --git a/base/containers/span.h b/base/containers/span.h
index d43814e..550eec8 100644
--- a/base/containers/span.h
+++ b/base/containers/span.h
@@ -19,7 +19,6 @@
#include "base/containers/contiguous_iterator.h"
#include "base/cxx17_backports.h"
#include "base/cxx20_to_address.h"
-#include "base/macros.h"
#include "base/template_util.h"
namespace gurl_base {
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h
index 77d689a..81e573f 100644
--- a/base/cxx17_backports.h
+++ b/base/cxx17_backports.h
@@ -10,8 +10,12 @@
#include <initializer_list>
#include <memory>
#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
#include "polyfills/base/check.h"
+#include "base/functional/invoke.h"
namespace gurl_base {
@@ -105,7 +109,27 @@
template <typename T>
constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
- return clamp(v, lo, hi, std::less<T>{});
+ return gurl_base::clamp(v, lo, hi, std::less<T>{});
+}
+
+// C++14 implementation of C++17's std::apply():
+// https://en.cppreference.com/w/cpp/utility/apply
+namespace internal {
+template <class F, class Tuple, std::size_t... I>
+constexpr decltype(auto) apply_impl(F&& f,
+ Tuple&& t,
+ std::index_sequence<I...>) {
+ return gurl_base::invoke(std::forward<F>(f),
+ std::get<I>(std::forward<Tuple>(t))...);
+}
+} // namespace internal
+
+template <class F, class Tuple>
+constexpr decltype(auto) apply(F&& f, Tuple&& t) {
+ return internal::apply_impl(
+ std::forward<F>(f), std::forward<Tuple>(t),
+ std::make_index_sequence<
+ std::tuple_size<std::remove_reference_t<Tuple>>::value>{});
}
} // namespace base
diff --git a/base/debug/leak_annotations.h b/base/debug/leak_annotations.h
index dc50246..b551552 100644
--- a/base/debug/leak_annotations.h
+++ b/base/debug/leak_annotations.h
@@ -5,7 +5,6 @@
#ifndef BASE_DEBUG_LEAK_ANNOTATIONS_H_
#define BASE_DEBUG_LEAK_ANNOTATIONS_H_
-#include "base/macros.h"
#include "build/build_config.h"
// This file defines macros which can be used to annotate intentional memory
@@ -26,9 +25,12 @@
class ScopedLeakSanitizerDisabler {
public:
ScopedLeakSanitizerDisabler() { __lsan_disable(); }
+
+ ScopedLeakSanitizerDisabler(const ScopedLeakSanitizerDisabler&) = delete;
+ ScopedLeakSanitizerDisabler& operator=(const ScopedLeakSanitizerDisabler&) =
+ delete;
+
~ScopedLeakSanitizerDisabler() { __lsan_enable(); }
- private:
- DISALLOW_COPY_AND_ASSIGN(ScopedLeakSanitizerDisabler);
};
#define ANNOTATE_SCOPED_MEMORY_LEAK \
diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc
index 0569da1..c9a69ec 100644
--- a/base/strings/safe_sprintf.cc
+++ b/base/strings/safe_sprintf.cc
@@ -126,6 +126,9 @@
DEBUG_CHECK(size <= kSSizeMax);
}
+ Buffer(const Buffer&) = delete;
+ Buffer& operator=(const Buffer&) = delete;
+
~Buffer() {
// The code calling the constructor guaranteed that there was enough space
// to store a trailing NUL -- and in debug builds, we are actually
@@ -270,8 +273,6 @@
// was sufficiently big. This number always excludes the trailing NUL byte
// and it is guaranteed to never grow bigger than kSSizeMax-1.
size_t count_;
-
- DISALLOW_COPY_AND_ASSIGN(Buffer);
};
diff --git a/base/strings/safe_sprintf_unittest.cc b/base/strings/safe_sprintf_unittest.cc
index cde415d..be8af13 100644
--- a/base/strings/safe_sprintf_unittest.cc
+++ b/base/strings/safe_sprintf_unittest.cc
@@ -466,14 +466,17 @@
internal::SetSafeSPrintfSSizeMaxForTest(sz);
}
+ ScopedSafeSPrintfSSizeMaxSetter(const ScopedSafeSPrintfSSizeMaxSetter&) =
+ delete;
+ ScopedSafeSPrintfSSizeMaxSetter& operator=(
+ const ScopedSafeSPrintfSSizeMaxSetter&) = delete;
+
~ScopedSafeSPrintfSSizeMaxSetter() {
internal::SetSafeSPrintfSSizeMaxForTest(old_ssize_max_);
}
private:
size_t old_ssize_max_;
-
- DISALLOW_COPY_AND_ASSIGN(ScopedSafeSPrintfSSizeMaxSetter);
};
#endif
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index 8a22e8d..f01722d 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -23,6 +23,7 @@
#include <stddef.h>
+#include <algorithm>
#include <iosfwd>
#include <limits>
#include <string>
diff --git a/build/build_config.h b/build/build_config.h
index daf51ff..63cec87 100644
--- a/build/build_config.h
+++ b/build/build_config.h
@@ -90,6 +90,8 @@
#define OS_AIX 1
#elif defined(__asmjs__) || defined(__wasm__)
#define OS_ASMJS 1
+#elif defined(__MVS__)
+#define OS_ZOS 1
#else
#error Please add support for your platform in build/build_config.h
#endif
@@ -112,7 +114,7 @@
defined(OS_FREEBSD) || defined(OS_IOS) || defined(OS_LINUX) || \
defined(OS_CHROMEOS) || defined(OS_MAC) || defined(OS_NACL) || \
defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_QNX) || \
- defined(OS_SOLARIS)
+ defined(OS_SOLARIS) || defined(OS_ZOS)
#define OS_POSIX 1
#endif
diff --git a/copy.bara.sky b/copy.bara.sky
index 33c0f00..ffe4f61 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -64,6 +64,7 @@
"base/debug/alias.h",
"base/export_template.h",
"base/logging.h",
+ "base/metrics/histogram_macros.h",
"base/notreached.h",
"base/trace_event/memory_usage_estimator.h",
"third_party/perfetto/include/perfetto/tracing/traced_value.h",
diff --git a/polyfills/base/metrics/histogram_macros.h b/polyfills/base/metrics/histogram_macros.h
index 127c53c..efd8cff 100644
--- a/polyfills/base/metrics/histogram_macros.h
+++ b/polyfills/base/metrics/histogram_macros.h
@@ -6,6 +6,7 @@
#define POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_
#define UMA_HISTOGRAM_ENUMERATION(name, ...) do {} while(false)
+#define UMA_HISTOGRAM_BOOLEAN(name, ...) do {} while(false)
#endif /* POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_ */
diff --git a/url/gurl.cc b/url/gurl.cc
index 18a46f1..474919a 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -238,7 +238,7 @@
output.Complete();
- ProcessFileOrFileSystemURLAfterReplaceComponents(result);
+ result.ProcessFileSystemURLAfterReplaceComponents();
return result;
}
@@ -258,41 +258,28 @@
output.Complete();
- ProcessFileOrFileSystemURLAfterReplaceComponents(result);
+ result.ProcessFileSystemURLAfterReplaceComponents();
return result;
}
-void GURL::ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const {
- if (!url.is_valid_)
+void GURL::ProcessFileSystemURLAfterReplaceComponents() {
+ if (!is_valid_)
return;
- if (url.SchemeIsFileSystem()) {
- url.inner_url_ =
- std::make_unique<GURL>(url.spec_.data(), url.parsed_.Length(),
- *url.parsed_.inner_parsed(), true);
+ if (SchemeIsFileSystem()) {
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
-#ifdef WIN32
- if (url.SchemeIsFile()) {
- // On Win32, some file URLs created through ReplaceComponents used to lose
- // its hostname after getting reparsed (e.g. when it's sent through IPC) due
- // to special handling of file URLs with Windows-drive paths in the URL
- // parser. To make the behavior for URLs modified through ReplaceComponents
- // (instead of getting fully reparsed) the same, immediately reparse the
- // URL here to trigger the special handling.
- // See https://crbug.com/1214098.
- url = GURL(url.spec());
- }
-#endif
}
-GURL GURL::GetOrigin() const {
+GURL GURL::DeprecatedGetOriginAsURL() const {
// This doesn't make sense for invalid or nonstandard URLs, so return
// the empty URL.
if (!is_valid_ || !IsStandard())
return GURL();
if (SchemeIsFileSystem())
- return inner_url_->GetOrigin();
+ return inner_url_->DeprecatedGetOriginAsURL();
url::Replacements<char> replacements;
replacements.ClearUsername();
diff --git a/url/gurl.h b/url/gurl.h
index c70c5a4..64a2ee3 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -197,7 +197,13 @@
//
// It is an error to get the origin of an invalid URL. The result
// will be the empty URL.
- GURL GetOrigin() const;
+ //
+ // WARNING: Please avoid converting urls into origins if at all possible!
+ // //docs/security/origin-vs-url.md is a list of gotchas that can result. Such
+ // conversions will likely return a wrong result for about:blank and/or in
+ // the presence of the iframe.sandbox attribute. Prefer to get origins
+ // directly from the source (e.g. RenderFrameHost::GetLastCommittedOrigin).
+ GURL DeprecatedGetOriginAsURL() const;
// A helper function to return a GURL stripped from the elements that are not
// supposed to be sent as HTTP referrer: username, password and ref fragment.
@@ -468,7 +474,7 @@
return gurl_base::StringPiece(&spec_[comp.begin], comp.len);
}
- void ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const;
+ void ProcessFileSystemURLAfterReplaceComponents();
// The actual text of the URL, in canonical ASCII form.
std::string spec_;
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index f3b9f3c..68817af 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -398,7 +398,7 @@
};
for (size_t i = 0; i < gurl_base::size(cases); i++) {
GURL url(cases[i].input);
- GURL origin = url.GetOrigin();
+ GURL origin = url.DeprecatedGetOriginAsURL();
EXPECT_EQ(cases[i].expected, origin.spec());
}
}
@@ -558,23 +558,34 @@
// http://crbug.com/291747 - a data URL may legitimately have trailing
// whitespace in the spec after the ref is cleared. Test this does not trigger
// the Parsed importing validation GURL_DCHECK in GURL.
- GURL url(" data: one ? two # three ");
+ GURL url(" data: one # two ");
+ EXPECT_TRUE(url.is_valid());
// By default the trailing whitespace will have been stripped.
- EXPECT_EQ("data: one ?%20two%20#%20three", url.spec());
+ EXPECT_EQ("data: one #%20two", url.spec());
+
+ // Clear the URL's ref and observe the trailing whitespace.
GURL::Replacements repl;
repl.ClearRef();
GURL url_no_ref = url.ReplaceComponents(repl);
-
- EXPECT_EQ("data: one ?%20two%20", url_no_ref.spec());
+ EXPECT_TRUE(url_no_ref.is_valid());
+ EXPECT_EQ("data: one ", url_no_ref.spec());
// Importing a parsed URL via this constructor overload will retain trailing
// whitespace.
GURL import_url(url_no_ref.spec(),
url_no_ref.parsed_for_possibly_invalid_spec(),
url_no_ref.is_valid());
+ EXPECT_TRUE(import_url.is_valid());
EXPECT_EQ(url_no_ref, import_url);
- EXPECT_EQ(import_url.query(), "%20two%20");
+ EXPECT_EQ("data: one ", import_url.spec());
+ EXPECT_EQ(" one ", import_url.path());
+
+ // For completeness, test that re-parsing the same URL rather than importing
+ // it trims the trailing whitespace.
+ GURL reparsed_url(url_no_ref.spec());
+ EXPECT_TRUE(reparsed_url.is_valid());
+ EXPECT_EQ("data: one", reparsed_url.spec());
}
TEST(GURLTest, PathForRequest) {
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
index 82d1f55..4aaf495 100644
--- a/url/origin_abstract_tests.h
+++ b/url/origin_abstract_tests.h
@@ -370,15 +370,7 @@
{"file:///etc/passwd", {"file", "", 0}},
{"file://example.com/etc/passwd", {"file", "example.com", 0}},
{"file:///", {"file", "", 0}},
-
-#ifdef WIN32
- // TODO(https://crbug.com/1214098): Consider unifying URL parsing behavior
- // on all platforms (or at least make sure that serialization always
- // round-trips - see https://crbug.com/1214098).
- {"file://hostname/C:/dir/file.txt", {"file", "", 0}},
-#else
{"file://hostname/C:/dir/file.txt", {"file", "hostname", 0}},
-#endif
// HTTP URLs
{"http://example.com/", {"http", "example.com", 80}},
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
index a8bde47..e55c9d5 100644
--- a/url/scheme_host_port_unittest.cc
+++ b/url/scheme_host_port_unittest.cc
@@ -16,12 +16,14 @@
class SchemeHostPortTest : public testing::Test {
public:
SchemeHostPortTest() = default;
+
+ SchemeHostPortTest(const SchemeHostPortTest&) = delete;
+ SchemeHostPortTest& operator=(const SchemeHostPortTest&) = delete;
+
~SchemeHostPortTest() override = default;
private:
url::ScopedSchemeRegistryForTests scoped_registry_;
-
- DISALLOW_COPY_AND_ASSIGN(SchemeHostPortTest);
};
void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc
index 8f6c2f8..b0740a2 100644
--- a/url/url_canon_fileurl.cc
+++ b/url/url_canon_fileurl.cc
@@ -4,6 +4,7 @@
// Functions for canonicalizing "file:" URLs.
+#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -14,6 +15,44 @@
namespace {
+bool IsLocalhost(const char* spec, int begin, int end) {
+ if (begin > end)
+ return false;
+ return gurl_base::StringPiece(&spec[begin], end - begin) == "localhost";
+}
+
+bool IsLocalhost(const char16_t* spec, int begin, int end) {
+ if (begin > end)
+ return false;
+ return gurl_base::StringPiece16(&spec[begin], end - begin) == u"localhost";
+}
+
+template <typename CHAR>
+int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
+ if (begin > end)
+ return -1;
+
+ // First guess the beginning of the drive letter.
+ // If there is something that looks like a drive letter in the spec between
+ // begin and end, store its position in drive_letter_pos.
+ int drive_letter_pos =
+ DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
+ if (drive_letter_pos < begin)
+ return -1;
+
+ // Check if the path up to the drive letter candidate can be canonicalized as
+ // "/".
+ Component sub_path = MakeRange(begin, drive_letter_pos);
+ RawCanonOutput<1024> output;
+ Component output_path;
+ bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
+ if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
+ return -1;
+ }
+
+ return drive_letter_pos;
+}
+
#ifdef WIN32
// Given a pointer into the spec, this copies and canonicalizes the drive
@@ -21,41 +60,16 @@
// spec, it won't do anything. The index of the next character in the input
// spec is returned (after the colon when a drive spec is found, the begin
// offset if one is not).
-template<typename CHAR>
-int FileDoDriveSpec(const CHAR* spec, int begin, int end,
- CanonOutput* output) {
- // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
- // /./c:/foo, (with backslashes instead of slashes as well). The code
- // first guesses the beginning of the drive letter, then verifies that the
- // path up to that point can be canonicalised as "/". If it can, then the
- // found drive letter is indeed a drive letter, otherwise the path has no
- // drive letter in it.
- if (begin > end) // Nothing to search in.
- return begin; // Found no letter, so didn't consum any characters.
-
- // If there is something that looks like a drive letter in the spec between
- // being and end, store its position in drive_letter_pos.
- int drive_letter_pos =
- DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
+template <typename CHAR>
+int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
+ int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
if (drive_letter_pos < begin)
- return begin; // Found no letter, so didn't consum any characters.
+ return begin;
- // Check if the path up to the drive letter candidate can be canonicalized as
- // "/".
- Component sub_path = MakeRange(begin, drive_letter_pos);
- Component output_path;
- const int initial_length = output->length();
- bool success = CanonicalizePath(spec, sub_path, output, &output_path);
- if (!success || output_path.len != 1 ||
- output->at(output_path.begin) != '/') {
- // Undo writing the canonicalized path.
- output->set_length(initial_length);
- return begin; // Found no letter, so didn't consum any characters.
- }
+ // By now, a valid drive letter is confirmed at position drive_letter_pos,
+ // followed by a valid drive letter separator (a colon or a pipe).
- // By now, "/" has been written to the output and a valid drive letter is
- // confirmed at position drive_letter_pos, followed by a valid drive letter
- // separator (a colon or a pipe).
+ output->push_back('/');
// Normalize Windows drive letters to uppercase.
if (gurl_base::IsAsciiLower(spec[drive_letter_pos]))
@@ -88,9 +102,12 @@
// drive colon (if any, Windows only), or the first slash of the path.
bool success = true;
if (after_drive < path.end()) {
- // Use the regular path canonicalizer to canonicalize the rest of the
- // path. Give it a fake output component to write into. DoCanonicalizeFile
- // will compute the full path component.
+ // Use the regular path canonicalizer to canonicalize the rest of the path
+ // after the drive.
+ //
+ // Give it a fake output component to write into, since we will be
+ // calculating the out_path ourselves (consisting of both the drive and the
+ // path we canonicalize here).
Component sub_path = MakeRange(after_drive, path.end());
Component fake_output_path;
success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
@@ -120,19 +137,33 @@
output->Append("file://", 7);
new_parsed->scheme.len = 4;
+ // If the host is localhost, and the path starts with a Windows drive letter,
+ // remove the host component. This does the following transformation:
+ // file://localhost/C:/hello.txt -> file:///C:/hello.txt
+ //
+ // Note: we do this on every platform per the URL Standard, not just Windows.
+ //
+ // TODO(https://crbug.com/688961): According to the latest URL spec, this
+ // transformation should be done regardless of the path.
+ Component host_range = parsed.host;
+ if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
+ FindWindowsDriveLetter(source.path, parsed.path.begin,
+ parsed.path.end()) >= parsed.path.begin) {
+ host_range.reset();
+ }
+
// Append the host. For many file URLs, this will be empty. For UNC, this
// will be present.
// TODO(brettw) This doesn't do any checking for host name validity. We
// should probably handle validity checking of UNC hosts differently than
// for regular IP hosts.
- bool success = CanonicalizeHost(source.host, parsed.host,
- output, &new_parsed->host);
+ bool success =
+ CanonicalizeHost(source.host, host_range, output, &new_parsed->host);
success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
output, &new_parsed->path);
+
CanonicalizeQuery(source.query, parsed.query, query_converter,
output, &new_parsed->query);
-
- // Ignore failure for refs since the URL can probably still be loaded.
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
return success;
@@ -140,6 +171,14 @@
} // namespace
+int FindWindowsDriveLetter(const char* spec, int begin, int end) {
+ return DoFindWindowsDriveLetter(spec, begin, end);
+}
+
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
+ return DoFindWindowsDriveLetter(spec, begin, end);
+}
+
bool CanonicalizeFileURL(const char* spec,
int spec_len,
const Parsed& parsed,
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index abcf615..7a97522 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -2,7 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <unordered_set>
+
#include "polyfills/base/check.h"
+#include "polyfills/base/metrics/histogram_macros.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -129,6 +132,8 @@
bool* has_non_ascii) {
*has_non_ascii = false;
+ std::unordered_set<char> escaped_chars_to_measure;
+
bool success = true;
for (int i = 0; i < host_len; ++i) {
unsigned int source = host[i];
@@ -156,6 +161,7 @@
} else if (replacement == kEsc) {
// This character is valid but should be escaped.
AppendEscapedChar(source, output);
+ escaped_chars_to_measure.insert(source);
} else {
// Common case, the given character is valid in a hostname, the lookup
// table tells us the canonical representation of that character (lower
@@ -170,6 +176,16 @@
*has_non_ascii = true;
}
}
+ if (success) {
+ bool did_escape = !escaped_chars_to_measure.empty();
+ UMA_HISTOGRAM_BOOLEAN("URL.Host.DidEscape", did_escape);
+ if (did_escape) {
+ for (char c : escaped_chars_to_measure) {
+ UMA_HISTOGRAM_ENUMERATION("URL.Host.EscapeChar",
+ EscapedHostCharToEnum(c));
+ }
+ }
+ }
return success;
}
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index 99541bd..961a3be 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -435,4 +435,49 @@
#endif // !WIN32
+EscapedHostChar EscapedHostCharToEnum(char c) {
+ switch (c) {
+ case ' ':
+ return EscapedHostChar::kSpace;
+ case '!':
+ return EscapedHostChar::kBang;
+ case '"':
+ return EscapedHostChar::kDoubleQuote;
+ case '#':
+ return EscapedHostChar::kHash;
+ case '$':
+ return EscapedHostChar::kDollar;
+ case '&':
+ return EscapedHostChar::kAmpersand;
+ case '\'':
+ return EscapedHostChar::kSingleQuote;
+ case '(':
+ return EscapedHostChar::kLeftParen;
+ case ')':
+ return EscapedHostChar::kRightParen;
+ case '*':
+ return EscapedHostChar::kAsterisk;
+ case ',':
+ return EscapedHostChar::kComma;
+ case '<':
+ return EscapedHostChar::kLeftAngle;
+ case '=':
+ return EscapedHostChar::kEquals;
+ case '>':
+ return EscapedHostChar::kRightAngle;
+ case '@':
+ return EscapedHostChar::kAt;
+ case '`':
+ return EscapedHostChar::kBackTick;
+ case '{':
+ return EscapedHostChar::kLeftCurly;
+ case '|':
+ return EscapedHostChar::kPipe;
+ case '}':
+ return EscapedHostChar::kRightCurly;
+ default:
+ return EscapedHostChar::kUnknown;
+ }
+}
+
} // namespace url
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 11e0f7a..6601587 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -418,6 +418,17 @@
int path_begin_in_output,
CanonOutput* output);
+// Find the position of a bona fide Windows drive letter in the given path. If
+// no leading drive letter is found, -1 is returned. This function correctly
+// treats /c:/foo and /./c:/foo as having drive letters, and /def/c:/foo as not
+// having a drive letter.
+//
+// Exported for tests.
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char* spec, int begin, int end);
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end);
+
#ifndef WIN32
// Implementations of Windows' int-to-string conversions
@@ -445,6 +456,35 @@
#endif // WIN32
+// These values are logged to UMA. Entries should not be renumbered and
+// numeric values should never be reused. Please keep in sync with
+// "URLHostEscapedHostChar" in src/tools/metrics/histograms/enums.xml.
+enum class EscapedHostChar {
+ kUnknown = 0,
+ kSpace = 1,
+ kBang = 2,
+ kDoubleQuote = 3,
+ kHash = 4,
+ kDollar = 5,
+ kAmpersand = 6,
+ kSingleQuote = 7,
+ kLeftParen = 8,
+ kRightParen = 9,
+ kAsterisk = 10,
+ kComma = 11,
+ kLeftAngle = 12,
+ kEquals = 13,
+ kRightAngle = 14,
+ kAt = 15,
+ kBackTick = 16,
+ kLeftCurly = 17,
+ kPipe = 18,
+ kRightCurly = 19,
+ kMaxValue = kRightCurly,
+};
+
+COMPONENT_EXPORT(URL) EscapedHostChar EscapedHostCharToEnum(char c);
+
} // namespace url
#endif // URL_URL_CANON_INTERNAL_H_
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index f047d7f..309f596 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -543,10 +543,7 @@
// have a host, we want to use the special host detection logic for file
// URLs provided by DoResolveAbsoluteFile(), as opposed to the generic host
// detection logic, for consistency with parsing file URLs from scratch.
- // This also handles the special case where the URL is only slashes,
- // since that doesn't have a host part either.
- if (base_is_file &&
- (num_slashes >= 2 || num_slashes == relative_component.len)) {
+ if (base_is_file && num_slashes >= 2) {
return DoResolveAbsoluteFile(relative_url, relative_component,
query_converter, output, out_parsed);
}
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h
index 6d23abf..cef33cd 100644
--- a/url/url_canon_stdstring.h
+++ b/url/url_canon_stdstring.h
@@ -37,6 +37,10 @@
class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput {
public:
StdStringCanonOutput(std::string* str);
+
+ StdStringCanonOutput(const StdStringCanonOutput&) = delete;
+ StdStringCanonOutput& operator=(const StdStringCanonOutput&) = delete;
+
~StdStringCanonOutput() override;
// Must be called after writing has completed but before the string is used.
@@ -46,7 +50,6 @@
protected:
std::string* str_;
- DISALLOW_COPY_AND_ASSIGN(StdStringCanonOutput);
};
// An extension of the Replacements class that allows the setters to use
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc
index c7e7454..d7e4197 100644
--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc
@@ -103,6 +103,10 @@
// Ref: ignore failure for this, since the page can probably still be loaded.
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+ // Carry over the flag for potentially dangling markup:
+ if (parsed.potentially_dangling_markup)
+ new_parsed->potentially_dangling_markup = true;
+
return success;
}
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index aa2a8ce..d7dc876 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -6,8 +6,10 @@
#include <stddef.h>
#include "base/cxx17_backports.h"
+#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
+#include "base/test/metrics/histogram_tester.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -2373,6 +2375,11 @@
// is not file.
{"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
{"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+ // Cross-platform relative file: resolution behavior.
+ {"file://host/a", true, true, "/", true, true, true, "file://host/"},
+ {"file://host/a", true, true, "//", true, true, true, "file:///"},
+ {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
+ {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
// Ensure that ports aren't allowed for hosts relative to a file url.
// Although the result string shows a host:port portion, the call to
// resolve the relative URL returns false, indicating parse failure,
@@ -2510,6 +2517,47 @@
}
}
+TEST(URLCanonTest, FindWindowsDriveLetter) {
+ struct TestCase {
+ gurl_base::StringPiece spec;
+ int begin;
+ int end; // -1 for end of spec
+ int expected_drive_letter_pos;
+ } cases[] = {
+ {"/", 0, -1, -1},
+
+ {"c:/foo", 0, -1, 0},
+ {"/c:/foo", 0, -1, 1},
+ {"//c:/foo", 0, -1, -1}, // "//" does not canonicalize to "/"
+ {"\\C|\\foo", 0, -1, 1},
+ {"/cd:/foo", 0, -1, -1}, // "/c" does not canonicalize to "/"
+ {"/./c:/foo", 0, -1, 3},
+ {"/.//c:/foo", 0, -1, -1}, // "/.//" does not canonicalize to "/"
+ {"/././c:/foo", 0, -1, 5},
+ {"/abc/c:/foo", 0, -1, -1}, // "/abc/" does not canonicalize to "/"
+ {"/abc/./../c:/foo", 0, -1, 10},
+
+ {"/c:/c:/foo", 3, -1, 4}, // actual input is "/c:/foo"
+ {"/c:/foo", 3, -1, -1}, // actual input is "/foo"
+ {"/c:/foo", 0, 1, -1}, // actual input is "/"
+ };
+
+ for (const auto& c : cases) {
+ int end = c.end;
+ if (end == -1)
+ end = c.spec.size();
+
+ EXPECT_EQ(c.expected_drive_letter_pos,
+ FindWindowsDriveLetter(c.spec.data(), c.begin, end))
+ << "for " << c.spec << "[" << c.begin << ":" << end << "] (UTF-8)";
+
+ std::u16string spec16 = gurl_base::ASCIIToUTF16(c.spec);
+ EXPECT_EQ(c.expected_drive_letter_pos,
+ FindWindowsDriveLetter(spec16.data(), c.begin, end))
+ << "for " << c.spec << "[" << c.begin << ":" << end << "] (UTF-16)";
+ }
+}
+
TEST(URLCanonTest, IDNToASCII) {
RawCanonOutputW<1024> output;
@@ -2561,4 +2609,49 @@
output.set_length(0);
}
+TEST(URLCanonTest, EscapedHostCharToEnum) {
+ EXPECT_EQ(EscapedHostChar::kSpace, EscapedHostCharToEnum(' '));
+ EXPECT_EQ(EscapedHostChar::kBang, EscapedHostCharToEnum('!'));
+ EXPECT_EQ(EscapedHostChar::kDoubleQuote, EscapedHostCharToEnum('"'));
+ EXPECT_EQ(EscapedHostChar::kHash, EscapedHostCharToEnum('#'));
+ EXPECT_EQ(EscapedHostChar::kDollar, EscapedHostCharToEnum('$'));
+ EXPECT_EQ(EscapedHostChar::kAmpersand, EscapedHostCharToEnum('&'));
+ EXPECT_EQ(EscapedHostChar::kSingleQuote, EscapedHostCharToEnum('\''));
+ EXPECT_EQ(EscapedHostChar::kLeftParen, EscapedHostCharToEnum('('));
+ EXPECT_EQ(EscapedHostChar::kRightParen, EscapedHostCharToEnum(')'));
+ EXPECT_EQ(EscapedHostChar::kAsterisk, EscapedHostCharToEnum('*'));
+ EXPECT_EQ(EscapedHostChar::kComma, EscapedHostCharToEnum(','));
+ EXPECT_EQ(EscapedHostChar::kLeftAngle, EscapedHostCharToEnum('<'));
+ EXPECT_EQ(EscapedHostChar::kEquals, EscapedHostCharToEnum('='));
+ EXPECT_EQ(EscapedHostChar::kRightAngle, EscapedHostCharToEnum('>'));
+ EXPECT_EQ(EscapedHostChar::kAt, EscapedHostCharToEnum('@'));
+ EXPECT_EQ(EscapedHostChar::kBackTick, EscapedHostCharToEnum('`'));
+ EXPECT_EQ(EscapedHostChar::kLeftCurly, EscapedHostCharToEnum('{'));
+ EXPECT_EQ(EscapedHostChar::kPipe, EscapedHostCharToEnum('|'));
+ EXPECT_EQ(EscapedHostChar::kRightCurly, EscapedHostCharToEnum('}'));
+
+ EXPECT_EQ(EscapedHostChar::kUnknown, EscapedHostCharToEnum('a'));
+ EXPECT_EQ(EscapedHostChar::kUnknown, EscapedHostCharToEnum('\\'));
+}
+
+TEST(URLCanonTest, EscapedHostCharHistograms) {
+ std::string input("foo <bar>");
+
+ Component in_comp(0, input.size());
+ Component out_comp;
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+
+ gurl_base::HistogramTester histogram_tester;
+ bool success = CanonicalizeHost(input.data(), in_comp, &output, &out_comp);
+ ASSERT_TRUE(success);
+ histogram_tester.ExpectBucketCount("URL.Host.DidEscape", 1, 1);
+ histogram_tester.ExpectBucketCount("URL.Host.EscapeChar",
+ EscapedHostChar::kSpace, 1);
+ histogram_tester.ExpectBucketCount("URL.Host.EscapeChar",
+ EscapedHostChar::kLeftAngle, 1);
+ histogram_tester.ExpectBucketCount("URL.Host.EscapeChar",
+ EscapedHostChar::kRightAngle, 1);
+}
+
} // namespace url
diff --git a/url/url_constants.cc b/url/url_constants.cc
index 9da6426..d7a5de7 100644
--- a/url/url_constants.cc
+++ b/url/url_constants.cc
@@ -29,6 +29,7 @@
const char kQuicTransportScheme[] = "quic-transport";
const char kTelScheme[] = "tel";
const char kUrnScheme[] = "urn";
+const char kUuidInPackageScheme[] = "uuid-in-package";
const char kWsScheme[] = "ws";
const char kWssScheme[] = "wss";
diff --git a/url/url_constants.h b/url/url_constants.h
index dcd7c90..69a72f5 100644
--- a/url/url_constants.h
+++ b/url/url_constants.h
@@ -33,6 +33,7 @@
COMPONENT_EXPORT(URL) extern const char kQuicTransportScheme[];
COMPONENT_EXPORT(URL) extern const char kTelScheme[];
COMPONENT_EXPORT(URL) extern const char kUrnScheme[];
+COMPONENT_EXPORT(URL) extern const char kUuidInPackageScheme[];
COMPONENT_EXPORT(URL) extern const char kWsScheme[];
COMPONENT_EXPORT(URL) extern const char kWssScheme[];
diff --git a/url/url_file.h b/url/url_file.h
index 6ad79b4..c15c8f5 100644
--- a/url/url_file.h
+++ b/url/url_file.h
@@ -13,15 +13,11 @@
namespace url {
-#ifdef WIN32
-
// We allow both "c:" and "c|" as drive identifiers.
inline bool IsWindowsDriveSeparator(char16_t ch) {
return ch == ':' || ch == '|';
}
-#endif // WIN32
-
// Returns the index of the next slash in the input after the given index, or
// spec_len if the end of the input is reached.
template<typename CHAR>
@@ -32,8 +28,6 @@
return idx;
}
-#ifdef WIN32
-
// DoesContainWindowsDriveSpecUntil returns the least number between
// start_offset and max_offset such that the spec has a valid drive
// specification starting at that offset. Otherwise it returns -1. This function
@@ -74,6 +68,8 @@
spec_len) == start_offset;
}
+#ifdef WIN32
+
// Returns true if the start_offset in the given text looks like it begins a
// UNC path, for example "\\". This function explicitly handles start_offset
// values that are equal to or larger than the spec_len to simplify callers.
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc
index ceb75d8..77a622f 100644
--- a/url/url_parse_file.cc
+++ b/url/url_parse_file.cc
@@ -42,48 +42,24 @@
namespace {
-// A subcomponent of DoInitFileURL, the input of this function should be a UNC
+// A subcomponent of DoParseFileURL, the input of this function should be a UNC
// path name, with the index of the first character after the slashes following
// the scheme given in |after_slashes|. This will initialize the host, path,
// query, and ref, and leave the other output components untouched
-// (DoInitFileURL handles these for us).
-template<typename CHAR>
+// (DoParseFileURL handles these for us).
+template <typename CHAR>
void DoParseUNC(const CHAR* spec,
int after_slashes,
int spec_len,
- Parsed* parsed) {
+ Parsed* parsed) {
int next_slash = FindNextSlash(spec, after_slashes, spec_len);
- if (next_slash == spec_len) {
- // No additional slash found, as in "file://foo", treat the text as the
- // host with no path (this will end up being UNC to server "foo").
- int host_len = spec_len - after_slashes;
- if (host_len)
- parsed->host = Component(after_slashes, host_len);
- else
- parsed->host.reset();
- parsed->path.reset();
- return;
- }
-#ifdef WIN32
- // See if we have something that looks like a path following the first
- // component. As in "file://localhost/c:/", we get "c:/" out. We want to
- // treat this as a having no host but the path given. Works on Windows only.
- if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) {
- parsed->host.reset();
- ParsePathInternal(spec, MakeRange(next_slash, spec_len),
- &parsed->path, &parsed->query, &parsed->ref);
- return;
- }
-#endif
-
- // Otherwise, everything up until that first slash we found is the host name,
- // which will end up being the UNC host. For example "file://foo/bar.txt"
- // will get a server name of "foo" and a path of "/bar". Later, on Windows,
- // this should be treated as the filename "\\foo\bar.txt" in proper UNC
- // notation.
- int host_len = next_slash - after_slashes;
- if (host_len)
+ // Everything up until that first slash we found (or end of string) is the
+ // host name, which will end up being the UNC host. For example,
+ // "file://foo/bar.txt" will get a server name of "foo" and a path of
+ // "/bar.txt". Later, on Windows, this should be treated as the filename
+ // "\\foo\bar.txt" in proper UNC notation.
+ if (after_slashes < next_slash)
parsed->host = MakeRange(after_slashes, next_slash);
else
parsed->host.reset();
@@ -98,7 +74,7 @@
// A subcomponent of DoParseFileURL, the input should be a local file, with the
// beginning of the path indicated by the index in |path_begin|. This will
// initialize the host, path, query, and ref, and leave the other output
-// components untouched (DoInitFileURL handles these for us).
+// components untouched (DoParseFileURL handles these for us).
template<typename CHAR>
void DoParseLocalFile(const CHAR* spec,
int path_begin,
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index b23dcf8..7cd3fe8 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc
@@ -374,8 +374,8 @@
{"FiLe:c|", "FiLe", NULL, NULL, NULL, -1, "c|", NULL, NULL},
{"FILE:/\\\\/server/file", "FILE", NULL, NULL, "server", -1, "/file", NULL, NULL},
{"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL},
-{"file://localhost/c:/", "file", NULL, NULL, NULL, -1, "/c:/", NULL, NULL},
-{"file://127.0.0.1/c|\\", "file", NULL, NULL, NULL, -1, "/c|\\", NULL, NULL},
+{"file://localhost/c:/", "file", NULL, NULL, "localhost", -1, "/c:/", NULL, NULL},
+{"file://127.0.0.1/c|\\", "file", NULL, NULL, "127.0.0.1", -1, "/c|\\", NULL, NULL},
{"file:/", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
{"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
// If there is a Windows drive letter, treat any number of slashes as the
diff --git a/url/url_util.cc b/url/url_util.cc
index 470da30..3162fdf 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -217,6 +217,13 @@
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
+ // Trim leading C0 control characters and spaces.
+ int begin = 0;
+ TrimURL(spec, &begin, &spec_len, trim_path_end);
+ GURL_DCHECK(0 <= begin && begin <= spec_len);
+ spec += begin;
+ spec_len -= begin;
+
output->ReserveSizeIfNeeded(spec_len);
// Remove any whitespace from the middle of the relative URL if necessary.
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index 158e3d6..a455ff6 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -5,7 +5,10 @@
#include <stddef.h>
#include "base/cxx17_backports.h"
+#include "base/strings/string_piece.h"
+#include "testing/gtest/include/gtest/gtest-message.h"
#include "testing/gtest/include/gtest/gtest.h"
+#include "absl/types/optional.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
@@ -17,12 +20,14 @@
class URLUtilTest : public testing::Test {
public:
URLUtilTest() = default;
+
+ URLUtilTest(const URLUtilTest&) = delete;
+ URLUtilTest& operator=(const URLUtilTest&) = delete;
+
~URLUtilTest() override = default;
private:
ScopedSchemeRegistryForTests scoped_registry_;
-
- DISALLOW_COPY_AND_ASSIGN(URLUtilTest);
};
TEST_F(URLUtilTest, FindAndCompareScheme) {
@@ -492,6 +497,25 @@
}
}
+TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterReplacement) {
+ // Parse a URL with potentially dangling markup.
+ Parsed original_parsed;
+ RawCanonOutput<32> original;
+ const char* url = "htt\nps://example.com/<path";
+ Canonicalize(url, strlen(url), false, nullptr, &original, &original_parsed);
+ ASSERT_TRUE(original_parsed.potentially_dangling_markup);
+
+ // Perform a replacement, and validate that the potentially_dangling_markup
+ // flag carried over to the new Parsed object.
+ Replacements<char> replacements;
+ replacements.ClearRef();
+ Parsed replaced_parsed;
+ RawCanonOutput<32> replaced;
+ ReplaceComponents(original.data(), original.length(), original_parsed,
+ replacements, nullptr, &replaced, &replaced_parsed);
+ EXPECT_TRUE(replaced_parsed.potentially_dangling_markup);
+}
+
TEST_F(URLUtilTest, TestDomainIs) {
const struct {
const char* canonicalized_host;
@@ -535,4 +559,52 @@
}
}
+namespace {
+absl::optional<std::string> CanonicalizeSpec(gurl_base::StringPiece spec,
+ bool trim_path_end) {
+ std::string canonicalized;
+ StdStringCanonOutput output(&canonicalized);
+ Parsed parsed;
+ if (!Canonicalize(spec.data(), spec.size(), trim_path_end,
+ /*charset_converter=*/nullptr, &output, &parsed)) {
+ return {};
+ }
+ output.Complete(); // Must be called before string is used.
+ return canonicalized;
+}
+} // namespace
+
+#ifdef OS_WIN
+// Regression test for https://crbug.com/1252658.
+TEST_F(URLUtilTest, TestCanonicalizeWindowsPathWithLeadingNUL) {
+ auto PrefixWithNUL = [](std::string&& s) -> std::string { return '\0' + s; };
+ EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("w:"), /*trim_path_end=*/false),
+ absl::make_optional("file:///W:"));
+ EXPECT_EQ(CanonicalizeSpec(PrefixWithNUL("\\\\server\\share"),
+ /*trim_path_end=*/false),
+ absl::make_optional("file://server/share"));
+}
+#endif
+
+TEST_F(URLUtilTest, TestCanonicalizeIdempotencyWithLeadingControlCharacters) {
+ std::string spec = "_w:";
+ // Loop over all C0 control characters and the space character.
+ for (char c = '\0'; c <= ' '; c++) {
+ SCOPED_TRACE(testing::Message() << "c: " << c);
+
+ // Overwrite the first character of `spec`. Note that replacing the first
+ // character with NUL will not change the length!
+ spec[0] = c;
+
+ for (bool trim_path_end : {false, true}) {
+ SCOPED_TRACE(testing::Message() << "trim_path_end: " << trim_path_end);
+
+ absl::optional<std::string> canonicalized =
+ CanonicalizeSpec(spec, trim_path_end);
+ ASSERT_TRUE(canonicalized);
+ EXPECT_EQ(canonicalized, CanonicalizeSpec(*canonicalized, trim_path_end));
+ }
+ }
+}
+
} // namespace url