Roll googleurl to the latest Chromium upstream
This updates up to Chromium commit c42ff2e06f1bec13abfa625ad629e67b24cf6a66
from Fri Oct 13 15:43:03 2023 +0000
Change-Id: I8bd5bc4438ffa05211e9218c6da906dfe2fb8fba
diff --git a/AUTHORS b/AUTHORS
index 6714ac1..2ae5e98 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -30,18 +30,22 @@
Adam Bujalski <abujalski@gmail.com>
Adam Kallai <kadam@inf.u-szeged.hu>
Adam Labuda <a.labuda@samsung.com>
+Anguluri Aravind Kumar <ar.kumar@samsung.com>
Adam Roben <adam@github.com>
Adam Treat <adam.treat@samsung.com>
Adam Yi <i@adamyi.com>
Addanki Gandhi Kishor <kishor.ag@samsung.com>
Adenilson Cavalcanti <a.cavalcanti@samsung.com>
Aditi Singh <a20.singh@samsung.com>
+Aditya Agarwal <ad.agarwal@samsung.com>
Aditya Bhargava <heuristicist@gmail.com>
+Aditya Sharma <a2.sharma@samsung.com>
Adrian Belgun <adrian.belgun@intel.com>
Adrian Ratiu <adrian.ratiu@collabora.corp-partner.google.com>
Adrià Vilanova Martínez <me@avm99963.com>
Ahmed Elwasefi <a.m.elwasefi@gmail.com>
Ahmet Emir Ercin <ahmetemiremir@gmail.com>
+Aidarbek Suleimenov <suleimenov.aidarbek@gmail.com>
Aiden Grossman <aidengrossmanpso@gmail.com>
Ajay Berwal <a.berwal@samsung.com>
Ajay Berwal <ajay.berwal@samsung.com>
@@ -323,6 +327,7 @@
Debadree Chatterjee <debadree333@gmail.com>
Debashish Samantaray <d.samantaray@samsung.com>
Debug Wang <debugwang@tencent.com>
+Deep Shah <deep.shah@samsung.com>
Deepak Dilip Borade <deepak.db@samsung.com>
Deepak Mittal <deepak.m1@samsung.com>
Deepak Mohan <hop2deep@gmail.com>
@@ -359,6 +364,7 @@
Donna Wu <donna.wu@intel.com>
Douglas F. Turner <doug.turner@gmail.com>
Drew Blaisdell <drew.blaisdell@gmail.com>
+Dushyant Kant Sharma <dush.sharma@samsung.com>
Dustin Doloff <doloffd@amazon.com>
Ebrahim Byagowi <ebrahim@gnu.org>
Ebrahim Byagowi <ebraminio@gmail.com>
@@ -441,6 +447,7 @@
Girish Kumar M <mck.giri@samsung.com>
Gitanshu Mehndiratta <g.mehndiratt@samsung.com>
Giuseppe Iuculano <giuseppe@iuculano.it>
+Gloam <gaoqingguang@kuaishou.com>
Gnanasekar Somanathan <gnanasekar.s@samsung.com>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Goutham Jagannatha <wrm364@motorola.com>
@@ -471,12 +478,14 @@
Harshit Pal <harshitp12345@gmail.com>
Hassan Salehe Matar <hassansalehe@gmail.com>
Hautio Kari <khautio@gmail.com>
+He Qi <heqi899@gmail.com>
Heejin R. Chung <heejin.r.chung@samsung.com>
Heeyoun Lee <heeyoun.lee@samsung.com>
Henrique de Carvalho <decarv.henrique@gmail.com>
Henrique Limas <henrique.ramos.limas@gmail.com>
Henry Lim <henry@limhenry.xyz>
Hikari Fujimoto <hikari.p.fujimoto@gmail.com>
+Himadri Agrawal <h2.agrawal@samsung.com>
Himanshu Joshi <h.joshi@samsung.com>
Himanshu Nayak <himanshu.nayak@amd.corp-partner.google.com>
Hiroki Oshima <hiroki.oshima@gmail.com>
@@ -547,6 +556,7 @@
Jaewon Choi <jaewon.james.choi@gmail.com>
Jaewon Jung <jw.jung@navercorp.com>
Jaeyong Bae <jdragon.bae@gmail.com>
+Jagadesh P <jagadeshjai1999@gmail.com>
Jagdish Chourasia <jagdish.c@samsung.com>
Jaime Soriano Pastor <jsorianopastor@gmail.com>
Jake Helfert <jake@helfert.us>
@@ -772,6 +782,7 @@
Lauri Oherd <lauri.oherd@gmail.com>
Lavar Askew <open.hyperion@gmail.com>
Le Hoang Quyen <le.hoang.q@gmail.com>
+Leena Kaushik <l1.kaushik@samsung.com>
Legend Lee <guanxian.li@intel.com>
Leith Bade <leith@leithalweapon.geek.nz>
Lei Gao <leigao@huawei.com>
@@ -874,6 +885,7 @@
Matthias Reitinger <reimarvin@gmail.com>
Matthieu Rigolot <matthieu.rigolot@gmail.com>
Matthieu Vlad Hauglustaine <matt.hauglustaine@gmail.com>
+Max Coplan <mchcopl@gmail.com>
Max Karolinskiy <max@brave.com>
Max Perepelitsyn <pph34r@gmail.com>
Max Schmitt <max@schmitt.mx>
@@ -945,6 +957,7 @@
Nagarjuna Atluri <nagarjuna.a@samsung.com>
Naiem Shaik <naiem.shaik@gmail.com>
Naman Kumar Narula <namankumarnarula@gmail.com>
+Naman Yadav <naman.yadav@samsung.com>
Naoki Takano <takano.naoki@gmail.com>
Naoto Ono <onoto1998@gmail.com>
Nathan Mitchell <nathaniel.v.mitchell@gmail.com>
@@ -980,6 +993,7 @@
Olivier Tilloy <olivier+chromium@tilloy.net>
Olli Raula (Old name Olli Syrjälä) <olli.raula@intel.com>
Omar Sandoval <osandov@osandov.com>
+Orko Garai <orko.garai@gmail.com>
Owen Shaw <owenpshaw@gmail.com>
Owen Yuwono <owenyuwono@gmail.com>
Palash Verma <palashverma47@gmail.com>
@@ -1047,6 +1061,7 @@
Preeti Nayak <preeti.nayak@samsung.com>
Pritam Nikam <pritam.nikam@samsung.com>
Puttaraju R <puttaraju.r@samsung.com>
+Punith Nayak <npunith125@gmail.com>
Qi Tiezheng <qitiezheng@360.cn>
Qi Yang <qi1988.yang@samsung.com>
Qiang Zeng <zengqiang1@huawei.com>
@@ -1074,6 +1089,7 @@
Ravi Nanjundappa <nravi.n@samsung.com>
Ravi Phaneendra Kasibhatla <r.kasibhatla@samsung.com>
Ravi Phaneendra Kasibhatla <ravi.kasibhatla@motorola.com>
+Ravindra Kumar <ravindra.k2@samsung.com>
Raviraj Sitaram <raviraj.p.sitaram@intel.com>
Rebecca Chang Swee Fun <rebecca.chang@starfivetech.com>
Reda Tawfik <redatawfik@noogler.google.com>
@@ -1130,6 +1146,7 @@
Saikrishna Arcot <saiarcot895@gmail.com>
Sajal Khandelwal <skhandelwa22@bloomberg.net>
Sajeesh Sidharthan <sajeesh.sidharthan@amd.corp-partner.google.com>
+Sakib Shabir <s1.tantray@samsung.com>
Saksham Mittal <gotlouemail@gmail.com>
Salvatore Iovene <salvatore.iovene@intel.com>
Sam James <sam@gentoo.org>
@@ -1203,10 +1220,12 @@
Shirish S <shirish.s@amd.com>
Shiva Kumar <shiva.k1@samsung.com>
Shivakumar JM <shiva.jm@samsung.com>
+Shiyi Zou <shiyi.zou@intel.com>
Shobhit Goel <shobhit.goel@samsung.com>
Shouqun Liu <liushouqun@xiaomi.com>
Shouqun Liu <shouqun.liu@intel.com>
Shreeram Kushwaha <shreeram.k@samsung.com>
+Shrey Patel <shrey1patel2@gmail.com>
Shreyas Gopal <shreyas.g@samsung.com>
Shreyas VA <v.a.shreyas@gmail.com>
Shubham Agrawal <shubag@amazon.com>
@@ -1217,10 +1236,12 @@
Siddharth Shankar <funkysidd@gmail.com>
Simeon Kuran <simeon.kuran@gmail.com>
Simon Arlott <simon.arlott@gmail.com>
+Simon Cadman <simon@cadman.uk>
Simon Jackson <simon.jackson@sonocent.com>
Simon La Macchia <smacchia@amazon.com>
Siva Kumar Gunturi <siva.gunturi@samsung.com>
Slava Aseev <nullptrnine@gmail.com>
+Smriti Singh <s01.singh@samsung.com>
Sohom Datta <sohom.datta@learner.manipal.edu>
Sohom Datta <dattasohom1@gmail.com>
Song Fangzhen <songfangzhen@bytedance.com>
@@ -1576,5 +1597,6 @@
Whist Technologies <*@whist.com>
Xperi Corporation <*@xperi.com>
Yandex LLC <*@yandex-team.ru>
+Zoho Corporation <*@zohocorp.com>
# Please DO NOT APPEND here. See comments at the top of the file.
# END organizations section.
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index e85da5c..5f7c31e 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -374,6 +374,20 @@
#define TRIVIAL_ABI
#endif
+// Detect whether a type is trivially relocatable, i.e. a move-and-destroy
+// sequence can be replaced with memmove(). This can be used to optimise the
+// implementation of containers. This is automatically true for types that were
+// defined with TRIVIAL_ABI such as scoped_refptr.
+//
+// See also:
+// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p1144r8.html
+// https://clang.llvm.org/docs/LanguageExtensions.html#:~:text=__is_trivially_relocatable
+#if defined(__clang__) && HAS_BUILTIN(__is_trivially_relocatable)
+#define IS_TRIVIALLY_RELOCATABLE(t) __is_trivially_relocatable(t)
+#else
+#define IS_TRIVIALLY_RELOCATABLE(t) false
+#endif
+
// Marks a member function as reinitializing a moved-from variable.
// See also
// https://clang.llvm.org/extra/clang-tidy/checks/bugprone-use-after-move.html#reinitialization
diff --git a/base/containers/contiguous_iterator.h b/base/containers/contiguous_iterator.h
index 7f06432..8311b50 100644
--- a/base/containers/contiguous_iterator.h
+++ b/base/containers/contiguous_iterator.h
@@ -40,8 +40,13 @@
// `static_assert(is_trivial_v<value_type>)` inside libc++'s std::basic_string.
template <typename T>
struct IsStringIter
- : std::conjunction<std::is_trivial<iter_value_t<T>>, IsStringIterImpl<T>> {
-};
+ : std::conjunction<
+ std::disjunction<std::is_same<iter_value_t<T>, char>,
+ std::is_same<iter_value_t<T>, wchar_t>,
+
+ std::is_same<iter_value_t<T>, char16_t>,
+ std::is_same<iter_value_t<T>, char32_t>>,
+ IsStringIterImpl<T>> {};
// An iterator to std::array is contiguous.
// Reference: https://wg21.link/array.overview#1
diff --git a/base/containers/span.h b/base/containers/span.h
index 83069d6..09f6b6f 100644
--- a/base/containers/span.h
+++ b/base/containers/span.h
@@ -27,7 +27,9 @@
// [views.constants]
constexpr size_t dynamic_extent = std::numeric_limits<size_t>::max();
-template <typename T, size_t Extent = dynamic_extent>
+template <typename T,
+ size_t Extent = dynamic_extent,
+ typename InternalPtrType = T*>
class span;
namespace internal {
@@ -233,7 +235,7 @@
// appropriate make_span() utility functions are provided.
// [span], class template span
-template <typename T, size_t Extent>
+template <typename T, size_t Extent, typename InternalPtrType>
class GSL_POINTER span : public internal::ExtentStorage<Extent> {
private:
using ExtentStorage = internal::ExtentStorage<Extent>;
@@ -434,11 +436,11 @@
// [span.iter], span iterator support
constexpr iterator begin() const noexcept {
- return iterator(data_, data_ + size());
+ return iterator(data(), data() + size());
}
constexpr iterator end() const noexcept {
- return iterator(data_, data_ + size(), data_ + size());
+ return iterator(data(), data() + size(), data() + size());
}
constexpr reverse_iterator rbegin() const noexcept {
@@ -452,13 +454,38 @@
private:
// This field is not a raw_ptr<> because it was filtered by the rewriter
// for: #constexpr-ctor-field-initializer, #global-scope, #union
- RAW_PTR_EXCLUSION T* data_;
+ InternalPtrType data_;
};
// span<T, Extent>::extent can not be declared inline prior to C++17, hence this
// definition is required.
-template <class T, size_t Extent>
-constexpr size_t span<T, Extent>::extent;
+template <class T, size_t Extent, typename InternalPtrType>
+constexpr size_t span<T, Extent, InternalPtrType>::extent;
+
+template <typename It,
+ typename T = std::remove_reference_t<iter_reference_t<It>>>
+span(It, StrictNumeric<size_t>) -> span<T>;
+
+template <typename It,
+ typename End,
+ typename = std::enable_if_t<!std::is_convertible_v<End, size_t>>,
+ typename T = std::remove_reference_t<iter_reference_t<It>>>
+span(It, End) -> span<T>;
+
+template <typename T, size_t N>
+span(T (&)[N]) -> span<T, N>;
+
+template <typename T, size_t N>
+span(std::array<T, N>&) -> span<T, N>;
+
+template <typename T, size_t N>
+span(const std::array<T, N>&) -> span<const T, N>;
+
+template <typename Container,
+ typename T = std::remove_pointer_t<
+ decltype(std::data(std::declval<Container>()))>,
+ size_t X = internal::Extent<Container>::value>
+span(Container&&) -> span<T, X>;
// [span.objectrep], views of object representation
template <typename T, size_t X>
diff --git a/base/memory/raw_ptr_exclusion.h b/base/memory/raw_ptr_exclusion.h
index 3ce1d60..e4d355d 100644
--- a/base/memory/raw_ptr_exclusion.h
+++ b/base/memory/raw_ptr_exclusion.h
@@ -8,6 +8,6 @@
// Although `raw_ptr` is part of the standalone PA distribution, it is
// easier to use the shorter path in `//base/memory`. We retain this
// facade header for ease of typing.
-#include "base/allocator/partition_allocator/pointers/raw_ptr_exclusion.h" // IWYU pragma: export
+#include "base/allocator/partition_allocator/src/partition_alloc/pointers/raw_ptr_exclusion.h" // IWYU pragma: export
#endif // BASE_MEMORY_RAW_PTR_EXCLUSION_H_
diff --git a/base/numerics/safe_conversions.h b/base/numerics/safe_conversions.h
index 3e04bf4..7f12916 100644
--- a/base/numerics/safe_conversions.h
+++ b/base/numerics/safe_conversions.h
@@ -357,8 +357,18 @@
using SizeT = StrictNumeric<size_t>;
// floating -> integral conversions that saturate and thus can actually return
-// an integral type. In most cases, these should be preferred over the std::
-// versions.
+// an integral type.
+//
+// Generally, what you want is saturated_cast<Dst>(std::nearbyint(x)), which
+// rounds correctly according to IEEE-754 (round to nearest, ties go to nearest
+// even number; this avoids bias). If your code is performance-critical
+// and you are sure that you will never overflow, you can use std::lrint()
+// or std::llrint(), which return a long or long long directly.
+//
+// Below are convenience functions around similar patterns, except that
+// they round in nonstandard directions and will generally be slower.
+
+// Rounds towards negative infinity (i.e., down).
template <typename Dst = int,
typename Src,
typename = std::enable_if_t<std::is_integral<Dst>::value &&
@@ -366,6 +376,8 @@
Dst ClampFloor(Src value) {
return saturated_cast<Dst>(std::floor(value));
}
+
+// Rounds towards positive infinity (i.e., up).
template <typename Dst = int,
typename Src,
typename = std::enable_if_t<std::is_integral<Dst>::value &&
@@ -373,13 +385,22 @@
Dst ClampCeil(Src value) {
return saturated_cast<Dst>(std::ceil(value));
}
+
+// Rounds towards nearest integer, with ties away from zero.
+// This means that 0.5 will be rounded to 1 and 1.5 will be rounded to 2.
+// Similarly, -0.5 will be rounded to -1 and -1.5 will be rounded to -2.
+//
+// This is normally not what you want accuracy-wise (it introduces a small bias
+// away from zero), and it is not the fastest option, but it is frequently what
+// existing code expects. Compare with saturated_cast<Dst>(std::nearbyint(x))
+// or std::lrint(x), which would round 0.5 and -0.5 to 0 but 1.5 to 2 and
+// -1.5 to -2.
template <typename Dst = int,
typename Src,
typename = std::enable_if_t<std::is_integral<Dst>::value &&
std::is_floating_point<Src>::value>>
Dst ClampRound(Src value) {
- const Src rounded =
- (value >= 0.0f) ? std::floor(value + 0.5f) : std::ceil(value - 0.5f);
+ const Src rounded = std::round(value);
return saturated_cast<Dst>(rounded);
}
diff --git a/base/strings/safe_sprintf_unittest.cc b/base/strings/safe_sprintf_unittest.cc
index af52d1c..9086e52 100644
--- a/base/strings/safe_sprintf_unittest.cc
+++ b/base/strings/safe_sprintf_unittest.cc
@@ -12,7 +12,7 @@
#include <limits>
#include <memory>
-
+#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_config.h"
#include "polyfills/base/check_op.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index a6fcbbb..30819bb 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc
@@ -684,7 +684,14 @@
}
}
-#if defined(_LIBCPP_ENABLE_ASSERTIONS)
+// Chromium development assumes StringPiece (which is std::string_view) is
+// implemented with an STL that enables hardening checks. We treat bugs that
+// trigger one of these conditions as functional rather than security bugs. If
+// this test fails on some embedder, it should not be disabled. Instead, the
+// embedder should fix their STL or build configuration to enable corresponding
+// hardening checks.
+//
+// See https://chromium.googlesource.com/chromium/src/+/main/docs/security/faq.md#indexing-a-container-out-of-bounds-hits-a-libcpp_verbose_abort_is-this-a-security-bug
TEST(StringPieceTest, OutOfBoundsDeath) {
{
constexpr StringPiece piece;
@@ -726,7 +733,6 @@
int length = -1;
ASSERT_DEATH_IF_SUPPORTED({ StringPiece piece("hello", length); }, "");
}
-#endif // defined(_LIBCPP_ENABLE_ASSERTIONS)
TEST(StringPieceTest, ConstexprData) {
{
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
index 1913fdc..6720744 100644
--- a/base/strings/stringprintf.cc
+++ b/base/strings/stringprintf.cc
@@ -16,100 +16,6 @@
namespace gurl_base {
-namespace {
-
-// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
-// is the size of the buffer. These return the number of characters in the
-// formatted string excluding the NUL terminator. If the buffer is not
-// large enough to accommodate the formatted string without truncation, they
-// return the number of characters that would be in the fully-formatted string
-// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
-inline int vsnprintfT(char* buffer,
- size_t buf_size,
- const char* format,
- va_list argptr) {
- return gurl_base::vsnprintf(buffer, buf_size, format, argptr);
-}
-
-#if BUILDFLAG(IS_WIN)
-inline int vsnprintfT(wchar_t* buffer,
- size_t buf_size,
- const wchar_t* format,
- va_list argptr) {
- return gurl_base::vswprintf(buffer, buf_size, format, argptr);
-}
-#endif
-
-// Templatized backend for StringPrintF/StringAppendF. This does not finalize
-// the va_list, the caller is expected to do that.
-template <class CharT>
-static void StringAppendVT(std::basic_string<CharT>* dst,
- const CharT* format,
- va_list ap) {
- // First try with a small fixed size buffer.
- // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
- // and StringUtilTest.StringPrintfBounds.
- CharT stack_buf[1024];
-
- va_list ap_copy;
- va_copy(ap_copy, ap);
-
- gurl_base::ScopedClearLastError last_error;
- int result = vsnprintfT(stack_buf, std::size(stack_buf), format, ap_copy);
- va_end(ap_copy);
-
- if (result >= 0 && static_cast<size_t>(result) < std::size(stack_buf)) {
- // It fit.
- dst->append(stack_buf, static_cast<size_t>(result));
- return;
- }
-
- // Repeatedly increase buffer size until it fits.
- size_t mem_length = std::size(stack_buf);
- while (true) {
- if (result < 0) {
-#if BUILDFLAG(IS_WIN)
- // On Windows, vsnprintfT always returns the number of characters in a
- // fully-formatted string, so if we reach this point, something else is
- // wrong and no amount of buffer-doubling is going to fix it.
- return;
-#else
- if (errno != 0 && errno != EOVERFLOW)
- return;
- // Try doubling the buffer size.
- mem_length *= 2;
-#endif
- } else {
- // We need exactly "result + 1" characters.
- mem_length = static_cast<size_t>(result) + 1;
- }
-
- if (mem_length > 32 * 1024 * 1024) {
- // That should be plenty, don't try anything larger. This protects
- // against huge allocations when using vsnprintfT implementations that
- // return -1 for reasons other than overflow without setting errno.
- GURL_DLOG(WARNING) << "Unable to printf the requested string due to size.";
- return;
- }
-
- std::vector<CharT> mem_buf(mem_length);
-
- // NOTE: You can only use a va_list once. Since we're in a while loop, we
- // need to make a new copy each time so we don't use up the original.
- va_copy(ap_copy, ap);
- result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
- va_end(ap_copy);
-
- if ((result >= 0) && (static_cast<size_t>(result) < mem_length)) {
- // It fit.
- dst->append(&mem_buf[0], static_cast<size_t>(result));
- return;
- }
- }
-}
-
-} // namespace
-
std::string StringPrintf(const char* format, ...) {
va_list ap;
va_start(ap, format);
@@ -119,17 +25,6 @@
return result;
}
-#if BUILDFLAG(IS_WIN)
-std::wstring StringPrintf(const wchar_t* format, ...) {
- va_list ap;
- va_start(ap, format);
- std::wstring result;
- StringAppendV(&result, format, ap);
- va_end(ap);
- return result;
-}
-#endif
-
std::string StringPrintV(const char* format, va_list ap) {
std::string result;
StringAppendV(&result, format, ap);
@@ -143,23 +38,68 @@
va_end(ap);
}
-#if BUILDFLAG(IS_WIN)
-void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
- va_list ap;
- va_start(ap, format);
- StringAppendV(dst, format, ap);
- va_end(ap);
-}
-#endif
-
void StringAppendV(std::string* dst, const char* format, va_list ap) {
- StringAppendVT(dst, format, ap);
-}
+ // First try with a small fixed size buffer.
+ // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
+ // and StringUtilTest.StringPrintfBounds.
+ char stack_buf[1024];
+ va_list ap_copy;
+ va_copy(ap_copy, ap);
+
+ gurl_base::ScopedClearLastError last_error;
+ int result = vsnprintf(stack_buf, std::size(stack_buf), format, ap_copy);
+ va_end(ap_copy);
+
+ if (result >= 0 && static_cast<size_t>(result) < std::size(stack_buf)) {
+ // It fit.
+ dst->append(stack_buf, static_cast<size_t>(result));
+ return;
+ }
+
+ // Repeatedly increase buffer size until it fits.
+ size_t mem_length = std::size(stack_buf);
+ while (true) {
+ if (result < 0) {
#if BUILDFLAG(IS_WIN)
-void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
- StringAppendVT(dst, format, ap);
-}
+ // On Windows, vsnprintf always returns the number of characters in a
+ // fully-formatted string, so if we reach this point, something else is
+ // wrong and no amount of buffer-doubling is going to fix it.
+ return;
+#else
+ if (errno != 0 && errno != EOVERFLOW) {
+ return;
+ }
+ // Try doubling the buffer size.
+ mem_length *= 2;
#endif
+ } else {
+ // We need exactly "result + 1" characters.
+ mem_length = static_cast<size_t>(result) + 1;
+ }
+
+ if (mem_length > 32 * 1024 * 1024) {
+ // That should be plenty, don't try anything larger. This protects
+ // against huge allocations when using vsnprintf implementations that
+ // return -1 for reasons other than overflow without setting errno.
+ GURL_DLOG(WARNING) << "Unable to printf the requested string due to size.";
+ return;
+ }
+
+ std::vector<char> mem_buf(mem_length);
+
+ // NOTE: You can only use a va_list once. Since we're in a while loop, we
+ // need to make a new copy each time so we don't use up the original.
+ va_copy(ap_copy, ap);
+ result = vsnprintf(&mem_buf[0], mem_length, format, ap_copy);
+ va_end(ap_copy);
+
+ if ((result >= 0) && (static_cast<size_t>(result) < mem_length)) {
+ // It fit.
+ dst->append(&mem_buf[0], static_cast<size_t>(result));
+ return;
+ }
+ }
+}
} // namespace base
diff --git a/base/strings/stringprintf.h b/base/strings/stringprintf.h
index 5266a13..99d04e8 100644
--- a/base/strings/stringprintf.h
+++ b/base/strings/stringprintf.h
@@ -11,20 +11,12 @@
#include "polyfills/base/base_export.h"
#include "base/compiler_specific.h"
-#include "build/build_config.h"
namespace gurl_base {
// Return a C++ string given printf-like input.
[[nodiscard]] BASE_EXPORT std::string StringPrintf(const char* format, ...)
PRINTF_FORMAT(1, 2);
-#if BUILDFLAG(IS_WIN)
-// Note: Unfortunately compile time checking of the format string for UTF-16
-// strings is not supported by any compiler, thus these functions should be used
-// carefully and sparingly. Also applies to StringAppendV below.
-[[nodiscard]] BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...)
- WPRINTF_FORMAT(1, 2);
-#endif
// Return a C++ string given vprintf-like input.
[[nodiscard]] BASE_EXPORT std::string StringPrintV(const char* format,
@@ -34,20 +26,11 @@
// Append result to a supplied string.
BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...)
PRINTF_FORMAT(2, 3);
-#if BUILDFLAG(IS_WIN)
-BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...)
- WPRINTF_FORMAT(2, 3);
-#endif
// Lower-level routine that takes a va_list and appends to a specified
// string. All other routines are just convenience wrappers around it.
BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap)
PRINTF_FORMAT(2, 0);
-#if BUILDFLAG(IS_WIN)
-BASE_EXPORT void StringAppendV(std::wstring* dst,
- const wchar_t* format,
- va_list ap) WPRINTF_FORMAT(2, 0);
-#endif
} // namespace base
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc
index 45fa9c6..981c45c 100644
--- a/base/strings/stringprintf_unittest.cc
+++ b/base/strings/stringprintf_unittest.cc
@@ -35,45 +35,24 @@
TEST(StringPrintfTest, StringPrintfMisc) {
EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
-#if BUILDFLAG(IS_WIN)
- EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
-#endif
}
TEST(StringPrintfTest, StringAppendfEmptyString) {
std::string value("Hello");
StringAppendF(&value, "%s", "");
EXPECT_EQ("Hello", value);
-
-#if BUILDFLAG(IS_WIN)
- std::wstring valuew(L"Hello");
- StringAppendF(&valuew, L"%ls", L"");
- EXPECT_EQ(L"Hello", valuew);
-#endif
}
TEST(StringPrintfTest, StringAppendfString) {
std::string value("Hello");
StringAppendF(&value, " %s", "World");
EXPECT_EQ("Hello World", value);
-
-#if BUILDFLAG(IS_WIN)
- std::wstring valuew(L"Hello");
- StringAppendF(&valuew, L" %ls", L"World");
- EXPECT_EQ(L"Hello World", valuew);
-#endif
}
TEST(StringPrintfTest, StringAppendfInt) {
std::string value("Hello");
StringAppendF(&value, " %d", 123);
EXPECT_EQ("Hello 123", value);
-
-#if BUILDFLAG(IS_WIN)
- std::wstring valuew(L"Hello");
- StringAppendF(&valuew, L" %d", 123);
- EXPECT_EQ(L"Hello 123", valuew);
-#endif
}
// Make sure that lengths exactly around the initial buffer size are handled
@@ -93,11 +72,6 @@
src[kSrcLen - i] = 0;
std::string out;
EXPECT_EQ(src, StringPrintf("%s", src));
-
-#if BUILDFLAG(IS_WIN)
- srcw[kSrcLen - i] = 0;
- EXPECT_EQ(srcw, StringPrintf(L"%ls", srcw));
-#endif
}
}
@@ -126,12 +100,6 @@
std::string out;
StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
EXPECT_EQ("1 foo bar", out);
-
-#if BUILDFLAG(IS_WIN)
- std::wstring outw;
- StringAppendVTestHelper(&outw, L"%d foo %ls", 1, L"bar");
- EXPECT_EQ(L"1 foo bar", outw);
-#endif
}
// Test the boundary condition for the size of the string_util's
@@ -150,16 +118,6 @@
EXPECT_EQ(src, StringPrintf("%s", src));
}
-#if BUILDFLAG(IS_WIN)
-TEST(StringPrintfTest, Invalid) {
- wchar_t invalid[2];
- invalid[0] = 0xffff;
- invalid[1] = 0;
-
- EXPECT_EQ(invalid, StringPrintf(L"%ls", invalid));
-}
-#endif
-
// Test that StringPrintf and StringAppendV do not change errno.
TEST(StringPrintfTest, StringPrintfErrno) {
errno = 1;
diff --git a/base/strings/sys_string_conversions_unittest.cc b/base/strings/sys_string_conversions_unittest.cc
index 0e3dfa1..5d898aa 100644
--- a/base/strings/sys_string_conversions_unittest.cc
+++ b/base/strings/sys_string_conversions_unittest.cc
@@ -25,7 +25,8 @@
EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));
EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));
- // >16 bits
+ // A value outside of the BMP and therefore not representable with one UTF-16
+ // code unit.
EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));
// Error case. When Windows finds a UTF-16 character going off the end of
@@ -52,7 +53,9 @@
TEST(SysStrings, SysUTF8ToWide) {
EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));
EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
- // >16 bits
+
+ // A value outside of the BMP and therefore not representable with one UTF-16
+ // code unit.
EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));
// Error case. When Windows finds an invalid UTF-8 character, it just skips
@@ -83,7 +86,8 @@
EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world"));
EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d"));
- // >16 bits
+ // A value outside of the BMP and therefore not representable with one UTF-16
+ // code unit.
EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA));
// Error case. When Windows finds a UTF-16 character going off the end of
@@ -114,7 +118,9 @@
#endif
EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world"));
EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
- // >16 bits
+
+ // A value outside of the BMP and therefore not representable with one UTF-16
+ // code unit.
EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80"));
// Error case. When Windows finds an invalid UTF-8 character, it just skips
diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc
index 149a6ee..d7bbe62 100644
--- a/base/strings/utf_string_conversion_utils.cc
+++ b/base/strings/utf_string_conversion_utils.cc
@@ -11,13 +11,13 @@
// CountUnicodeCharacters ------------------------------------------------------
-absl::optional<size_t> CountUnicodeCharacters(const char16_t* src,
- size_t src_len,
+absl::optional<size_t> CountUnicodeCharacters(std::string_view text,
size_t limit) {
base_icu::UChar32 unused = 0;
size_t count = 0;
- for (size_t index = 0; count < limit && index < src_len; ++count, ++index) {
- if (!ReadUnicodeCharacter(src, src_len, &index, &unused)) {
+ for (size_t index = 0; count < limit && index < text.size();
+ ++count, ++index) {
+ if (!ReadUnicodeCharacter(text.data(), text.size(), &index, &unused)) {
return absl::nullopt;
}
}
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index 638a59f..183a7a6 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h
@@ -11,7 +11,9 @@
#include <stddef.h>
#include <stdint.h>
+#include <limits>
#include <string>
+#include <string_view>
#include "polyfills/base/base_export.h"
#include "base/third_party/icu/icu_utf.h"
@@ -43,10 +45,9 @@
// CountUnicodeCharacters ------------------------------------------------------
// Returns the number of Unicode characters in `text`, up to the supplied
-// `limit`, if `text` contains valid UTF-16. Returns `nullopt` otherwise.
+// `limit`, if `text` contains valid UTF-8. Returns `nullopt` otherwise.
BASE_EXPORT absl::optional<size_t> CountUnicodeCharacters(
- const char16_t* src,
- size_t src_len,
+ std::string_view text,
size_t limit = std::numeric_limits<size_t>::max());
// ReadUnicodeCharacter --------------------------------------------------------
diff --git a/base/strings/utf_string_conversion_utils_unittest.cc b/base/strings/utf_string_conversion_utils_unittest.cc
index 4e700dc..10f9201 100644
--- a/base/strings/utf_string_conversion_utils_unittest.cc
+++ b/base/strings/utf_string_conversion_utils_unittest.cc
@@ -12,19 +12,24 @@
namespace gurl_base {
TEST(UtfStringConversionUtilsTest, CountUnicodeCharacters) {
- struct TestCase {
- std::u16string value;
+ const struct TestCase {
+ std::string value;
size_t limit;
absl::optional<size_t> count;
} test_cases[] = {
- {u"", 0, 0}, {u"abc", 1, 1},
- {u"abc", 3, 3}, {u"abc", 0, 0},
- {u"abc", 4, 3}, {u"abc\U0001F4A9", 4, 4},
- {u"\U0001F4A9", 1, 1}, {{1, 0xD801u}, 5, absl::nullopt},
+ {"", 0, 0},
+ {"abc", 1, 1},
+ {"abc", 3, 3},
+ {"abc", 0, 0},
+ {"abc", 4, 3},
+ // The casts and u8 string literals are needed here so that we don't
+ // trigger linter errors about invalid ASCII values.
+ {reinterpret_cast<const char*>(u8"abc\U0001F4A9"), 4, 4},
+ {reinterpret_cast<const char*>(u8"\U0001F4A9"), 1, 1},
+ {{1, static_cast<char>(-1)}, 5, absl::nullopt},
};
for (const auto& test_case : test_cases) {
- EXPECT_EQ(CountUnicodeCharacters(test_case.value.data(),
- test_case.value.length(), test_case.limit),
+ EXPECT_EQ(CountUnicodeCharacters(test_case.value, test_case.limit),
test_case.count);
}
}
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h
index 1ee702c..d7f1ce7 100644
--- a/base/strings/utf_string_conversions.h
+++ b/base/strings/utf_string_conversions.h
@@ -67,27 +67,24 @@
// The conversion functions in this file should not be used to convert string
// literals. Instead, the corresponding prefixes (e.g. u"" for UTF16 or L"" for
-// Wide) should be used. Deleting the overloads here catches these cases at
-// compile time.
+// Wide) should be used. Catch those cases with overloads that assert at compile
+// time.
template <size_t N>
-std::u16string WideToUTF16(const wchar_t (&str)[N]) {
+[[noreturn]] std::u16string WideToUTF16(const wchar_t (&str)[N]) {
static_assert(AlwaysFalse<decltype(N)>,
- "Error: Use the u\"...\" prefix instead.");
- return std::u16string();
+ "Error: Use u\"...\" to create a std::u16string literal.");
}
template <size_t N>
-std::u16string UTF8ToUTF16(const char (&str)[N]) {
+[[noreturn]] std::u16string UTF8ToUTF16(const char (&str)[N]) {
static_assert(AlwaysFalse<decltype(N)>,
- "Error: Use the u\"...\" prefix instead.");
- return std::u16string();
+ "Error: Use u\"...\" to create a std::u16string literal.");
}
template <size_t N>
-std::u16string ASCIIToUTF16(const char (&str)[N]) {
+[[noreturn]] std::u16string ASCIIToUTF16(const char (&str)[N]) {
static_assert(AlwaysFalse<decltype(N)>,
- "Error: Use the u\"...\" prefix instead.");
- return std::u16string();
+ "Error: Use u\"...\" to create a std::u16string literal.");
}
// Mutable character arrays are usually only populated during runtime. Continue
diff --git a/copy.bara.sky b/copy.bara.sky
index 72fde7e..45c9192 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -113,7 +113,9 @@
core.replace("base/trace_event/base_tracing.h", "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"),
core.replace("base/trace_event/base_tracing_forward.h", "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"),
core.replace("#include \"base/strings/string_number_conversions_win.h\"", ""),
- core.replace("#include \"base/allocator/partition_allocator/partition_alloc_config.h\"", ""),
+ # Patch out C++20 feature use
+ core.replace(" std::is_same<iter_value_t<T>, char8_t>,", ""),
+ #core.replace("#include \"base/allocator/partition_allocator/partition_alloc_config.h\"", ""),
# Use system ICU.
core.replace(
diff --git a/url/gurl.cc b/url/gurl.cc
index dea8c7a..f905498 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -9,12 +9,12 @@
#include <algorithm>
#include <memory>
#include <ostream>
+#include <string_view>
#include <utility>
#include "polyfills/base/check_op.h"
#include "base/no_destructor.h"
#include "polyfills/base/notreached.h"
-#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "polyfills/base/trace_event/memory_usage_estimator.h"
@@ -43,11 +43,11 @@
other.parsed_ = url::Parsed();
}
-GURL::GURL(gurl_base::StringPiece url_string) {
+GURL::GURL(std::string_view url_string) {
InitCanonical(url_string, true);
}
-GURL::GURL(gurl_base::StringPiece16 url_string) {
+GURL::GURL(std::u16string_view url_string) {
InitCanonical(url_string, true);
}
@@ -174,7 +174,7 @@
}
// Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::Resolve(gurl_base::StringPiece relative) const {
+GURL GURL::Resolve(std::string_view relative) const {
// Not allowed for invalid URLs.
if (!is_valid_)
return GURL();
@@ -200,7 +200,7 @@
}
// Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::Resolve(gurl_base::StringPiece16 relative) const {
+GURL GURL::Resolve(std::u16string_view relative) const {
// Not allowed for invalid URLs.
if (!is_valid_)
return GURL();
@@ -355,7 +355,7 @@
return IsAboutUrl(url::kAboutSrcdocPath);
}
-bool GURL::SchemeIs(gurl_base::StringPiece lower_ascii_scheme) const {
+bool GURL::SchemeIs(std::string_view lower_ascii_scheme) const {
GURL_DCHECK(gurl_base::IsStringASCII(lower_ascii_scheme));
GURL_DCHECK(gurl_base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
@@ -378,7 +378,7 @@
return SchemeIsCryptographic(scheme_piece());
}
-bool GURL::SchemeIsCryptographic(gurl_base::StringPiece lower_ascii_scheme) {
+bool GURL::SchemeIsCryptographic(std::string_view lower_ascii_scheme) {
GURL_DCHECK(gurl_base::IsStringASCII(lower_ascii_scheme));
GURL_DCHECK(gurl_base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
@@ -413,13 +413,13 @@
return ComponentString(file_component);
}
-gurl_base::StringPiece GURL::PathForRequestPiece() const {
+std::string_view GURL::PathForRequestPiece() const {
GURL_DCHECK(parsed_.path.is_nonempty())
<< "Canonical path for requests should be non-empty";
if (parsed_.ref.is_valid()) {
// Clip off the reference when it exists. The reference starts after the
// #-sign, so we have to subtract one to also remove it.
- return gurl_base::StringPiece(spec_).substr(
+ return std::string_view(spec_).substr(
parsed_.path.begin, parsed_.ref.begin - parsed_.path.begin - 1);
}
// Compute the actual path length, rather than depending on the spec's
@@ -429,7 +429,7 @@
if (parsed_.query.is_valid())
path_len = parsed_.query.end() - parsed_.path.begin;
- return gurl_base::StringPiece(spec_).substr(parsed_.path.begin, path_len);
+ return std::string_view(spec_).substr(parsed_.path.begin, path_len);
}
std::string GURL::PathForRequest() const {
@@ -440,7 +440,7 @@
return std::string(HostNoBracketsPiece());
}
-gurl_base::StringPiece GURL::HostNoBracketsPiece() const {
+std::string_view GURL::HostNoBracketsPiece() const {
// If host looks like an IPv6 literal, strip the square brackets.
url::Component h(parsed_.host);
if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
@@ -454,9 +454,9 @@
return std::string(GetContentPiece());
}
-gurl_base::StringPiece GURL::GetContentPiece() const {
+std::string_view GURL::GetContentPiece() const {
if (!is_valid_)
- return gurl_base::StringPiece();
+ return std::string_view();
url::Component content_component = parsed_.GetContent();
if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.is_valid())
content_component.len -= parsed_.ref.len + 1;
@@ -472,7 +472,7 @@
return *empty_gurl;
}
-bool GURL::DomainIs(gurl_base::StringPiece canonical_domain) const {
+bool GURL::DomainIs(std::string_view canonical_domain) const {
if (!is_valid_)
return false;
@@ -486,8 +486,8 @@
int ref_position = parsed_.CountCharactersBefore(url::Parsed::REF, true);
int ref_position_other =
other.parsed_.CountCharactersBefore(url::Parsed::REF, true);
- return gurl_base::StringPiece(spec_).substr(0, ref_position) ==
- gurl_base::StringPiece(other.spec_).substr(0, ref_position_other);
+ return std::string_view(spec_).substr(0, ref_position) ==
+ std::string_view(other.spec_).substr(0, ref_position_other);
}
void GURL::Swap(GURL* other) {
@@ -503,7 +503,7 @@
(parsed_.inner_parsed() ? sizeof(url::Parsed) : 0);
}
-bool GURL::IsAboutUrl(gurl_base::StringPiece allowed_path) const {
+bool GURL::IsAboutUrl(std::string_view allowed_path) const {
if (!SchemeIs(url::kAboutScheme))
return false;
@@ -514,8 +514,8 @@
}
// static
-bool GURL::IsAboutPath(gurl_base::StringPiece actual_path,
- gurl_base::StringPiece allowed_path) {
+bool GURL::IsAboutPath(std::string_view actual_path,
+ std::string_view allowed_path) {
if (!gurl_base::StartsWith(actual_path, allowed_path))
return false;
@@ -549,22 +549,22 @@
return !(x == y);
}
-bool operator==(const GURL& x, const gurl_base::StringPiece& spec) {
+bool operator==(const GURL& x, std::string_view spec) {
GURL_DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec)
<< "Comparisons of GURLs and strings must ensure as a precondition that "
"the string is fully canonicalized.";
return x.possibly_invalid_spec() == spec;
}
-bool operator==(const gurl_base::StringPiece& spec, const GURL& x) {
+bool operator==(std::string_view spec, const GURL& x) {
return x == spec;
}
-bool operator!=(const GURL& x, const gurl_base::StringPiece& spec) {
+bool operator!=(const GURL& x, std::string_view spec) {
return !(x == spec);
}
-bool operator!=(const gurl_base::StringPiece& spec, const GURL& x) {
+bool operator!=(std::string_view spec, const GURL& x) {
return !(x == spec);
}
diff --git a/url/gurl.h b/url/gurl.h
index 1b29989..931c803 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -10,11 +10,11 @@
#include <iosfwd>
#include <memory>
#include <string>
+#include <string_view>
#include "polyfills/base/component_export.h"
#include "polyfills/base/debug/alias.h"
#include "base/debug/crash_logging.h"
-#include "base/strings/string_piece.h"
#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -46,8 +46,8 @@
// will know to escape this and produce the desired result.
class COMPONENT_EXPORT(URL) GURL {
public:
- typedef url::StringPieceReplacements<char> Replacements;
- typedef url::StringPieceReplacements<char16_t> ReplacementsW;
+ using Replacements = url::StringViewReplacements<char>;
+ using ReplacementsW = url::StringViewReplacements<char16_t>;
// Creates an empty, invalid URL.
GURL();
@@ -57,9 +57,9 @@
GURL(const GURL& other);
GURL(GURL&& other) noexcept;
- // The strings to this contructor should be UTF-8 / UTF-16.
- explicit GURL(gurl_base::StringPiece url_string);
- explicit GURL(gurl_base::StringPiece16 url_string);
+ // The strings to this constructor should be UTF-8 / UTF-16.
+ explicit GURL(std::string_view url_string);
+ explicit GURL(std::u16string_view url_string);
// Constructor for URLs that have already been parsed and canonicalized. This
// is used for conversions from KURL, for example. The caller must supply all
@@ -152,8 +152,8 @@
//
// It is an error to resolve a URL relative to an invalid URL. The result
// will be the empty URL.
- GURL Resolve(gurl_base::StringPiece relative) const;
- GURL Resolve(gurl_base::StringPiece16 relative) const;
+ GURL Resolve(std::string_view relative) const;
+ GURL Resolve(std::u16string_view relative) const;
// Creates a new GURL by replacing the current URL's components with the
// supplied versions. See the Replacements class in url_canon.h for more.
@@ -238,7 +238,7 @@
// Returns true if the given parameter (should be lower-case ASCII to match
// the canonicalized scheme) is the scheme for this URL. Do not include a
// colon.
- bool SchemeIs(gurl_base::StringPiece lower_ascii_scheme) const;
+ bool SchemeIs(std::string_view lower_ascii_scheme) const;
// Returns true if the scheme is "http" or "https".
bool SchemeIsHTTPOrHTTPS() const;
@@ -267,7 +267,7 @@
bool SchemeIsCryptographic() const;
// As above, but static. Parameter should be lower-case ASCII.
- static bool SchemeIsCryptographic(gurl_base::StringPiece lower_ascii_scheme);
+ static bool SchemeIsCryptographic(std::string_view lower_ascii_scheme);
// Returns true if the scheme is "blob".
bool SchemeIsBlob() const {
@@ -286,7 +286,7 @@
// It is an error to get the content of an invalid URL: the result will be an
// empty string.
std::string GetContent() const;
- gurl_base::StringPiece GetContentPiece() const;
+ std::string_view GetContentPiece() const;
// Returns true if the hostname is an IP address. Note: this function isn't
// as cheap as a simple getter because it re-parses the hostname to verify.
@@ -297,7 +297,7 @@
std::string scheme() const {
return ComponentString(parsed_.scheme);
}
- gurl_base::StringPiece scheme_piece() const {
+ std::string_view scheme_piece() const {
return ComponentStringPiece(parsed_.scheme);
}
@@ -305,7 +305,7 @@
std::string username() const {
return ComponentString(parsed_.username);
}
- gurl_base::StringPiece username_piece() const {
+ std::string_view username_piece() const {
return ComponentStringPiece(parsed_.username);
}
@@ -313,7 +313,7 @@
std::string password() const {
return ComponentString(parsed_.password);
}
- gurl_base::StringPiece password_piece() const {
+ std::string_view password_piece() const {
return ComponentStringPiece(parsed_.password);
}
@@ -327,7 +327,7 @@
std::string host() const {
return ComponentString(parsed_.host);
}
- gurl_base::StringPiece host_piece() const {
+ std::string_view host_piece() const {
return ComponentStringPiece(parsed_.host);
}
@@ -338,7 +338,7 @@
std::string port() const {
return ComponentString(parsed_.port);
}
- gurl_base::StringPiece port_piece() const {
+ std::string_view port_piece() const {
return ComponentStringPiece(parsed_.port);
}
@@ -348,7 +348,7 @@
std::string path() const {
return ComponentString(parsed_.path);
}
- gurl_base::StringPiece path_piece() const {
+ std::string_view path_piece() const {
return ComponentStringPiece(parsed_.path);
}
@@ -357,7 +357,7 @@
std::string query() const {
return ComponentString(parsed_.query);
}
- gurl_base::StringPiece query_piece() const {
+ std::string_view query_piece() const {
return ComponentStringPiece(parsed_.query);
}
@@ -367,7 +367,7 @@
std::string ref() const {
return ComponentString(parsed_.ref);
}
- gurl_base::StringPiece ref_piece() const {
+ std::string_view ref_piece() const {
return ComponentStringPiece(parsed_.ref);
}
@@ -389,14 +389,14 @@
std::string PathForRequest() const;
// Returns the same characters as PathForRequest(), avoiding a copy.
- gurl_base::StringPiece PathForRequestPiece() const;
+ std::string_view PathForRequestPiece() const;
// Returns the host, excluding the square brackets surrounding IPv6 address
// literals. This can be useful for passing to getaddrinfo().
std::string HostNoBrackets() const;
// Returns the same characters as HostNoBrackets(), avoiding a copy.
- gurl_base::StringPiece HostNoBracketsPiece() const;
+ std::string_view HostNoBracketsPiece() const;
// Returns true if this URL's host matches or is in the same domain as
// the given input string. For example, if the hostname of the URL is
@@ -409,7 +409,7 @@
// This call is more efficient than getting the host and checking whether the
// host has the specific domain or not because no copies or object
// constructions are done.
- bool DomainIs(gurl_base::StringPiece canonical_domain) const;
+ bool DomainIs(std::string_view canonical_domain) const;
// Checks whether or not two URLs differ only in the ref (the part after
// the # character).
@@ -440,8 +440,8 @@
size_t EstimateMemoryUsage() const;
// Helper used by GURL::IsAboutUrl and KURL::IsAboutURL.
- static bool IsAboutPath(gurl_base::StringPiece actual_path,
- gurl_base::StringPiece allowed_path);
+ static bool IsAboutPath(std::string_view actual_path,
+ std::string_view allowed_path);
void WriteIntoTrace(perfetto::TracedValue context) const;
@@ -460,17 +460,17 @@
void InitializeFromCanonicalSpec();
// Helper used by IsAboutBlank and IsAboutSrcdoc.
- bool IsAboutUrl(gurl_base::StringPiece allowed_path) const;
+ bool IsAboutUrl(std::string_view allowed_path) const;
// Returns the substring of the input identified by the given component.
std::string ComponentString(const url::Component& comp) const {
return std::string(ComponentStringPiece(comp));
}
- gurl_base::StringPiece ComponentStringPiece(const url::Component& comp) const {
+ std::string_view ComponentStringPiece(const url::Component& comp) const {
if (comp.is_empty())
- return gurl_base::StringPiece();
- return gurl_base::StringPiece(spec_).substr(static_cast<size_t>(comp.begin),
- static_cast<size_t>(comp.len));
+ return std::string_view();
+ return std::string_view(spec_).substr(static_cast<size_t>(comp.begin),
+ static_cast<size_t>(comp.len));
}
void ProcessFileSystemURLAfterReplaceComponents();
@@ -501,13 +501,13 @@
// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
// needlessly re-parsing |spec| into a temporary GURL.
COMPONENT_EXPORT(URL)
-bool operator==(const GURL& x, const gurl_base::StringPiece& spec);
+bool operator==(const GURL& x, std::string_view spec);
COMPONENT_EXPORT(URL)
-bool operator==(const gurl_base::StringPiece& spec, const GURL& x);
+bool operator==(std::string_view spec, const GURL& x);
COMPONENT_EXPORT(URL)
-bool operator!=(const GURL& x, const gurl_base::StringPiece& spec);
+bool operator!=(const GURL& x, std::string_view spec);
COMPONENT_EXPORT(URL)
-bool operator!=(const gurl_base::StringPiece& spec, const GURL& x);
+bool operator!=(std::string_view spec, const GURL& x);
// DEBUG_ALIAS_FOR_GURL(var_name, url) copies |url| into a new stack-allocated
// variable named |<var_name>|. This helps ensure that the value of |url| gets
diff --git a/url/gurl_abstract_tests.h b/url/gurl_abstract_tests.h
index d787d3e..6ef976c 100644
--- a/url/gurl_abstract_tests.h
+++ b/url/gurl_abstract_tests.h
@@ -11,7 +11,7 @@
// by parametrizing the tests with a class that has to expose the following
// members:
// using UrlType = ...;
-// static UrlType CreateUrlFromString(gurl_base::StringPiece s);
+// static UrlType CreateUrlFromString(std::string_view s);
// static bool IsAboutBlank(const UrlType& url);
// static bool IsAboutSrcdoc(const UrlType& url);
template <typename TUrlTraits>
@@ -23,7 +23,7 @@
// avoid hitting: explicit qualification required to use member 'IsAboutBlank'
// from dependent base class.
using UrlType = typename TUrlTraits::UrlType;
- UrlType CreateUrlFromString(gurl_base::StringPiece s) {
+ UrlType CreateUrlFromString(std::string_view s) {
return TUrlTraits::CreateUrlFromString(s);
}
bool IsAboutBlank(const UrlType& url) {
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index c6be656..36dd969 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -838,8 +838,8 @@
GURL url_with_escape_chars("https://www.,.test");
EXPECT_TRUE(url_with_escape_chars.is_valid());
- EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test");
- EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test"));
+ EXPECT_EQ(url_with_escape_chars.host(), "www.,.test");
+ EXPECT_TRUE(url_with_escape_chars.DomainIs(",.test"));
}
TEST(GURLTest, DomainIsTerminatingDotBehavior) {
@@ -1167,7 +1167,7 @@
public:
using UrlType = GURL;
- static UrlType CreateUrlFromString(gurl_base::StringPiece s) { return GURL(s); }
+ static UrlType CreateUrlFromString(std::string_view s) { return GURL(s); }
static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
diff --git a/url/origin.cc b/url/origin.cc
index 0274f34..45017ea 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -9,6 +9,7 @@
#include <algorithm>
#include <ostream>
#include <string>
+#include <string_view>
#include <tuple>
#include <utility>
@@ -20,7 +21,6 @@
#include "base/debug/crash_logging.h"
#include "base/pickle.h"
#include "base/strings/strcat.h"
-#include "base/strings/string_piece.h"
#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "polyfills/base/trace_event/memory_usage_estimator.h"
#include "base/unguessable_token.h"
@@ -79,8 +79,8 @@
// static
absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
- gurl_base::StringPiece scheme,
- gurl_base::StringPiece host,
+ std::string_view scheme,
+ std::string_view host,
uint16_t port) {
SchemeHostPort tuple(std::string(scheme), std::string(host), port,
SchemeHostPort::CHECK_CANONICALIZATION);
@@ -91,8 +91,8 @@
// static
absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
- gurl_base::StringPiece precursor_scheme,
- gurl_base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Origin::Nonce& nonce) {
SchemeHostPort precursor(std::string(precursor_scheme),
@@ -249,7 +249,7 @@
return url.scheme() == tuple_.scheme();
}
-bool Origin::DomainIs(gurl_base::StringPiece canonical_domain) const {
+bool Origin::DomainIs(std::string_view canonical_domain) const {
return !opaque() && url::DomainIs(tuple_.host(), canonical_domain);
}
diff --git a/url/origin.h b/url/origin.h
index a49cbca..d364454 100644
--- a/url/origin.h
+++ b/url/origin.h
@@ -9,32 +9,24 @@
#include <memory>
#include <string>
+#include <string_view>
#include "polyfills/base/component_export.h"
#include "polyfills/base/debug/alias.h"
#include "base/debug/crash_logging.h"
#include "base/gtest_prod_util.h"
-#include "base/strings/string_piece_forward.h"
#include "base/strings/string_util.h"
#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "base/unguessable_token.h"
#include "build/build_config.h"
#include "build/buildflag.h"
+#include "build/robolectric_buildflags.h"
#include "absl/types/optional.h"
#include "url/scheme_host_port.h"
-#if BUILDFLAG(IS_ANDROID)
-#include <jni.h>
-
-namespace gurl_base {
-namespace android {
-template <typename>
-class ScopedJavaLocalRef;
-template <typename>
-class JavaRef;
-} // namespace android
-} // namespace base
-#endif // BUILDFLAG(IS_ANDROID)
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+#include "base/android/jni_android.h"
+#endif
class GURL;
@@ -199,8 +191,8 @@
// dangerous recanonicalization); other potential callers should prefer the
// 'GURL'-based constructor.
static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
- gurl_base::StringPiece scheme,
- gurl_base::StringPiece host,
+ std::string_view scheme,
+ std::string_view host,
uint16_t port);
// Creates an origin without sanity checking that the host is canonicalized.
@@ -285,7 +277,7 @@
GURL GetURL() const;
// Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
- bool DomainIs(gurl_base::StringPiece canonical_domain) const;
+ bool DomainIs(std::string_view canonical_domain) const;
// Allows Origin to be used as a key in STL (for example, a std::set or
// std::map).
@@ -314,8 +306,8 @@
// and precursor information.
std::string GetDebugString(bool include_nonce = true) const;
-#if BUILDFLAG(IS_ANDROID)
- gurl_base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+ gurl_base::android::ScopedJavaLocalRef<jobject> ToJavaObject() const;
static Origin FromJavaObject(
const gurl_base::android::JavaRef<jobject>& java_origin);
static jlong CreateNative(JNIEnv* env,
@@ -334,6 +326,13 @@
size_t EstimateMemoryUsage() const;
private:
+#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_ROBOLECTRIC)
+ friend Origin CreateOpaqueOriginForAndroid(
+ const std::string& scheme,
+ const std::string& host,
+ uint16_t port,
+ const gurl_base::UnguessableToken& nonce_token);
+#endif
friend class blink::SecurityOrigin;
friend class blink::SecurityOriginTest;
friend class blink::StorageKey;
@@ -419,8 +418,8 @@
// back and forth over IPC (as transitioning through GURL would risk
// potentially dangerous recanonicalization).
static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
- gurl_base::StringPiece precursor_scheme,
- gurl_base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Nonce& nonce);
diff --git a/url/origin_abstract_tests.cc b/url/origin_abstract_tests.cc
index d48a9f5..175abff 100644
--- a/url/origin_abstract_tests.cc
+++ b/url/origin_abstract_tests.cc
@@ -29,7 +29,7 @@
}
// static
-Origin UrlOriginTestTraits::CreateOriginFromString(gurl_base::StringPiece s) {
+Origin UrlOriginTestTraits::CreateOriginFromString(std::string_view s) {
return Origin::Create(GURL(s));
}
@@ -40,7 +40,7 @@
// static
Origin UrlOriginTestTraits::CreateWithReferenceOrigin(
- gurl_base::StringPiece url,
+ std::string_view url,
const Origin& reference_origin) {
return Origin::Resolve(GURL(url), reference_origin);
}
@@ -94,7 +94,7 @@
}
// static
-bool UrlOriginTestTraits::IsValidUrl(gurl_base::StringPiece str) {
+bool UrlOriginTestTraits::IsValidUrl(std::string_view str) {
return GURL(str).is_valid();
}
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
index b89f63f..78483bf 100644
--- a/url/origin_abstract_tests.h
+++ b/url/origin_abstract_tests.h
@@ -6,10 +6,10 @@
#define URL_ORIGIN_ABSTRACT_TESTS_H_
#include <string>
+#include <string_view>
#include <type_traits>
#include "base/containers/contains.h"
-#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
@@ -28,10 +28,10 @@
using OriginType = Origin;
// Constructing an origin.
- static OriginType CreateOriginFromString(gurl_base::StringPiece s);
+ static OriginType CreateOriginFromString(std::string_view s);
static OriginType CreateUniqueOpaqueOrigin();
static OriginType CreateWithReferenceOrigin(
- gurl_base::StringPiece url,
+ std::string_view url,
const OriginType& reference_origin);
static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin);
@@ -51,7 +51,7 @@
//
// TODO(lukasza): Consider merging together OriginTraitsBase here and
// UrlTraitsBase in //url/gurl_abstract_tests.h.
- static bool IsValidUrl(gurl_base::StringPiece str);
+ static bool IsValidUrl(std::string_view str);
// Only static members = no constructors are needed.
UrlOriginTestTraits() = delete;
@@ -95,13 +95,13 @@
// avoid hitting: explicit qualification required to use member 'IsOpaque'
// from dependent base class.
using OriginType = typename TOriginTraits::OriginType;
- OriginType CreateOriginFromString(gurl_base::StringPiece s) {
+ OriginType CreateOriginFromString(std::string_view s) {
return TOriginTraits::CreateOriginFromString(s);
}
OriginType CreateUniqueOpaqueOrigin() {
return TOriginTraits::CreateUniqueOpaqueOrigin();
}
- OriginType CreateWithReferenceOrigin(gurl_base::StringPiece url,
+ OriginType CreateWithReferenceOrigin(std::string_view url,
const OriginType& reference_origin) {
return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin);
}
@@ -132,7 +132,7 @@
std::string Serialize(const OriginType& origin) {
return TOriginTraits::Serialize(origin);
}
- bool IsValidUrl(gurl_base::StringPiece str) {
+ bool IsValidUrl(std::string_view str) {
return TOriginTraits::IsValidUrl(str);
}
@@ -213,7 +213,7 @@
VerifyOriginInvariants(origin);
}
- void TestUniqueOpaqueOrigin(gurl_base::StringPiece test_input) {
+ void TestUniqueOpaqueOrigin(std::string_view test_input) {
auto origin = this->CreateOriginFromString(test_input);
this->VerifyUniqueOpaqueOriginInvariants(origin);
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 5f28acc..ead042a 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -76,8 +76,8 @@
// Wrappers around url::Origin methods to expose it to tests.
absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
- gurl_base::StringPiece precursor_scheme,
- gurl_base::StringPiece precursor_host,
+ std::string_view precursor_scheme,
+ std::string_view precursor_host,
uint16_t precursor_port,
const Origin::Nonce& nonce) {
return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
@@ -371,8 +371,8 @@
TEST_F(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
struct TestCases {
- gurl_base::StringPiece scheme;
- gurl_base::StringPiece host;
+ std::string_view scheme;
+ std::string_view host;
uint16_t port = 80;
} cases[] = {{{"http\0more", 9}, {"example.com", 11}},
{{"http\0", 5}, {"example.com", 11}},
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 2db80a8..0b5e867 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -8,6 +8,7 @@
#include <string.h>
#include <ostream>
+#include <string_view>
#include <tuple>
#include "polyfills/base/check_op.h"
@@ -15,7 +16,6 @@
#include "polyfills/base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
-#include "base/strings/string_piece.h"
#include "polyfills/base/trace_event/memory_usage_estimator.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -28,7 +28,7 @@
namespace {
-bool IsCanonicalHost(const gurl_base::StringPiece& host) {
+bool IsCanonicalHost(const std::string_view& host) {
std::string canon_host;
// Try to canonicalize the host (copy/pasted from net/base. :( ).
@@ -56,8 +56,8 @@
// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
// behavior between these 2 layers, but we should avoid introducing new
// differences).
-bool IsValidInput(const gurl_base::StringPiece& scheme,
- const gurl_base::StringPiece& host,
+bool IsValidInput(const std::string_view& scheme,
+ const std::string_view& host,
uint16_t port,
SchemeHostPort::ConstructPolicy policy) {
// Empty schemes are never valid.
@@ -159,8 +159,8 @@
<< " Port: " << port;
}
-SchemeHostPort::SchemeHostPort(gurl_base::StringPiece scheme,
- gurl_base::StringPiece host,
+SchemeHostPort::SchemeHostPort(std::string_view scheme,
+ std::string_view host,
uint16_t port)
: SchemeHostPort(std::string(scheme),
std::string(host),
@@ -171,8 +171,8 @@
if (!url.is_valid())
return;
- gurl_base::StringPiece scheme = url.scheme_piece();
- gurl_base::StringPiece host = url.host_piece();
+ std::string_view scheme = url.scheme_piece();
+ std::string_view host = url.host_piece();
// A valid GURL never returns PORT_INVALID.
int port = url.EffectiveIntPort();
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h
index 0798fec..9938824 100644
--- a/url/scheme_host_port.h
+++ b/url/scheme_host_port.h
@@ -8,9 +8,9 @@
#include <stdint.h>
#include <string>
+#include <string_view>
#include "polyfills/base/component_export.h"
-#include "base/strings/string_piece.h"
class GURL;
@@ -84,9 +84,7 @@
// ports (e.g. 'file').
//
// Copies the data in |scheme| and |host|.
- SchemeHostPort(gurl_base::StringPiece scheme,
- gurl_base::StringPiece host,
- uint16_t port);
+ SchemeHostPort(std::string_view scheme, std::string_view host, uint16_t port);
// Metadata influencing whether or not the constructor should sanity check
// host canonicalization.
diff --git a/url/url_canon.h b/url/url_canon.h
index 55fe426..0af495a 100644
--- a/url/url_canon.h
+++ b/url/url_canon.h
@@ -8,6 +8,8 @@
#include <stdlib.h>
#include <string.h>
+#include <string_view>
+
#include "polyfills/base/component_export.h"
#include "polyfills/base/export_template.h"
#include "polyfills/base/memory/raw_ptr_exclusion.h"
@@ -57,6 +59,11 @@
// write the data, then use set_size() to declare the new length().
size_t capacity() const { return buffer_len_; }
+ // Returns the contents of the buffer as a string_view.
+ std::basic_string_view<T> view() const {
+ return std::basic_string_view<T>(data(), length());
+ }
+
// Called by the user of this class to get the output. The output will NOT
// be NULL-terminated. Call length() to get the
// length.
@@ -102,6 +109,8 @@
cur_len_ += str_len;
}
+ void Append(std::basic_string_view<T> str) { Append(str.data(), str.size()); }
+
void ReserveSizeIfNeeded(size_t estimated_size) {
// Reserve a bit extra to account for escaped chars.
if (estimated_size > buffer_len_)
@@ -278,7 +287,7 @@
//
// On error, returns false. The output in this case is undefined.
COMPONENT_EXPORT(URL)
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc
index 0472484..f1a9f1c 100644
--- a/url/url_canon_filesystemurl.cc
+++ b/url/url_canon_filesystemurl.cc
@@ -36,7 +36,7 @@
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("filesystem:", 11);
+ output->Append("filesystem:");
new_parsed->scheme.len = 10;
if (!inner_parsed || !inner_parsed->scheme.is_valid())
@@ -46,7 +46,7 @@
SchemeType inner_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) {
new_inner_parsed.scheme.begin = output->length();
- output->Append("file://", 7);
+ output->Append("file://");
new_inner_parsed.scheme.len = 4;
success &= CanonicalizePath(spec, inner_parsed->path, output,
&new_inner_parsed.path);
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc
index dae5c4c..5c243f6 100644
--- a/url/url_canon_fileurl.cc
+++ b/url/url_canon_fileurl.cc
@@ -4,7 +4,8 @@
// Functions for canonicalizing "file:" URLs.
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -18,13 +19,13 @@
bool IsLocalhost(const char* spec, int begin, int end) {
if (begin > end)
return false;
- return gurl_base::StringPiece(&spec[begin], end - begin) == "localhost";
+ return std::string_view(&spec[begin], end - begin) == "localhost";
}
bool IsLocalhost(const char16_t* spec, int begin, int end) {
if (begin > end)
return false;
- return gurl_base::StringPiece16(&spec[begin], end - begin) == u"localhost";
+ return std::u16string_view(&spec[begin], end - begin) == u"localhost";
}
template <typename CHAR>
@@ -134,7 +135,7 @@
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("file://", 7);
+ output->Append("file://");
new_parsed->scheme.len = 4;
// If the host is localhost, and the path starts with a Windows drive letter,
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index eacc69f..65118b2 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -6,11 +6,14 @@
#include "polyfills/base/cpu_reduction_experiment.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_features.h"
namespace url {
namespace {
+// clang-format off
+//
// For reference, here's what IE supports:
// Key: 0 (disallowed: failure if present in the input)
// + (allowed either escaped or unescaped, and unmodified)
@@ -37,19 +40,15 @@
// I also didn't test if characters affecting HTML parsing are allowed
// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
// Surprisingly, space is accepted in the input and always escaped.
-
+//
+// TODO(https://crbug.com/1416013): Remove the above historical reference
+// information once we are fully compliant with the URL Standard.
+//
// This table lists the canonical version of all characters we allow in the
// input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar
// value to indicate that this character should be escaped. We are a little more
// restrictive than IE, but less restrictive than Firefox.
//
-// Note that we disallow the % character. We will allow it when part of an
-// escape sequence, of course, but this disallows "%25". Even though IE allows
-// it, allowing it would put us in a funny state. If there was an invalid
-// escape sequence like "%zz", we'll add "%25zz" to the output and fail.
-// Allowing percents means we'll succeed a second time, so validity would change
-// based on how many times you run the canonicalizer. We prefer to always report
-// the same vailidity, so reject this.
const unsigned char kEsc = 0xff;
const unsigned char kHostCharLookup[0x80] = {
// 00-1f: all are invalid
@@ -68,6 +67,27 @@
// p q r s t u v w x y z { | } ~
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 };
+// The following table is used when IsUsingStandardCompliantHostCharacters()
+// returns true. See https://crbug.com/1416013 for details. At present, ' '
+// (SPACE) and '*' (asterisk) are still non-compliant with the URL Standard.
+const unsigned char kStandardCompliantHostCharLookup[0x80] = {
+// 00-1f: all are invalid
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+// ' ' ! " # $ % & ' ( ) * + , - . /
+ kEsc,'!', '"', 0, '$', 0, '&', '\'','(', ')', kEsc, '+', ',', '-', '.', 0,
+// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';' , 0, '=', 0, 0,
+// @ A B C D E F G H I J K L M N O
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+// P Q R S T U V W X Y Z [ \ ] ^ _
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0, ']', 0, '_',
+// ` a b c d e f g h i j k l m n o
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+// p q r s t u v w x y z { | } ~
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', 0, '}', '~', 0 };
+// clang-format on
+
// RFC1034 maximum FQDN length.
constexpr size_t kMaxHostLength = 253;
@@ -149,7 +169,12 @@
if (source < 0x80) {
// We have ASCII input, we can use our lookup table.
- unsigned char replacement = kHostCharLookup[source];
+ unsigned char replacement;
+ if (url::IsUsingStandardCompliantHostCharacters()) {
+ replacement = kStandardCompliantHostCharLookup[source];
+ } else {
+ replacement = kHostCharLookup[source];
+ }
if (!replacement) {
// Invalid character, add it as percent-escaped and mark as failed.
AppendEscapedChar(source, output);
@@ -189,9 +214,7 @@
}
StackBufferW wide_output;
- if (!IDNToASCII(url_escaped_host.data(),
- url_escaped_host.length(),
- &wide_output)) {
+ if (!IDNToASCII(url_escaped_host.view(), &wide_output)) {
// Some error, give up. This will write some reasonable looking
// representation of the string to the output.
AppendInvalidNarrowString(src, 0, src_len, output);
@@ -381,7 +404,7 @@
// we just leave it in place.
if (host_info->IsIPAddress()) {
output->set_length(output_begin);
- output->Append(canon_ip.data(), canon_ip.length());
+ output->Append(canon_ip.view());
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index e48b642..cbd4bb4 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc
@@ -45,7 +45,7 @@
// Scheme (known, so we don't bother running it through the more
// complicated scheme canonicalizer).
new_parsed->scheme.begin = output->length();
- output->Append("mailto:", 7);
+ output->Append("mailto:");
new_parsed->scheme.len = 6;
bool success = true;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 7f917db..440da70 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -336,8 +336,7 @@
// the last character of the escape sequence.
char unescaped_flags = kPathCharLookup[unescaped_value];
- if (!gurl_base::FeatureList::IsEnabled(
- url::kDontDecodeAsciiPercentEncodedURLPath) &&
+ if (!url::IsUsingDontDecodeAsciiPercentEncodedURLPath() &&
(unescaped_flags & UNESCAPE)) {
// This escaped value shouldn't be escaped. Try to copy it.
unescape_escaped_char = true;
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h
index c8f9500..2ddf569 100644
--- a/url/url_canon_stdstring.h
+++ b/url/url_canon_stdstring.h
@@ -10,11 +10,11 @@
// we have segregated it here.
#include <string>
+#include <string_view>
#include "base/compiler_specific.h"
#include "polyfills/base/component_export.h"
#include "polyfills/base/memory/raw_ptr_exclusion.h"
-#include "base/strings/string_piece.h"
#include "url/url_canon.h"
namespace url {
@@ -55,64 +55,64 @@
};
// An extension of the Replacements class that allows the setters to use
-// StringPieces (implicitly allowing strings or char*s).
+// string_views (implicitly allowing strings or char*s).
//
-// The contents of the StringPieces are not copied and must remain valid until
-// the StringPieceReplacements object goes out of scope.
+// The contents of the string_views are not copied and must remain valid until
+// the StringViewReplacements object goes out of scope.
//
// In order to make it harder to misuse the API the setters do not accept rvalue
// references to std::strings.
// Note: Extra const char* overloads are necessary to break ambiguities that
// would otherwise exist for char literals.
template <typename CharT>
-class StringPieceReplacements : public Replacements<CharT> {
+class StringViewReplacements : public Replacements<CharT> {
private:
using StringT = std::basic_string<CharT>;
- using StringPieceT = gurl_base::BasicStringPiece<CharT>;
+ using StringViewT = std::basic_string_view<CharT>;
using ParentT = Replacements<CharT>;
using SetterFun = void (ParentT::*)(const CharT*, const Component&);
- void SetImpl(SetterFun fun, StringPieceT str) {
+ void SetImpl(SetterFun fun, StringViewT str) {
(this->*fun)(str.data(), Component(0, static_cast<int>(str.size())));
}
public:
void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
- void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
+ void SetSchemeStr(StringViewT str) { SetImpl(&ParentT::SetScheme, str); }
void SetSchemeStr(const StringT&&) = delete;
void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
- void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
+ void SetUsernameStr(StringViewT str) { SetImpl(&ParentT::SetUsername, str); }
void SetUsernameStr(const StringT&&) = delete;
using ParentT::ClearUsername;
void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
- void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
+ void SetPasswordStr(StringViewT str) { SetImpl(&ParentT::SetPassword, str); }
void SetPasswordStr(const StringT&&) = delete;
using ParentT::ClearPassword;
void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
- void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
+ void SetHostStr(StringViewT str) { SetImpl(&ParentT::SetHost, str); }
void SetHostStr(const StringT&&) = delete;
using ParentT::ClearHost;
void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
- void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
+ void SetPortStr(StringViewT str) { SetImpl(&ParentT::SetPort, str); }
void SetPortStr(const StringT&&) = delete;
using ParentT::ClearPort;
void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
- void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
+ void SetPathStr(StringViewT str) { SetImpl(&ParentT::SetPath, str); }
void SetPathStr(const StringT&&) = delete;
using ParentT::ClearPath;
void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
- void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
+ void SetQueryStr(StringViewT str) { SetImpl(&ParentT::SetQuery, str); }
void SetQueryStr(const StringT&&) = delete;
using ParentT::ClearQuery;
void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
- void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
+ void SetRefStr(StringViewT str) { SetImpl(&ParentT::SetRef, str); }
void SetRefStr(const StringT&&) = delete;
using ParentT::ClearRef;
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index c1b19d2..da4b7b3 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -6,8 +6,8 @@
#include <errno.h>
#include <stddef.h>
+#include <string_view>
-#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
#include "base/test/metrics/histogram_tester.h"
@@ -119,12 +119,12 @@
{0x10FFFF, "\xF4\x8F\xBF\xBF"},
};
std::string out_str;
- for (size_t i = 0; i < std::size(utf_cases); i++) {
+ for (const auto& utf_case : utf_cases) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- AppendUTF8Value(utf_cases[i].input, &output);
+ AppendUTF8Value(utf_case.input, &output);
output.Complete();
- EXPECT_EQ(utf_cases[i].output, out_str);
+ EXPECT_EQ(utf_case.output, out_str);
}
}
@@ -171,27 +171,27 @@
};
std::string out_str;
- for (size_t i = 0; i < std::size(utf_cases); i++) {
- if (utf_cases[i].input8) {
+ for (const auto& utf_case : utf_cases) {
+ if (utf_case.input8) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- size_t input_len = strlen(utf_cases[i].input8);
+ size_t input_len = strlen(utf_case.input8);
bool success = true;
for (size_t ch = 0; ch < input_len; ch++) {
- success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
- &output);
+ success &=
+ AppendUTF8EscapedChar(utf_case.input8, &ch, input_len, &output);
}
output.Complete();
- EXPECT_EQ(utf_cases[i].expected_success, success);
- EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+ EXPECT_EQ(utf_case.expected_success, success);
+ EXPECT_EQ(std::string(utf_case.output), out_str);
}
- if (utf_cases[i].input16) {
+ if (utf_case.input16) {
out_str.clear();
StdStringCanonOutput output(&out_str);
std::u16string input_str(
- test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(utf_case.input16));
size_t input_len = input_str.length();
bool success = true;
for (size_t ch = 0; ch < input_len; ch++) {
@@ -199,18 +199,17 @@
&output);
}
output.Complete();
- EXPECT_EQ(utf_cases[i].expected_success, success);
- EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+ EXPECT_EQ(utf_case.expected_success, success);
+ EXPECT_EQ(std::string(utf_case.output), out_str);
}
- if (utf_cases[i].input8 && utf_cases[i].input16 &&
- utf_cases[i].expected_success) {
+ if (utf_case.input8 && utf_case.input16 && utf_case.expected_success) {
// Check that the UTF-8 and UTF-16 inputs are equivalent.
// UTF-16 -> UTF-8
- std::string input8_str(utf_cases[i].input8);
+ std::string input8_str(utf_case.input8);
std::u16string input16_str(
- test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(utf_case.input16));
EXPECT_EQ(input8_str, gurl_base::UTF16ToUTF8(input16_str));
// UTF-8 -> UTF-16
@@ -242,36 +241,36 @@
std::string out_str;
- for (size_t i = 0; i < std::size(scheme_cases); i++) {
- int url_len = static_cast<int>(strlen(scheme_cases[i].input));
+ for (const auto& scheme_case : scheme_cases) {
+ int url_len = static_cast<int>(strlen(scheme_case.input));
Component in_comp(0, url_len);
Component out_comp;
out_str.clear();
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizeScheme(scheme_cases[i].input, in_comp, &output1,
- &out_comp);
+ bool success =
+ CanonicalizeScheme(scheme_case.input, in_comp, &output1, &out_comp);
output1.Complete();
- EXPECT_EQ(scheme_cases[i].expected_success, success);
- EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
- EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(scheme_case.expected_success, success);
+ EXPECT_EQ(std::string(scheme_case.expected), out_str);
+ EXPECT_EQ(scheme_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(scheme_case.expected_component.len, out_comp.len);
// Now try the wide version.
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(gurl_base::UTF8ToUTF16(scheme_cases[i].input));
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(scheme_case.input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
output2.Complete();
- EXPECT_EQ(scheme_cases[i].expected_success, success);
- EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
- EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(scheme_case.expected_success, success);
+ EXPECT_EQ(std::string(scheme_case.expected), out_str);
+ EXPECT_EQ(scheme_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(scheme_case.expected_component.len, out_comp.len);
}
// Test the case where the scheme is declared nonexistent, it should be
@@ -315,23 +314,29 @@
TEST_P(URLCanonHostTest, Host) {
bool use_idna_non_transitional = IsUsingIDNA2008NonTransitional();
+ // clang-format off
IPAddressCase host_cases[] = {
// Basic canonicalization, uppercase should be converted to lowercase.
{"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10),
CanonHostInfo::NEUTRAL, -1, ""},
- // Spaces and some other characters should be escaped.
- {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com",
- Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+ // TODO(https://crbug.com/1416013): Update the test after SPACE is
+ // correctly handled.
+ {"Goo%20 goo.com", L"Goo%20 goo.com", "goo%20%20goo.com",
+ Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
+ // TODO(https://crbug.com/1416013): Update the test after ASTERISK is
+ // correctly handled.
+ {"Goo%2a*goo.com", L"Goo%2a*goo.com", "goo%2A%2Agoo.com",
+ Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
// Exciting different types of spaces!
- {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
+ {nullptr, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
CanonHostInfo::NEUTRAL, -1, ""},
// Other types of space (no-break, zero-width, zero-width-no-break) are
// name-prepped away to nothing.
- {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
+ {nullptr, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
CanonHostInfo::NEUTRAL, -1, ""},
// Ideographic full stop (full-width period for Chinese, etc.) should be
// treated as a dot.
- {NULL,
+ {nullptr,
L"www.foo\x3002"
L"bar.com",
"www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
@@ -437,7 +442,7 @@
// Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
// However, we do allow this at the moment because we don't use
// STD3 rules and canonicalize full-width ASCII to ASCII.
- {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", Component(0, 6),
+ {"wow\xef\xbc\x81", L"wow\xff01", "wow!", Component(0, 4),
CanonHostInfo::NEUTRAL, -1, ""},
// U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
// Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
@@ -575,7 +580,7 @@
// before punycode string was created. I.e.
// if '(' is escaped after punycode is created we would get xn--%28-8tb
// (incorrect).
- {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+ {"\xd1\x82(", L"\x0442(", "xn--(-8tb", Component(0, 9),
CanonHostInfo::NEUTRAL, -1, ""},
// Address with all hexadecimal characters with leading number of 1<<32
// or greater and should return NEUTRAL rather than BROKEN if not all
@@ -597,37 +602,38 @@
{"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
};
+ // clang-format on
// CanonicalizeHost() non-verbose.
std::string out_str;
- for (size_t i = 0; i < std::size(host_cases); i++) {
+ for (const auto& host_case : host_cases) {
// Narrow version.
- if (host_cases[i].input8) {
- int host_len = static_cast<int>(strlen(host_cases[i].input8));
+ if (host_case.input8) {
+ int host_len = static_cast<int>(strlen(host_case.input8));
Component in_comp(0, host_len);
Component out_comp;
out_str.clear();
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeHost(host_cases[i].input8, in_comp, &output,
- &out_comp);
+ bool success =
+ CanonicalizeHost(host_case.input8, in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
- success) << "for input: " << host_cases[i].input8;
- EXPECT_EQ(std::string(host_cases[i].expected), out_str) <<
- "for input: " << host_cases[i].input8;
- EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) <<
- "for input: " << host_cases[i].input8;
- EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) <<
- "for input: " << host_cases[i].input8;
+ EXPECT_EQ(host_case.expected_family != CanonHostInfo::BROKEN, success)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(std::string(host_case.expected), out_str)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(host_case.expected_component.begin, out_comp.begin)
+ << "for input: " << host_case.input8;
+ EXPECT_EQ(host_case.expected_component.len, out_comp.len)
+ << "for input: " << host_case.input8;
}
// Wide version.
- if (host_cases[i].input16) {
+ if (host_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(host_case.input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
Component out_comp;
@@ -639,46 +645,43 @@
&out_comp);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
- success);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(host_case.expected_family != CanonHostInfo::BROKEN, success);
+ EXPECT_EQ(std::string(host_case.expected), out_str);
+ EXPECT_EQ(host_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(host_case.expected_component.len, out_comp.len);
}
}
// CanonicalizeHostVerbose()
- for (size_t i = 0; i < std::size(host_cases); i++) {
+ for (const auto& host_case : host_cases) {
// Narrow version.
- if (host_cases[i].input8) {
- int host_len = static_cast<int>(strlen(host_cases[i].input8));
+ if (host_case.input8) {
+ int host_len = static_cast<int>(strlen(host_case.input8));
Component in_comp(0, host_len);
out_str.clear();
StdStringCanonOutput output(&out_str);
CanonHostInfo host_info;
- CanonicalizeHostVerbose(host_cases[i].input8, in_comp, &output,
- &host_info);
+ CanonicalizeHostVerbose(host_case.input8, in_comp, &output, &host_info);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin,
- host_info.out_host.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+ EXPECT_EQ(host_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(host_case.expected), out_str);
+ EXPECT_EQ(host_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(host_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(std::string(host_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
- if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
- EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+ if (host_case.expected_family == CanonHostInfo::IPV4) {
+ EXPECT_EQ(host_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
// Wide version.
- if (host_cases[i].input16) {
+ if (host_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(host_case.input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -689,21 +692,71 @@
CanonicalizeHostVerbose(input16.c_str(), in_comp, &output, &host_info);
output.Complete();
- EXPECT_EQ(host_cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(host_cases[i].expected), out_str);
- EXPECT_EQ(host_cases[i].expected_component.begin,
- host_info.out_host.begin);
- EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+ EXPECT_EQ(host_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(host_case.expected), out_str);
+ EXPECT_EQ(host_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(host_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(std::string(host_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
- if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
- EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+ if (host_case.expected_family == CanonHostInfo::IPV4) {
+ EXPECT_EQ(host_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
}
}
+TEST(URLCanonTest, HostPuncutationChar) {
+ // '%' is not tested here because it introduces a percent-escape sequence.
+ const std::string_view allowed_host_chars[] = {
+ "!", "\"", "$", "&", "'", "(", ")", "+", ",",
+ "-", ".", ";", "=", "_", "`", "{", "}", "~",
+ };
+
+ const std::string_view forbidden_host_chars[] = {
+ "#", "/", ":", "<", ">", "?", "@", "[", "\\", "]", "^", "|",
+ };
+
+ // Characters that are percent-escaped, which is not compliant with the URL
+ // Standard. See https://crbug.com/1416013.
+ struct EscapedCharTestCase {
+ std::string_view input;
+ std::string_view expected;
+ } escaped_host_chars[] = {{" ", "%20"}, {"*", "%2A"}};
+
+ for (const std::string_view input : allowed_host_chars) {
+ std::string out_str;
+ Component in_comp(0, input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ bool success = CanonicalizeHost(input.data(), in_comp, &output, &out_comp);
+ EXPECT_TRUE(success) << "Input: " << input;
+ output.Complete();
+ EXPECT_EQ(out_str, input) << "Input: " << input;
+ }
+
+ for (const std::string_view input : forbidden_host_chars) {
+ std::string out_str;
+ Component in_comp(0, input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ EXPECT_FALSE(CanonicalizeHost(input.data(), in_comp, &output, &out_comp))
+ << "Input: " << input;
+ }
+
+ for (const auto& c : escaped_host_chars) {
+ std::string out_str;
+ Component in_comp(0, c.input.size());
+ Component out_comp;
+ StdStringCanonOutput output(&out_str);
+ bool success =
+ CanonicalizeHost(c.input.data(), in_comp, &output, &out_comp);
+ EXPECT_TRUE(success) << "Input: " << c.input;
+ output.Complete();
+ EXPECT_EQ(out_str, c.expected) << "Input: " << c.input;
+ }
+}
+
TEST(URLCanonTest, IPv4) {
// clang-format off
IPAddressCase cases[] = {
@@ -1130,34 +1183,30 @@
{"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
};
- for (size_t i = 0; i < std::size(user_info_cases); i++) {
- int url_len = static_cast<int>(strlen(user_info_cases[i].input));
+ for (const auto& user_info_case : user_info_cases) {
+ int url_len = static_cast<int>(strlen(user_info_case.input));
Parsed parsed;
- ParseStandardURL(user_info_cases[i].input, url_len, &parsed);
+ ParseStandardURL(user_info_case.input, url_len, &parsed);
Component out_user, out_pass;
std::string out_str;
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizeUserInfo(user_info_cases[i].input,
- parsed.username,
- user_info_cases[i].input,
- parsed.password,
- &output1,
- &out_user,
- &out_pass);
+ bool success = CanonicalizeUserInfo(user_info_case.input, parsed.username,
+ user_info_case.input, parsed.password,
+ &output1, &out_user, &out_pass);
output1.Complete();
- EXPECT_EQ(user_info_cases[i].expected_success, success);
- EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
- EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
- EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
- EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
- EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+ EXPECT_EQ(user_info_case.expected_success, success);
+ EXPECT_EQ(std::string(user_info_case.expected), out_str);
+ EXPECT_EQ(user_info_case.expected_username.begin, out_user.begin);
+ EXPECT_EQ(user_info_case.expected_username.len, out_user.len);
+ EXPECT_EQ(user_info_case.expected_password.begin, out_pass.begin);
+ EXPECT_EQ(user_info_case.expected_password.len, out_pass.len);
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(gurl_base::UTF8ToUTF16(user_info_cases[i].input));
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(user_info_case.input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1167,12 +1216,12 @@
&out_pass);
output2.Complete();
- EXPECT_EQ(user_info_cases[i].expected_success, success);
- EXPECT_EQ(std::string(user_info_cases[i].expected), out_str);
- EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin);
- EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len);
- EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin);
- EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len);
+ EXPECT_EQ(user_info_case.expected_success, success);
+ EXPECT_EQ(std::string(user_info_case.expected), out_str);
+ EXPECT_EQ(user_info_case.expected_username.begin, out_user.begin);
+ EXPECT_EQ(user_info_case.expected_username.len, out_user.len);
+ EXPECT_EQ(user_info_case.expected_password.begin, out_pass.begin);
+ EXPECT_EQ(user_info_case.expected_password.len, out_pass.len);
}
}
@@ -1199,39 +1248,33 @@
{"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true},
};
- for (size_t i = 0; i < std::size(port_cases); i++) {
- int url_len = static_cast<int>(strlen(port_cases[i].input));
+ for (const auto& port_case : port_cases) {
+ int url_len = static_cast<int>(strlen(port_case.input));
Component in_comp(0, url_len);
Component out_comp;
std::string out_str;
StdStringCanonOutput output1(&out_str);
- bool success = CanonicalizePort(port_cases[i].input,
- in_comp,
- port_cases[i].default_port,
- &output1,
- &out_comp);
+ bool success = CanonicalizePort(
+ port_case.input, in_comp, port_case.default_port, &output1, &out_comp);
output1.Complete();
- EXPECT_EQ(port_cases[i].expected_success, success);
- EXPECT_EQ(std::string(port_cases[i].expected), out_str);
- EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(port_case.expected_success, success);
+ EXPECT_EQ(std::string(port_case.expected), out_str);
+ EXPECT_EQ(port_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(port_case.expected_component.len, out_comp.len);
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- std::u16string wide_input(gurl_base::UTF8ToUTF16(port_cases[i].input));
- success = CanonicalizePort(wide_input.c_str(),
- in_comp,
- port_cases[i].default_port,
- &output2,
- &out_comp);
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(port_case.input));
+ success = CanonicalizePort(wide_input.c_str(), in_comp,
+ port_case.default_port, &output2, &out_comp);
output2.Complete();
- EXPECT_EQ(port_cases[i].expected_success, success);
- EXPECT_EQ(std::string(port_cases[i].expected), out_str);
- EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len);
+ EXPECT_EQ(port_case.expected_success, success);
+ EXPECT_EQ(std::string(port_case.expected), out_str);
+ EXPECT_EQ(port_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(port_case.expected_component.len, out_comp.len);
}
}
@@ -1437,34 +1480,34 @@
{"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
};
- for (size_t i = 0; i < std::size(query_cases); i++) {
+ for (const auto& query_case : query_cases) {
Component out_comp;
- if (query_cases[i].input8) {
- int len = static_cast<int>(strlen(query_cases[i].input8));
+ if (query_case.input8) {
+ int len = static_cast<int>(strlen(query_case.input8));
Component in_comp(0, len);
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output,
+ CanonicalizeQuery(query_case.input8, in_comp, nullptr, &output,
&out_comp);
output.Complete();
- EXPECT_EQ(query_cases[i].expected, out_str);
+ EXPECT_EQ(query_case.expected, out_str);
}
- if (query_cases[i].input16) {
+ if (query_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(query_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(query_case.input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp);
+ CanonicalizeQuery(input16.c_str(), in_comp, nullptr, &output, &out_comp);
output.Complete();
- EXPECT_EQ(query_cases[i].expected, out_str);
+ EXPECT_EQ(query_case.expected, out_str);
}
}
@@ -1472,7 +1515,8 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
Component out_comp;
- CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp);
+ CanonicalizeQuery("a \x00z\x01", Component(0, 5), nullptr, &output,
+ &out_comp);
output.Complete();
EXPECT_EQ("?a%20%00z%01", out_str);
}
@@ -1509,27 +1553,27 @@
{"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
};
- for (size_t i = 0; i < std::size(ref_cases); i++) {
+ for (const auto& ref_case : ref_cases) {
// 8-bit input
- if (ref_cases[i].input8) {
- int len = static_cast<int>(strlen(ref_cases[i].input8));
+ if (ref_case.input8) {
+ int len = static_cast<int>(strlen(ref_case.input8));
Component in_comp(0, len);
Component out_comp;
std::string out_str;
StdStringCanonOutput output(&out_str);
- CanonicalizeRef(ref_cases[i].input8, in_comp, &output, &out_comp);
+ CanonicalizeRef(ref_case.input8, in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
- EXPECT_EQ(ref_cases[i].expected, out_str);
+ EXPECT_EQ(ref_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(ref_case.expected_component.len, out_comp.len);
+ EXPECT_EQ(ref_case.expected, out_str);
}
// 16-bit input
- if (ref_cases[i].input16) {
+ if (ref_case.input16) {
std::u16string input16(
- test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
+ test_utils::TruncateWStringToUTF16(ref_case.input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
Component out_comp;
@@ -1539,9 +1583,9 @@
CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp);
output.Complete();
- EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
- EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
- EXPECT_EQ(ref_cases[i].expected, out_str);
+ EXPECT_EQ(ref_case.expected_component.begin, out_comp.begin);
+ EXPECT_EQ(ref_case.expected_component.len, out_comp.len);
+ EXPECT_EQ(ref_case.expected, out_str);
}
}
@@ -1564,24 +1608,14 @@
// The individual component canonicalize tests should have caught the cases
// for each of those components. Here, we just need to test that the various
// parts are included or excluded properly, and have the correct separators.
+ // clang-format off
struct URLCase {
const char* input;
const char* expected;
bool expected_success;
} cases[] = {
- {"http://www.google.com/foo?bar=baz#",
- "http://www.google.com/foo?bar=baz#", true},
- {"http://[www.google.com]/", "http://[www.google.com]/", false},
- {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#",
- false},
- {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo",
- true},
- {"www.google.com", ":www.google.com/", false},
- {"http://192.0x00A80001", "http://192.168.0.1/", true},
- {"http://www/foo%2Ehtml", "http://www/foo.html", true},
- {"http://user:pass@/", "http://user:pass@/", false},
- {"http://%25DOMAIN:foobar@foodomain.com/",
- "http://%25DOMAIN:foobar@foodomain.com/", true},
+ {"http://www.google.com/foo?bar=baz#", "http://www.google.com/foo?bar=baz#",
+ true},
// Backslashes should get converted to forward slashes.
{"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
@@ -1618,27 +1652,28 @@
// ICU will convert to an ASCII one, generating "%81".
{"ws:)W\x1eW\xef\xb9\xaa"
"81:80/",
- "ws://%29w%1ew%81/", false},
+ "ws://)w%1ew%81/", false},
// Regression test for the last_invalid_percent_index bug described in
// https://crbug.com/1080890#c10.
{R"(HTTP:S/5%\../>%41)", "http://s/%3E%41", true},
};
+ // clang-format on
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseStandardURL(cases[i].input, url_len, &parsed);
+ ParseStandardURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success = CanonicalizeStandardURL(
- cases[i].input, url_len, parsed,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed);
+ i.input, url_len, parsed, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
+ nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
}
}
@@ -1665,8 +1700,8 @@
"filesystem://a:b@google.com:22/foo?baz@cat"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseStandardURL(cur.base, base_len, &parsed);
@@ -1688,12 +1723,12 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceStandardURL(replace_cases[i].base, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ ReplaceStandardURL(replace_case.base, parsed, r,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
// The path pointer should be ignored if the address is invalid.
@@ -1712,7 +1747,7 @@
StdStringCanonOutput output1(&out_str1);
Parsed new_parsed;
ReplaceStandardURL(src, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output1, &new_parsed);
output1.Complete();
EXPECT_STREQ("http://www.google.com/", out_str1.c_str());
@@ -1722,7 +1757,7 @@
std::string out_str2;
StdStringCanonOutput output2(&out_str2);
ReplaceStandardURL(src, parsed, r,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
&output2, &new_parsed);
output2.Complete();
EXPECT_STREQ("http://www.google.com/", out_str2.c_str());
@@ -1765,8 +1800,8 @@
nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
SCOPED_TRACE(cur.base);
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1786,10 +1821,10 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceFileURL(cur.base, parsed, r, NULL, &output, &out_parsed);
+ ReplaceFileURL(cur.base, parsed, r, nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -1834,8 +1869,8 @@
"filesystem:http://bar.com:40/t/gaba?query#ref"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseFileSystemURL(cur.base, base_len, &parsed);
@@ -1854,10 +1889,10 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
Parsed out_parsed;
- ReplaceFileSystemURL(cur.base, parsed, r, NULL, &output, &out_parsed);
+ ReplaceFileSystemURL(cur.base, parsed, r, nullptr, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -1878,8 +1913,8 @@
nullptr, nullptr, "data:"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParsePathURL(cur.base, base_len, false, &parsed);
@@ -1901,36 +1936,46 @@
ReplacePathURL(cur.base, parsed, r, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
TEST(URLCanonTest, ReplaceMailtoURL) {
ReplaceCase replace_cases[] = {
// Replace everything
- {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"},
+ {"mailto:jon@foo.com?body=sup", "mailto", nullptr, nullptr, nullptr,
+ nullptr, "addr1", "to=tony", nullptr, "mailto:addr1?to=tony"},
// Replace nothing
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, "mailto:jon@foo.com?body=sup"},
// Replace the path
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "jason", nullptr, nullptr, "mailto:jason?body=sup"},
// Replace the query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "custom=1", nullptr, "mailto:jon@foo.com?custom=1"},
// Replace the path and query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "jason", "custom=1", nullptr, "mailto:jason?custom=1"},
// Set the query to empty (should leave trailing question mark)
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "", nullptr, "mailto:jon@foo.com?"},
// Clear the query
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "|", nullptr, "mailto:jon@foo.com"},
// Clear the path
- {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"},
+ {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr,
+ nullptr, "|", nullptr, nullptr, "mailto:?body=sup"},
// Clear the path + query
- {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"},
+ {"mailto:", nullptr, nullptr, nullptr, nullptr, nullptr, "|", "|",
+ nullptr, "mailto:"},
// Setting the ref should have no effect
- {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"},
+ {"mailto:addr1", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, "BLAH", "mailto:addr1"},
};
- for (size_t i = 0; i < std::size(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
+ for (const auto& replace_case : replace_cases) {
+ const ReplaceCase& cur = replace_case;
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseMailtoURL(cur.base, base_len, &parsed);
@@ -1952,7 +1997,7 @@
ReplaceMailtoURL(cur.base, parsed, r, &output, &out_parsed);
output.Complete();
- EXPECT_EQ(replace_cases[i].expected, out_str);
+ EXPECT_EQ(replace_case.expected, out_str);
}
}
@@ -2042,31 +2087,31 @@
#endif // _WIN32
};
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseFileURL(cases[i].input, url_len, &parsed);
+ ParseFileURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeFileURL(cases[i].input, url_len, parsed, NULL,
+ bool success = CanonicalizeFileURL(i.input, url_len, parsed, nullptr,
&output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
// Make sure the spec was properly identified, the file canonicalizer has
// different code for writing the spec.
EXPECT_EQ(0, out_parsed.scheme.begin);
EXPECT_EQ(4, out_parsed.scheme.len);
- EXPECT_EQ(cases[i].expected_host.begin, out_parsed.host.begin);
- EXPECT_EQ(cases[i].expected_host.len, out_parsed.host.len);
+ EXPECT_EQ(i.expected_host.begin, out_parsed.host.begin);
+ EXPECT_EQ(i.expected_host.len, out_parsed.host.len);
- EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin);
- EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len);
+ EXPECT_EQ(i.expected_path.begin, out_parsed.path.begin);
+ EXPECT_EQ(i.expected_path.len, out_parsed.path.len);
}
}
@@ -2088,23 +2133,23 @@
{"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
{"filesystem:File:///temporary/Bob?qUery#reF",
"filesystem:file:///temporary/Bob?qUery#reF", true},
- {"FilEsysteM:htTp:E=/.", "filesystem:http://e%3D//", false},
+ {"FilEsysteM:htTp:E=/.", "filesystem:http://e=//", false},
};
- for (size_t i = 0; i < std::size(cases); i++) {
- int url_len = static_cast<int>(strlen(cases[i].input));
+ for (const auto& i : cases) {
+ int url_len = static_cast<int>(strlen(i.input));
Parsed parsed;
- ParseFileSystemURL(cases[i].input, url_len, &parsed);
+ ParseFileSystemURL(i.input, url_len, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizeFileSystemURL(cases[i].input, url_len, parsed,
- NULL, &output, &out_parsed);
+ bool success = CanonicalizeFileSystemURL(i.input, url_len, parsed, nullptr,
+ &output, &out_parsed);
output.Complete();
- EXPECT_EQ(cases[i].expected_success, success);
- EXPECT_EQ(cases[i].expected, out_str);
+ EXPECT_EQ(i.expected_success, success);
+ EXPECT_EQ(i.expected, out_str);
// Make sure the spec was properly identified, the filesystem canonicalizer
// has different code for writing the spec.
@@ -2130,26 +2175,26 @@
{"javascript:\uFFFF", "javascript:%EF%BF%BF"},
};
- for (size_t i = 0; i < std::size(path_cases); i++) {
- int url_len = static_cast<int>(strlen(path_cases[i].input));
+ for (const auto& path_case : path_cases) {
+ int url_len = static_cast<int>(strlen(path_case.input));
Parsed parsed;
- ParsePathURL(path_cases[i].input, url_len, true, &parsed);
+ ParsePathURL(path_case.input, url_len, true, &parsed);
Parsed out_parsed;
std::string out_str;
StdStringCanonOutput output(&out_str);
- bool success = CanonicalizePathURL(path_cases[i].input, url_len, parsed,
+ bool success = CanonicalizePathURL(path_case.input, url_len, parsed,
&output, &out_parsed);
output.Complete();
EXPECT_TRUE(success);
- EXPECT_EQ(path_cases[i].expected, out_str);
+ EXPECT_EQ(path_case.expected, out_str);
EXPECT_EQ(0, out_parsed.host.begin);
EXPECT_EQ(-1, out_parsed.host.len);
// When we end with a colon at the end, there should be no path.
- if (path_cases[i].input[url_len - 1] == ':') {
+ if (path_case.input[url_len - 1] == ':') {
EXPECT_EQ(0, out_parsed.GetContent().begin);
EXPECT_EQ(-1, out_parsed.GetContent().len);
}
@@ -2168,20 +2213,20 @@
{"\uFFFF", L"\uFFFF", "%EF%BF%BF"},
};
- for (size_t i = 0; i < std::size(path_cases); i++) {
+ for (const auto& path_case : path_cases) {
// 8-bit string input
std::string out_str;
StdStringCanonOutput output(&out_str);
url::Component out_component;
- CanonicalizePathURLPath(path_cases[i].input.data(),
- Component(0, path_cases[i].input.size()), &output,
+ CanonicalizePathURLPath(path_case.input.data(),
+ Component(0, path_case.input.size()), &output,
&out_component);
output.Complete();
- EXPECT_EQ(path_cases[i].expected, out_str);
+ EXPECT_EQ(path_case.expected, out_str);
EXPECT_EQ(0, out_component.begin);
- EXPECT_EQ(path_cases[i].expected.size(),
+ EXPECT_EQ(path_case.expected.size(),
static_cast<size_t>(out_component.len));
// 16-bit string input
@@ -2189,16 +2234,16 @@
StdStringCanonOutput output16(&out_str16);
url::Component out_component16;
std::u16string input16(
- test_utils::TruncateWStringToUTF16(path_cases[i].input16.data()));
+ test_utils::TruncateWStringToUTF16(path_case.input16.data()));
CanonicalizePathURLPath(input16.c_str(),
- Component(0, path_cases[i].input16.size()),
- &output16, &out_component16);
+ Component(0, path_case.input16.size()), &output16,
+ &out_component16);
output16.Complete();
- EXPECT_EQ(path_cases[i].expected, out_str16);
+ EXPECT_EQ(path_case.expected, out_str16);
EXPECT_EQ(0, out_component16.begin);
- EXPECT_EQ(path_cases[i].expected.size(),
+ EXPECT_EQ(path_case.expected.size(),
static_cast<size_t>(out_component16.len));
}
}
@@ -2393,148 +2438,209 @@
const char* resolved; // What we expect in the result when resolving.
} rel_cases[] = {
// Basic absolute input.
- {"http://host/a", true, false, "http://another/", true, false, false, NULL},
- {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
+ {"http://host/a", true, false, "http://another/", true, false, false,
+ nullptr},
+ {"http://host/a", true, false, "http:////another/", true, false, false,
+ nullptr},
// Empty relative URLs should only remove the ref part of the URL,
// leaving the rest unchanged.
- {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
- {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
- {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
+ {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
+ {"http://foo/bar#ref", true, false, "", true, true, true,
+ "http://foo/bar"},
+ {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
// Spaces at the ends of the relative path should be ignored.
- {"http://foo/bar", true, false, " another ", true, true, true, "http://foo/another"},
- {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"},
- {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"},
+ {"http://foo/bar", true, false, " another ", true, true, true,
+ "http://foo/another"},
+ {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"},
+ {"http://foo/bar", true, false, " \t ", true, true, true,
+ "http://foo/bar"},
// Matching schemes without two slashes are treated as relative.
- {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"},
- {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"},
- {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"},
- {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"},
+ {"http://host/a", true, false, "http:path", true, true, true,
+ "http://host/path"},
+ {"http://host/a/", true, false, "http:path", true, true, true,
+ "http://host/a/path"},
+ {"http://host/a", true, false, "http:/path", true, true, true,
+ "http://host/path"},
+ {"http://host/a", true, false, "HTTP:/path", true, true, true,
+ "http://host/path"},
// Nonmatching schemes are absolute.
- {"http://host/a", true, false, "https:host2", true, false, false, NULL},
- {"http://host/a", true, false, "htto:/host2", true, false, false, NULL},
+ {"http://host/a", true, false, "https:host2", true, false, false,
+ nullptr},
+ {"http://host/a", true, false, "htto:/host2", true, false, false,
+ nullptr},
// Absolute path input
- {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"},
- {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"},
- {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"},
- {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"},
- {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"},
- {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"},
+ {"http://host/a", true, false, "/b/c/d", true, true, true,
+ "http://host/b/c/d"},
+ {"http://host/a", true, false, "\\b\\c\\d", true, true, true,
+ "http://host/b/c/d"},
+ {"http://host/a", true, false, "/b/../c", true, true, true,
+ "http://host/c"},
+ {"http://host/a?b#c", true, false, "/b/../c", true, true, true,
+ "http://host/c"},
+ {"http://host/a", true, false, "\\b/../c?x#y", true, true, true,
+ "http://host/c?x#y"},
+ {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true,
+ "http://host/c?x#y"},
// Relative path input
- {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
- {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"},
- {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"},
- {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
- {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
- {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"},
- {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"},
+ {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
+ {"http://host/a", true, false, "bc/de", true, true, true,
+ "http://host/bc/de"},
+ {"http://host/a/", true, false, "bc/de?query#ref", true, true, true,
+ "http://host/a/bc/de?query#ref"},
+ {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
+ {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
+ {"http://host/a/", true, false, "././.", true, true, true,
+ "http://host/a/"},
+ {"http://host/a?query#ref", true, false, "../../../foo", true, true, true,
+ "http://host/foo"},
// Query input
- {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"},
- {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"},
- {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"},
+ {"http://host/a", true, false, "?foo=bar", true, true, true,
+ "http://host/a?foo=bar"},
+ {"http://host/a?x=y#z", true, false, "?", true, true, true,
+ "http://host/a?"},
+ {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true,
+ "http://host/a?foo=bar#com"},
// Ref input
- {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"},
- {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
- {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"},
+ {"http://host/a", true, false, "#ref", true, true, true,
+ "http://host/a#ref"},
+ {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
+ {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true,
+ "http://host/a?foo=bar#bye"},
// Non-hierarchical base: no relative handling. Relative input should
// error, and if a scheme is present, it should be treated as absolute.
- {"data:foobar", false, false, "baz.html", false, false, false, NULL},
- {"data:foobar", false, false, "data:baz", true, false, false, NULL},
- {"data:foobar", false, false, "data:/base", true, false, false, NULL},
+ {"data:foobar", false, false, "baz.html", false, false, false, nullptr},
+ {"data:foobar", false, false, "data:baz", true, false, false, nullptr},
+ {"data:foobar", false, false, "data:/base", true, false, false, nullptr},
// Non-hierarchical base: absolute input should succeed.
- {"data:foobar", false, false, "http://host/", true, false, false, NULL},
- {"data:foobar", false, false, "http:host", true, false, false, NULL},
+ {"data:foobar", false, false, "http://host/", true, false, false,
+ nullptr},
+ {"data:foobar", false, false, "http:host", true, false, false, nullptr},
// Non-hierarchical base: empty URL should give error.
- {"data:foobar", false, false, "", false, false, false, NULL},
+ {"data:foobar", false, false, "", false, false, false, nullptr},
// Invalid schemes should be treated as relative.
- {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
- {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
- {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"},
- {"data:asdf", false, false, ":foo", false, false, false, NULL},
- {"data:asdf", false, false, "bad(':foo')", false, false, false, NULL},
+ {"http://foo/bar", true, false, "./asd:fgh", true, true, true,
+ "http://foo/asd:fgh"},
+ {"http://foo/bar", true, false, ":foo", true, true, true,
+ "http://foo/:foo"},
+ {"http://foo/bar", true, false, " hello world", true, true, true,
+ "http://foo/hello%20world"},
+ {"data:asdf", false, false, ":foo", false, false, false, nullptr},
+ {"data:asdf", false, false, "bad(':foo')", false, false, false, nullptr},
// We should treat semicolons like any other character in URL resolving
- {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"},
- {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"},
- {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"},
+ {"http://host/a", true, false, ";foo", true, true, true,
+ "http://host/;foo"},
+ {"http://host/a;", true, false, ";foo", true, true, true,
+ "http://host/;foo"},
+ {"http://host/a", true, false, ";/../bar", true, true, true,
+ "http://host/bar"},
// Relative URLs can also be written as "//foo/bar" which is relative to
// the scheme. In this case, it would take the old scheme, so for http
// the example would resolve to "http://foo/bar".
- {"http://host/a", true, false, "//another", true, true, true, "http://another/"},
- {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"},
- {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "//", true, true, false, "http:"},
+ {"http://host/a", true, false, "//another", true, true, true,
+ "http://another/"},
+ {"http://host/a", true, false, "//another/path?query#ref", true, true,
+ true, "http://another/path?query#ref"},
+ {"http://host/a", true, false, "///another/path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "//Another\\path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "//", true, true, false, "http:"},
// IE will also allow one or the other to be a backslash to get the same
// behavior.
- {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"},
- {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"},
+ {"http://host/a", true, false, "\\/another/path", true, true, true,
+ "http://another/path"},
+ {"http://host/a", true, false, "/\\Another\\path", true, true, true,
+ "http://another/path"},
#ifdef WIN32
// Resolving against Windows file base URLs.
- {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL},
- {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
- {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"},
- {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"},
+ {"file:///C:/foo", true, true, "http://host/", true, false, false,
+ nullptr},
+ {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
+ {"file:///C:/foo", true, true, "../../../bar.html", true, true, true,
+ "file:///C:/bar.html"},
+ {"file:///C:/foo", true, true, "/../bar.html", true, true, true,
+ "file:///C:/bar.html"},
// But two backslashes on Windows should be UNC so should be treated
// as absolute.
- {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL},
+ {"http://host/a", true, false, "\\\\another\\path", true, false, false,
+ nullptr},
// IE doesn't support drive specs starting with two slashes. It fails
// immediately and doesn't even try to load. We fix it up to either
// an absolute path or UNC depending on what it looks like.
- {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"},
- {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"},
+ {"file:///C:/something", true, true, "//c:/foo", true, true, true,
+ "file:///C:/foo"},
+ {"file:///C:/something", true, true, "//localhost/c:/foo", true, true,
+ true, "file:///C:/foo"},
// Windows drive specs should be allowed and treated as absolute.
- {"file:///C:/foo", true, true, "c:", true, false, false, NULL},
- {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL},
- {"http://host/a", true, false, "c:\\foo", true, false, false, NULL},
+ {"file:///C:/foo", true, true, "c:", true, false, false, nullptr},
+ {"file:///C:/foo", true, true, "c:/foo", true, false, false, nullptr},
+ {"http://host/a", true, false, "c:\\foo", true, false, false, nullptr},
// Relative paths with drive letters should be allowed when the base is
// also a file.
- {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"},
+ {"file:///C:/foo", true, true, "/z:/bar", true, true, true,
+ "file:///Z:/bar"},
// Treat absolute paths as being off of the drive.
- {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"},
- {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"},
- {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"},
+ {"file:///C:/foo", true, true, "/bar", true, true, true,
+ "file:///C:/bar"},
+ {"file://localhost/C:/foo", true, true, "/bar", true, true, true,
+ "file://localhost/C:/bar"},
+ {"file:///C:/foo/com/", true, true, "/bar", true, true, true,
+ "file:///C:/bar"},
// On Windows, two slashes without a drive letter when the base is a file
// means that the path is UNC.
- {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"},
- {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"},
+ {"file:///C:/something", true, true, "//somehost/path", true, true, true,
+ "file://somehost/path"},
+ {"file:///C:/something", true, true, "/\\//somehost/path", true, true,
+ true, "file://somehost/path"},
#else
// On Unix we fall back to relative behavior since there's nothing else
// reasonable to do.
- {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"},
+ {"http://host/a", true, false, "\\\\Another\\path", true, true, true,
+ "http://another/path"},
#endif
// Even on Windows, we don't allow relative drive specs when the base
// is not file.
- {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
- {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+ {"http://host/a", true, false, "/c:\\foo", true, true, true,
+ "http://host/c:/foo"},
+ {"http://host/a", true, false, "//c:\\foo", true, true, true,
+ "http://c/foo"},
// Cross-platform relative file: resolution behavior.
- {"file://host/a", true, true, "/", true, true, true, "file://host/"},
- {"file://host/a", true, true, "//", true, true, true, "file:///"},
- {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
- {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
+ {"file://host/a", true, true, "/", true, true, true, "file://host/"},
+ {"file://host/a", true, true, "//", true, true, true, "file:///"},
+ {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
+ {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
// Ensure that ports aren't allowed for hosts relative to a file url.
// Although the result string shows a host:port portion, the call to
// resolve the relative URL returns false, indicating parse failure,
// which is what is required.
- {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
+ {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false,
+ "file://host:80/bar.txt"},
// Filesystem URL tests; filesystem URLs are only valid and relative if
// they have no scheme, e.g. "./index.html". There's no valid equivalent
// to http:index.html.
- {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL},
- {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
- {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"},
- {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"},
- {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL},
+ {"filesystem:http://host/t/path", true, false,
+ "filesystem:http://host/t/path2", true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false,
+ "filesystem:https://host/t/path2", true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false, "http://host/t/path2",
+ true, false, false, nullptr},
+ {"http://host/t/path", true, false, "filesystem:http://host/t/path2",
+ true, false, false, nullptr},
+ {"filesystem:http://host/t/path", true, false, "./path2", true, true,
+ true, "filesystem:http://host/t/path2"},
+ {"filesystem:http://host/t/path/", true, false, "path2", true, true, true,
+ "filesystem:http://host/t/path/path2"},
+ {"filesystem:http://host/t/path", true, false, "filesystem:http:path2",
+ true, false, false, nullptr},
// Absolute URLs are still not relative to a non-standard base URL.
- {"about:blank", false, false, "http://X/A", true, false, true, ""},
- {"about:blank", false, false, "content://content.Provider/", true, false, true, ""},
+ {"about:blank", false, false, "http://X/A", true, false, true, ""},
+ {"about:blank", false, false, "content://content.Provider/", true, false,
+ true, ""},
};
- for (size_t i = 0; i < std::size(rel_cases); i++) {
- const RelativeCase& cur_case = rel_cases[i];
-
+ for (const auto& cur_case : rel_cases) {
Parsed parsed;
int base_len = static_cast<int>(strlen(cur_case.base));
if (cur_case.is_base_file)
@@ -2564,7 +2670,7 @@
bool succeed_resolve = ResolveRelativeURL(
cur_case.base, parsed, cur_case.is_base_file, cur_case.test,
- relative_component, NULL, &output, &resolved_parsed);
+ relative_component, nullptr, &output, &resolved_parsed);
output.Complete();
EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve);
@@ -2614,7 +2720,7 @@
Parsed repl_parsed;
std::string repl_str;
StdStringCanonOutput repl_output(&repl_str);
- ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed);
+ ReplaceFileURL(src, parsed, repl, nullptr, &repl_output, &repl_parsed);
repl_output.Complete();
// Generate the expected string and check.
@@ -2642,7 +2748,7 @@
{"WSS", PORT_UNSPECIFIED},
};
- for (auto& test_case : cases) {
+ for (const auto& test_case : cases) {
SCOPED_TRACE(test_case.scheme);
EXPECT_EQ(test_case.expected_port,
DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme)));
@@ -2651,7 +2757,7 @@
TEST(URLCanonTest, FindWindowsDriveLetter) {
struct TestCase {
- gurl_base::StringPiece spec;
+ std::string_view spec;
int begin;
int end; // -1 for end of spec
int expected_drive_letter_pos;
@@ -2695,33 +2801,33 @@
// Basic ASCII test.
std::u16string str = u"hello";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"hello", std::u16string(output.data()));
output.set_length(0);
// Mixed ASCII/non-ASCII.
str = u"hellö";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// All non-ASCII.
str = u"你好";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data()));
output.set_length(0);
// Characters that need mapping (the resulting Punycode is the encoding for
// "1⁄4").
str = u"¼";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and all ASCII. Should not
// modify the string.
str = u"xn--hell-8qa";
- EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_TRUE(IDNToASCII(str, &output));
EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
@@ -2729,7 +2835,7 @@
// Should fail, due to a special case: if the label starts with "xn--", it
// should be parsed as Punycode, which must be all ASCII.
str = u"xn--hellö";
- EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_FALSE(IDNToASCII(str, &output));
output.set_length(0);
// String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
@@ -2737,7 +2843,7 @@
// which would be a valid ASCII character, U+0044, if the high byte were
// ignored.
str = u"xn--1⁄4";
- EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
+ EXPECT_FALSE(IDNToASCII(str, &output));
output.set_length(0);
}
@@ -2765,7 +2871,7 @@
TEST_P(URLCanonAsciiPercentEncodePathTest, UnescapePathCharHistogram) {
struct TestCase {
- gurl_base::StringPiece path;
+ std::string_view path;
gurl_base::HistogramBase::Count cnt;
} cases[] = {
{"/a", 0},
diff --git a/url/url_features.cc b/url/url_features.cc
index e046f60..9c9827b 100644
--- a/url/url_features.cc
+++ b/url/url_features.cc
@@ -30,6 +30,11 @@
"DontDecodeAsciiPercentEncodedURLPath",
gurl_base::FEATURE_ENABLED_BY_DEFAULT);
+// Kill switch for https://crbug.com/1416013.
+BASE_FEATURE(kStandardCompliantHostCharacters,
+ "StandardCompliantHostCharacters",
+ gurl_base::FEATURE_ENABLED_BY_DEFAULT);
+
bool IsUsingIDNA2008NonTransitional() {
// If the FeatureList isn't available yet, fall back to the feature's default
// state. This may happen during early startup, see crbug.com/1441956.
@@ -41,6 +46,28 @@
return gurl_base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
}
+bool IsUsingDontDecodeAsciiPercentEncodedURLPath() {
+ // If the FeatureList isn't available yet, fall back to the feature's default
+ // state. This may happen during early startup, see https://crbug.com/1478960.
+ if (!gurl_base::FeatureList::GetInstance()) {
+ return kDontDecodeAsciiPercentEncodedURLPath.default_state ==
+ gurl_base::FEATURE_ENABLED_BY_DEFAULT;
+ }
+
+ return gurl_base::FeatureList::IsEnabled(kDontDecodeAsciiPercentEncodedURLPath);
+}
+
+bool IsUsingStandardCompliantHostCharacters() {
+ // If the FeatureList isn't available yet, fall back to the feature's default
+ // state. This may happen during early startup, see crbug.com/1441956.
+ if (!gurl_base::FeatureList::GetInstance()) {
+ return kStandardCompliantHostCharacters.default_state ==
+ gurl_base::FEATURE_ENABLED_BY_DEFAULT;
+ }
+
+ return gurl_base::FeatureList::IsEnabled(kStandardCompliantHostCharacters);
+}
+
bool IsRecordingIDNA2008Metrics() {
return gurl_base::FeatureList::IsEnabled(kRecordIDNA2008Metrics);
}
diff --git a/url/url_features.h b/url/url_features.h
index 3dfb383..55b3234 100644
--- a/url/url_features.h
+++ b/url/url_features.h
@@ -18,6 +18,14 @@
// Returns true if Chrome is recording IDNA 2008 related metrics.
COMPONENT_EXPORT(URL) bool IsRecordingIDNA2008Metrics();
+// Returns true if kDontDecodeAsciiPercentEncodedURLPath feature is enabled.
+// See url::kDontDecodeAsciiPercentEncodedURLPath for details.
+COMPONENT_EXPORT(URL) bool IsUsingDontDecodeAsciiPercentEncodedURLPath();
+
+// Returns true if kStandardCompliantHostCharacters feature is enabled.
+// See url::kStandardCompliantHostCharacters for details.
+COMPONENT_EXPORT(URL) bool IsUsingStandardCompliantHostCharacters();
+
// Returns true if Chrome is enforcing the 4 part check for IPv4 embedded IPv6
// addresses.
COMPONENT_EXPORT(URL)
@@ -33,6 +41,12 @@
COMPONENT_EXPORT(URL)
BASE_DECLARE_FEATURE(kDontDecodeAsciiPercentEncodedURLPath);
+// When enabled, Chrome uses URL Standard compliant mode to
+// handle punctuation characters in the URL host part.
+// See https://crbug.com/1416013 for details.
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kStandardCompliantHostCharacters);
+
} // namespace url
#endif // URL_URL_FEATURES_H_
diff --git a/url/url_idna_ascii_only.cc b/url/url_idna_ascii_only.cc
index 2a4f0d3..6a864bb 100644
--- a/url/url_idna_ascii_only.cc
+++ b/url/url_idna_ascii_only.cc
@@ -6,8 +6,8 @@
#include <ostream>
#include <string>
+#include <string_view>
-#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "url/url_canon_internal.h"
@@ -16,9 +16,9 @@
// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
// to convert non-ASCII URL prior to passing to API.
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
- if (gurl_base::IsStringASCII(gurl_base::StringPiece16(src, src_len))) {
- output->Append(src, src_len);
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {
+ if (gurl_base::IsStringASCII(src)) {
+ output->Append(src);
return true;
}
GURL_DCHECK(false) << "IDN URL support is not available.";
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index 4a3a602..7c1931c 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -11,6 +11,7 @@
#include <ostream>
#include "polyfills/base/check_op.h"
+#include "base/numerics/safe_conversions.h"
#include <unicode/uidna.h>
#include <unicode/utypes.h>
#include "url/url_canon_icu.h"
@@ -90,7 +91,7 @@
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
-bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(std::u16string_view src, CanonOutputW* output) {
GURL_DCHECK(output->length() == 0); // Output buffer is assumed empty.
UIDNA* uidna = GetUIDNA();
@@ -98,8 +99,9 @@
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
- output->capacity(), &info, &err);
+ int output_length = uidna_nameToASCII(
+ uidna, src.data(), gurl_base::checked_cast<int32_t>(src.size()),
+ output->data(), output->capacity(), &info, &err);
// Ignore various errors for web compatibility. The options are specified
// by the WHATWG URL Standard. See
diff --git a/url/url_parse_perftest.cc b/url/url_parse_perftest.cc
index b9b85d9..63bccdb 100644
--- a/url/url_parse_perftest.cc
+++ b/url/url_parse_perftest.cc
@@ -2,7 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "base/test/perf_time_logger.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
@@ -13,7 +14,7 @@
namespace {
TEST(URLParse, FullURL) {
- constexpr gurl_base::StringPiece kUrl =
+ constexpr std::string_view kUrl =
"http://me:pass@host/foo/bar.html;param?query=yes#ref";
url::Parsed parsed;
@@ -24,16 +25,16 @@
timer.Done();
}
-constexpr gurl_base::StringPiece kTypicalUrl1 =
+constexpr std::string_view kTypicalUrl1 =
"http://www.google.com/"
"search?q=url+parsing&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:en-US:"
"official&client=firefox-a";
-constexpr gurl_base::StringPiece kTypicalUrl2 =
+constexpr std::string_view kTypicalUrl2 =
"http://www.amazon.com/Stephen-King-Thrillers-Horror-People/dp/0766012336/"
"ref=sr_1_2/133-4144931-4505264?ie=UTF8&s=books&qid=2144880915&sr=8-2";
-constexpr gurl_base::StringPiece kTypicalUrl3 =
+constexpr std::string_view kTypicalUrl3 =
"http://store.apple.com/1-800-MY-APPLE/WebObjects/AppleStore.woa/wa/"
"RSLID?nnmm=browse&mco=578E9744&node=home/desktop/mac_pro";
diff --git a/url/url_util.cc b/url/url_util.cc
index 1fb663e..267e4b9 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -147,18 +147,6 @@
DO_NOT_REMOVE_WHITESPACE,
};
-// This template converts a given character type to the corresponding
-// StringPiece type.
-template<typename CHAR> struct CharToStringPiece {
-};
-template<> struct CharToStringPiece<char> {
- typedef gurl_base::StringPiece Piece;
-};
-template <>
-struct CharToStringPiece<char16_t> {
- typedef gurl_base::StringPiece16 Piece;
-};
-
// Given a string and a range inside the string, compares it to the given
// lower-case |compare_to| buffer.
template<typename CHAR>
@@ -168,8 +156,7 @@
if (component.is_empty())
return compare_to[0] == 0; // When component is empty, match empty scheme.
return gurl_base::EqualsCaseInsensitiveASCII(
- typename CharToStringPiece<CHAR>::Piece(&spec[component.begin],
- component.len),
+ std::basic_string_view(&spec[component.begin], component.len),
compare_to);
}
@@ -185,8 +172,7 @@
for (const SchemeWithType& scheme_with_type : schemes) {
if (gurl_base::EqualsCaseInsensitiveASCII(
- typename CharToStringPiece<CHAR>::Piece(&spec[scheme.begin],
- scheme.len),
+ std::basic_string_view(&spec[scheme.begin], scheme.len),
scheme_with_type.scheme)) {
*type = scheme_with_type.type;
return true;
@@ -735,8 +721,8 @@
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
-bool DomainIs(gurl_base::StringPiece canonical_host,
- gurl_base::StringPiece canonical_domain) {
+bool DomainIs(std::string_view canonical_host,
+ std::string_view canonical_domain) {
if (canonical_host.empty() || canonical_domain.empty())
return false;
@@ -754,7 +740,7 @@
const char* host_first_pos =
canonical_host.data() + host_len - canonical_domain.length();
- if (gurl_base::StringPiece(host_first_pos, canonical_domain.length()) !=
+ if (std::string_view(host_first_pos, canonical_domain.length()) !=
canonical_domain) {
return false;
}
@@ -771,7 +757,7 @@
return true;
}
-bool HostIsIPAddress(gurl_base::StringPiece host) {
+bool HostIsIPAddress(std::string_view host) {
STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> ignored_output;
url::CanonHostInfo host_info;
url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
@@ -847,19 +833,18 @@
charset_converter, output, out_parsed);
}
-void DecodeURLEscapeSequences(const char* input,
- int length,
+void DecodeURLEscapeSequences(std::string_view input,
DecodeURLMode mode,
CanonOutputW* output) {
- if (length <= 0)
+ if (input.empty()) {
return;
+ }
STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
- size_t length_size_t = static_cast<size_t>(length);
- for (size_t i = 0; i < length_size_t; i++) {
+ for (size_t i = 0; i < input.length(); i++) {
if (input[i] == '%') {
unsigned char ch;
- if (DecodeEscaped(input, &i, length_size_t, &ch)) {
+ if (DecodeEscaped(input.data(), &i, input.length(), &ch)) {
unescaped_chars.push_back(ch);
} else {
// Invalid escape sequence, copy the percent literal.
@@ -908,16 +893,20 @@
}
}
-void EncodeURIComponent(const char* input, int length, CanonOutput* output) {
- for (int i = 0; i < length; ++i) {
- unsigned char c = static_cast<unsigned char>(input[i]);
- if (IsComponentChar(c))
+void EncodeURIComponent(std::string_view input, CanonOutput* output) {
+ for (unsigned char c : input) {
+ if (IsComponentChar(c)) {
output->push_back(c);
- else
+ } else {
AppendEscapedChar(c, output);
+ }
}
}
+bool IsURIComponentChar(char c) {
+ return IsComponentChar(c);
+}
+
bool CompareSchemeComponent(const char* spec,
const Component& component,
const char* compare_to) {
@@ -930,4 +919,16 @@
return DoCompareSchemeComponent(spec, component, compare_to);
}
+bool HasInvalidURLEscapeSequences(std::string_view input) {
+ for (size_t i = 0; i < input.size(); i++) {
+ if (input[i] == '%') {
+ unsigned char ch;
+ if (!DecodeEscaped(input.data(), &i, input.size(), &ch)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
} // namespace url
diff --git a/url/url_util.h b/url/url_util.h
index b489362..4254426 100644
--- a/url/url_util.h
+++ b/url/url_util.h
@@ -7,10 +7,10 @@
#include <memory>
#include <string>
+#include <string_view>
#include <vector>
#include "polyfills/base/component_export.h"
-#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
@@ -203,12 +203,12 @@
// input domain should match host canonicalization rules. i.e. it should be
// lowercase except for escape chars.
COMPONENT_EXPORT(URL)
-bool DomainIs(gurl_base::StringPiece canonical_host,
- gurl_base::StringPiece canonical_domain);
+bool DomainIs(std::string_view canonical_host,
+ std::string_view canonical_domain);
// Returns true if the hostname is an IP address. Note: this function isn't very
// cheap, as it must re-parse the host to verify.
-COMPONENT_EXPORT(URL) bool HostIsIPAddress(gurl_base::StringPiece host);
+COMPONENT_EXPORT(URL) bool HostIsIPAddress(std::string_view host);
// URL library wrappers --------------------------------------------------------
@@ -299,15 +299,29 @@
// Unescapes the given string using URL escaping rules.
COMPONENT_EXPORT(URL)
-void DecodeURLEscapeSequences(const char* input,
- int length,
+void DecodeURLEscapeSequences(std::string_view input,
DecodeURLMode mode,
CanonOutputW* output);
// Escapes the given string as defined by the JS method encodeURIComponent. See
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
COMPONENT_EXPORT(URL)
-void EncodeURIComponent(const char* input, int length, CanonOutput* output);
+void EncodeURIComponent(std::string_view input, CanonOutput* output);
+
+// Returns true if `c` is a character that does not require escaping in
+// encodeURIComponent.
+// TODO(crbug.com/1481056): Remove this when event-level reportEvent is removed
+// (if it is still this function's only consumer).
+COMPONENT_EXPORT(URL)
+bool IsURIComponentChar(char c);
+
+// Checks an arbitrary string for invalid escape sequences.
+//
+// A valid percent-encoding is '%' followed by exactly two hex-digits. This
+// function returns true if an occurrence of '%' is found and followed by
+// anything other than two hex-digits.
+COMPONENT_EXPORT(URL)
+bool HasInvalidURLEscapeSequences(std::string_view input);
} // namespace url
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index b052c3d..1435d3b 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -6,7 +6,8 @@
#include <stddef.h>
-#include "base/strings/string_piece.h"
+#include <string_view>
+
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest-message.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -259,14 +260,12 @@
for (size_t i = 0; i < std::size(decode_cases); i++) {
const char* input = decode_cases[i].input;
RawCanonOutputT<char16_t> output;
- DecodeURLEscapeSequences(input, strlen(input),
- DecodeURLMode::kUTF8OrIsomorphic, &output);
+ DecodeURLEscapeSequences(input, DecodeURLMode::kUTF8OrIsomorphic, &output);
EXPECT_EQ(decode_cases[i].output, gurl_base::UTF16ToUTF8(std::u16string(
output.data(), output.length())));
RawCanonOutputT<char16_t> output_utf8;
- DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
- &output_utf8);
+ DecodeURLEscapeSequences(input, DecodeURLMode::kUTF8, &output_utf8);
EXPECT_EQ(decode_cases[i].output,
gurl_base::UTF16ToUTF8(
std::u16string(output_utf8.data(), output_utf8.length())));
@@ -275,8 +274,7 @@
// Our decode should decode %00
const char zero_input[] = "%00";
RawCanonOutputT<char16_t> zero_output;
- DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8,
- &zero_output);
+ DecodeURLEscapeSequences(zero_input, DecodeURLMode::kUTF8, &zero_output);
EXPECT_NE("%00", gurl_base::UTF16ToUTF8(std::u16string(zero_output.data(),
zero_output.length())));
@@ -301,14 +299,13 @@
for (const auto& test : utf8_decode_cases) {
const char* input = test.input;
RawCanonOutputT<char16_t> output_iso;
- DecodeURLEscapeSequences(input, strlen(input),
- DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
+ DecodeURLEscapeSequences(input, DecodeURLMode::kUTF8OrIsomorphic,
+ &output_iso);
EXPECT_EQ(std::u16string(test.expected_iso.data()),
std::u16string(output_iso.data(), output_iso.length()));
RawCanonOutputT<char16_t> output_utf8;
- DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
- &output_utf8);
+ DecodeURLEscapeSequences(input, DecodeURLMode::kUTF8, &output_utf8);
EXPECT_EQ(std::u16string(test.expected_utf8.data()),
std::u16string(output_utf8.data(), output_utf8.length()));
}
@@ -341,7 +338,7 @@
for (size_t i = 0; i < std::size(encode_cases); i++) {
const char* input = encode_cases[i].input;
RawCanonOutputT<char> buffer;
- EncodeURIComponent(input, strlen(input), &buffer);
+ EncodeURIComponent(input, &buffer);
std::string output(buffer.data(), buffer.length());
EXPECT_EQ(encode_cases[i].output, output);
}
@@ -588,7 +585,7 @@
}
namespace {
-absl::optional<std::string> CanonicalizeSpec(gurl_base::StringPiece spec,
+absl::optional<std::string> CanonicalizeSpec(std::string_view spec,
bool trim_path_end) {
std::string canonicalized;
StdStringCanonOutput output(&canonicalized);
@@ -635,4 +632,87 @@
}
}
+TEST_F(URLUtilTest, TestHasInvalidURLEscapeSequences) {
+ struct TestCase {
+ const char* input;
+ bool is_invalid;
+ } cases[] = {
+ // Edge cases.
+ {"", false},
+ {"%", true},
+
+ // Single regular chars with no escaping are valid.
+ {"a", false},
+ {"g", false},
+ {"A", false},
+ {"G", false},
+ {":", false},
+ {"]", false},
+ {"\x00", false}, // ASCII 'NUL' char; as a C string this is an empty input.
+ {"\x01", false}, // ASCII 'SOH' char
+ {"\xC2\xA3", false}, // UTF-8 encoded '£'.
+
+ // Longer strings without invalid escaping are valid.
+ {"Hello world", false},
+ {"Here: [%25] <-- a percent-encoded percent character.", false},
+
+ // Valid %-escaped sequences ('%' followed by two hex digits).
+ {"%00", false},
+ {"%20", false},
+ {"%02", false},
+ {"%ff", false},
+ {"%FF", false},
+ {"%0a", false},
+ {"%0A", false},
+ {"abc%FF", false},
+ {"%FFabc", false},
+ {"abc%FFabc", false},
+ {"hello %FF world", false},
+ {"%20hello%20world", false},
+ {"%25", false},
+ {"%25%25", false},
+ {"%250", false},
+ {"%259", false},
+ {"%25A", false},
+ {"%25F", false},
+ {"%0a:", false},
+
+ // '%' followed by a single character is never a valid sequence.
+ {"%%", true},
+ {"%2", true},
+ {"%a", true},
+ {"%A", true},
+ {"%g", true},
+ {"%G", true},
+ {"%:", true},
+ {"%[", true},
+ {"%F", true},
+ {"%\xC2\xA3", true}, // '%' followed by UTF-8 encoded '£'.
+
+ // String ends on a potential escape sequence but without two hex-digits
+ // is invalid.
+ {"abc%", true},
+ {"abc%%", true},
+ {"abc%%%", true},
+ {"abc%a", true},
+
+ // One hex and one non-hex digit is invalid.
+ {"%a:", true},
+ {"%:a", true},
+ {"%::", true},
+ {"%ag", true},
+ {"%ga", true},
+ {"%-1", true},
+ {"%1-", true},
+ {"%0\xC2\xA3", true}, // %0£.
+ };
+
+ for (TestCase test_case : cases) {
+ const char* input = test_case.input;
+ bool result = HasInvalidURLEscapeSequences(input);
+ EXPECT_EQ(test_case.is_invalid, result)
+ << "Invalid result for '" << input << "'";
+ }
+}
+
} // namespace url