Update googleurl from upstream
The revision used is 9418d10b236dffce823870895b0e0b672c37fd86, from
Mon Feb 1 19:47:01 2021 +0000.
diff --git a/AUTHORS b/AUTHORS
index a619990..ce38168 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -54,6 +54,7 @@
Alexander Zhirov <ciberst@gmail.com>
Alexandre Abreu <wiss1976@gmail.com>
Alexandru Chiculita <achicu@adobe.com>
+Alexey Knyazev <lexa.knyazev@gmail.com>
Alexey Korepanov <alexkorep@gmail.com>
Alexey Kuts <kruntuid@gmail.com>
Alexey Kuzmin <alex.s.kuzmin@gmail.com>
@@ -89,6 +90,7 @@
Andrew Hung <andrhung@amazon.com>
Andrew Jorgensen <ajorgens@amazon.com>
Andrew MacPherson <andrew.macpherson@soundtrap.com>
+Andrew Nicols <andrewrn@gmail.com>
Andrew Tulloch <andrew@tullo.ch>
Andriy Rysin <arysin@gmail.com>
Anish Patankar <anish.p@samsung.com>
@@ -327,6 +329,7 @@
Frédéric Jacob <frederic.jacob.78@gmail.com>
Frédéric Wang <fred.wang@free.fr>
Fu Junwei <junwei.fu@intel.com>
+Gabriel Campana <gabriel.campana@ledger.fr>
Gabor Rapcsanyi <g.rapcsanyi@samsung.com>
Gaetano Mendola <mendola@gmail.com>
Gajendra N <gajendra.n@samsung.com>
@@ -441,6 +444,7 @@
James Vega <vega.james@gmail.com>
James Wei <james.wei@intel.com>
James Willcox <jwillcox@litl.com>
+Jan Grulich <grulja@gmail.com>
Jan Rucka <ruckajan10@gmail.com>
Jan Sauer <jan@jansauer.de>
Janusz Majnert <jmajnert@gmail.com>
@@ -449,6 +453,7 @@
Jared Sohn <jared.sohn@gmail.com>
Jared Wein <weinjared@gmail.com>
Jari Karppanen <jkarp@amazon.com>
+Jason Gronn <jasontopia03@gmail.com>
Jay Oster <jay@kodewerx.org>
Jay Soffian <jaysoffian@gmail.com>
Jeado Ko <haibane84@gmail.com>
@@ -461,6 +466,7 @@
Jeremy Spiegel <jeremysspiegel@gmail.com>
Jeroen Van den Berghe <vandenberghe.jeroen@gmail.com>
Jerry Lin <wahahab11@gmail.com>
+Jerry Zhang <zhj8407@gmail.com>
Jesper Storm Bache <jsbache@gmail.com>
Jesse Miller <jesse@jmiller.biz>
Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com>
@@ -492,6 +498,7 @@
Jinsong Fan <fanjinsong@sogou-inc.com>
Jinsong Fan <jinsong.van@gmail.com>
Jinwoo Song <jinwoo7.song@samsung.com>
+Jinyoung Hur <hur.ims@navercorp.com>
Jinyoung Hur <hurims@gmail.com>
Jitendra Kumar Sahoo <jitendra.ks@samsung.com>
Joachim Bauch <jbauch@webrtc.org>
@@ -500,6 +507,7 @@
Joe Knoll <joe.knoll@workday.com>
Joe Thomas <mhx348@motorola.com>
Joel Stanley <joel@jms.id.au>
+Joey Jiao <joeyjiao0810@gmail.com>
Johannes Rudolph <johannes.rudolph@googlemail.com>
John Kleinschmidt <kleinschmidtorama@gmail.com>
John Yani <vanuan@gmail.com>
@@ -556,6 +564,7 @@
Karan Thakkar <karanjthakkar@gmail.com>
Kartikey Bhatt <kartikey@amazon.com>
Kaspar Brand <googlecontrib@velox.ch>
+Kaushalendra Mishra <k.mishra@samsung.com>
Kaustubh Atrawalkar <kaustubh.a@samsung.com>
Kaustubh Atrawalkar <kaustubh.ra@gmail.com>
Ke He <ke.he@intel.com>
@@ -611,6 +620,7 @@
Le Hoang Quyen <le.hoang.q@gmail.com>
Legend Lee <guanxian.li@intel.com>
Leith Bade <leith@leithalweapon.geek.nz>
+Lei Gao <leigao@huawei.com>
Lei Li <lli.kernel.kvm@gmail.com>
Lenny Khazan <lenny.khazan@gmail.com>
Leo Wolf <jclw@ymail.com>
@@ -660,6 +670,7 @@
Mariusz Mlynski <marius.mlynski@gmail.com>
Mark Hahnenberg <mhahnenb@andrew.cmu.edu>
Mark Seaborn <mrs@mythic-beasts.com>
+Mark Winter <wintermarkedward@gmail.com>
Martijn Croonen <martijn@martijnc.be>
Martin Bednorz <m.s.bednorz@gmail.com>
Martin Persson <mnpn03@gmail.com>
@@ -696,6 +707,7 @@
Michael Constant <mconst@gmail.com>
Michael Forney <mforney@mforney.org>
Michael Gilbert <floppymaster@gmail.com>
+Michael Kolomeytsev <michael.kolomeytsev@gmail.com>
Michael Lopez <lopes92290@gmail.com>
Michael Morrison <codebythepound@gmail.com>
Michael Müller <michael@fds-team.de>
@@ -868,6 +880,7 @@
Robert O'Callahan <rocallahan@gmail.com>
Robert Nagy <robert.nagy@gmail.com>
Robert Sesek <rsesek@bluestatic.org>
+Roee Kasher <roee91@gmail.com>
Roger Zanoni <rogerzanoni@gmail.com>
Roland Takacs <rtakacs.u-szeged@partner.samsung.com>
Romain Pokrzywka <romain.pokrzywka@gmail.com>
@@ -965,10 +978,12 @@
Siddharth Shankar <funkysidd@gmail.com>
Simeon Kuran <simeon.kuran@gmail.com>
Simon Arlott <simon.arlott@gmail.com>
+Simon Jackson <simon.jackson@sonocent.com>
Simon La Macchia <smacchia@amazon.com>
Siva Kumar Gunturi <siva.gunturi@samsung.com>
Sohan Jyoti Ghosh <sohan.jyoti@huawei.com>
Sohan Jyoti Ghosh <sohan.jyoti@samsung.com>
+Song Fangzhen <songfangzhen@bytedance.com>
Song YeWen <ffmpeg@gmail.com>
Sooho Park <sooho1000@gmail.com>
Soojung Choi <crystal2840@gmail.com>
@@ -1077,6 +1092,7 @@
Wojciech Bielawski <wojciech.bielawski@gmail.com>
Wanming Lin <wanming.lin@intel.com>
Wei Li <wei.c.li@intel.com>
+Wen Fan <fanwen1@huawei.com>
Wenxiang Qian <leonwxqian@gmail.com>
WenSheng He <wensheng.he@samsung.com>
Wesley Lancel <wesleylancel@gmail.com>
diff --git a/base/BUILD b/base/BUILD
index 2ab77e8..63787b7 100644
--- a/base/BUILD
+++ b/base/BUILD
@@ -9,6 +9,7 @@
hdrs = [
"compiler_specific.h",
"containers/checked_iterators.h",
+ "containers/contains.h",
"containers/contiguous_iterator.h",
"containers/span.h",
"containers/util.h",
@@ -16,6 +17,7 @@
"functional/identity.h",
"functional/invoke.h",
"functional/not_fn.h",
+ "i18n/uchar.h",
"macros.h",
"no_destructor.h",
"optional.h",
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index fe3d499..fa961b0 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -65,7 +65,7 @@
// To provide the complementary behavior (prevent the annotated function from
// being omitted) look at NOINLINE. Also note that this doesn't prevent code
// folding of multiple identical caller functions into a single signature. To
-// prevent code folding, see gurl_base::debug::Alias.
+// prevent code folding, see NO_CODE_FOLDING() in base/debug/alias.h.
// Use like:
// void NOT_TAIL_CALLED FooBar();
#if defined(__clang__) && __has_attribute(not_tail_called)
diff --git a/base/containers/contiguous_iterator.h b/base/containers/contiguous_iterator.h
index a1c1f9b..48b2755 100644
--- a/base/containers/contiguous_iterator.h
+++ b/base/containers/contiguous_iterator.h
@@ -26,13 +26,19 @@
template <typename T>
struct IsPointer : std::is_pointer<T> {};
+template <typename T, typename StringT = std::basic_string<iter_value_t<T>>>
+struct IsStringIterImpl
+ : disjunction<std::is_same<T, typename StringT::const_iterator>,
+ std::is_same<T, typename StringT::iterator>> {};
+
// An iterator to std::basic_string is contiguous.
// Reference: https://wg21.link/basic.string.general#2
-template <typename T, typename StringT = std::basic_string<iter_value_t<T>>>
+//
+// Note: Requires indirection via `IsStringIterImpl` to avoid triggering a
+// `static_assert(is_trivial_v<value_type>)` inside libc++'s std::basic_string.
+template <typename T>
struct IsStringIter
- : conjunction<std::is_trivial<iter_value_t<T>>,
- disjunction<std::is_same<T, typename StringT::const_iterator>,
- std::is_same<T, typename StringT::iterator>>> {};
+ : conjunction<std::is_trivial<iter_value_t<T>>, IsStringIterImpl<T>> {};
// An iterator to std::array is contiguous.
// Reference: https://wg21.link/array.overview#1
diff --git a/base/containers/util.h b/base/containers/util.h
index 14f012a..7a65b6a 100644
--- a/base/containers/util.h
+++ b/base/containers/util.h
@@ -12,7 +12,7 @@
// TODO(crbug.com/817982): What we really need is for checked_math.h to be
// able to do checked arithmetic on pointers.
template <typename T>
-static inline uintptr_t get_uintptr(const T* t) {
+inline uintptr_t get_uintptr(const T* t) {
return reinterpret_cast<uintptr_t>(t);
}
diff --git a/base/macros.h b/base/macros.h
index c67bdbd..19d15ca 100644
--- a/base/macros.h
+++ b/base/macros.h
@@ -14,21 +14,20 @@
// Use explicit deletions instead. See the section on copyability/movability in
// //styleguide/c++/c++-dos-and-donts.md for more information.
-// Put this in the declarations for a class to be uncopyable.
+// DEPRECATED: See above. Makes a class uncopyable.
#define DISALLOW_COPY(TypeName) \
TypeName(const TypeName&) = delete
-// Put this in the declarations for a class to be unassignable.
+// DEPRECATED: See above. Makes a class unassignable.
#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
-// Put this in the declarations for a class to be uncopyable and unassignable.
+// DEPRECATED: See above. Makes a class uncopyable and unassignable.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
DISALLOW_COPY(TypeName); \
DISALLOW_ASSIGN(TypeName)
-// A macro to disallow all the implicit constructors, namely the
+// DEPRECATED: See above. Disallow all implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
-// This is especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName() = delete; \
DISALLOW_COPY_AND_ASSIGN(TypeName)
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h
index 646b733..e6432f8 100644
--- a/base/ranges/algorithm.h
+++ b/base/ranges/algorithm.h
@@ -419,11 +419,12 @@
InputIterator last,
const T& value,
Proj proj = {}) {
- // Note: In order to be able to apply `proj` to each element in [first, last)
- // we are dispatching to std::find_if instead of std::find.
- return std::find_if(first, last, [&proj, &value](auto&& lhs) {
- return gurl_base::invoke(proj, std::forward<decltype(lhs)>(lhs)) == value;
- });
+ for (; first != last; ++first) {
+ if (gurl_base::invoke(proj, *first) == value)
+ break;
+ }
+
+ return first;
}
// Let `E(i)` be `bool(invoke(proj, *i) == value)`.
@@ -4299,7 +4300,7 @@
//
// Returns: `last`.
//
-// Complexity: At most `3 log(last - first)` comparisons and twice as many
+// Complexity: At most `3 * (last - first)` comparisons and twice as many
// projections.
//
// Reference: https://wg21.link/make.heap#:~:text=ranges::make_heap(I
@@ -4323,7 +4324,7 @@
//
// Returns: `end(range)`.
//
-// Complexity: At most `3 log(size(range))` comparisons and twice as many
+// Complexity: At most `3 * size(range)` comparisons and twice as many
// projections.
//
// Reference: https://wg21.link/make.heap#:~:text=ranges::make_heap(R
diff --git a/base/stl_util.h b/base/stl_util.h
index f3d86ef..29f200e 100644
--- a/base/stl_util.h
+++ b/base/stl_util.h
@@ -24,6 +24,7 @@
#include <vector>
#include "polyfills/base/check.h"
+#include "base/containers/contains.h"
#include "base/optional.h"
#include "base/ranges/algorithm.h"
#include "base/template_util.h"
@@ -51,38 +52,6 @@
std::is_same<typename std::iterator_traits<Iter>::iterator_category,
std::random_access_iterator_tag>::value;
-// Utility type traits used for specializing gurl_base::Contains() below.
-template <typename Container, typename Element, typename = void>
-struct HasFindWithNpos : std::false_type {};
-
-template <typename Container, typename Element>
-struct HasFindWithNpos<
- Container,
- Element,
- void_t<decltype(std::declval<const Container&>().find(
- std::declval<const Element&>()) != Container::npos)>>
- : std::true_type {};
-
-template <typename Container, typename Element, typename = void>
-struct HasFindWithEnd : std::false_type {};
-
-template <typename Container, typename Element>
-struct HasFindWithEnd<Container,
- Element,
- void_t<decltype(std::declval<const Container&>().find(
- std::declval<const Element&>()) !=
- std::declval<const Container&>().end())>>
- : std::true_type {};
-
-template <typename Container, typename Element, typename = void>
-struct HasContains : std::false_type {};
-
-template <typename Container, typename Element>
-struct HasContains<Container,
- Element,
- void_t<decltype(std::declval<const Container&>().contains(
- std::declval<const Element&>()))>> : std::true_type {};
-
} // namespace internal
// C++14 implementation of C++17's std::size():
@@ -219,51 +188,6 @@
return std::count(container.begin(), container.end(), val);
}
-// General purpose implementation to check if |container| contains |value|.
-template <typename Container,
- typename Value,
- std::enable_if_t<
- !internal::HasFindWithNpos<Container, Value>::value &&
- !internal::HasFindWithEnd<Container, Value>::value &&
- !internal::HasContains<Container, Value>::value>* = nullptr>
-bool Contains(const Container& container, const Value& value) {
- using std::begin;
- using std::end;
- return std::find(begin(container), end(container), value) != end(container);
-}
-
-// Specialized Contains() implementation for when |container| has a find()
-// member function and a static npos member, but no contains() member function.
-template <typename Container,
- typename Value,
- std::enable_if_t<internal::HasFindWithNpos<Container, Value>::value &&
- !internal::HasContains<Container, Value>::value>* =
- nullptr>
-bool Contains(const Container& container, const Value& value) {
- return container.find(value) != Container::npos;
-}
-
-// Specialized Contains() implementation for when |container| has a find()
-// and end() member function, but no contains() member function.
-template <typename Container,
- typename Value,
- std::enable_if_t<internal::HasFindWithEnd<Container, Value>::value &&
- !internal::HasContains<Container, Value>::value>* =
- nullptr>
-bool Contains(const Container& container, const Value& value) {
- return container.find(value) != container.end();
-}
-
-// Specialized Contains() implementation for when |container| has a contains()
-// member function.
-template <
- typename Container,
- typename Value,
- std::enable_if_t<internal::HasContains<Container, Value>::value>* = nullptr>
-bool Contains(const Container& container, const Value& value) {
- return container.contains(value);
-}
-
// O(1) implementation of const casting an iterator for any sequence,
// associative or unordered associative container in the STL.
//
diff --git a/base/strings/BUILD b/base/strings/BUILD
index c76b35a..65f3293 100644
--- a/base/strings/BUILD
+++ b/base/strings/BUILD
@@ -12,7 +12,7 @@
"string_util_constants.cc",
"utf_string_conversion_utils.cc",
"utf_string_conversions.cc",
- ] + build_config.strings_srcs,
+ ],
hdrs = [
"char_traits.h",
"string16.h",
diff --git a/base/strings/char_traits.h b/base/strings/char_traits.h
index 0fe9f26..13f5833 100644
--- a/base/strings/char_traits.h
+++ b/base/strings/char_traits.h
@@ -7,6 +7,8 @@
#include <stddef.h>
+#include <string>
+
#include "base/compiler_specific.h"
namespace gurl_base {
@@ -33,10 +35,14 @@
constexpr int CharTraits<T>::compare(const T* s1,
const T* s2,
size_t n) noexcept {
+ // Comparison with operator < fails, because of signed/unsigned
+ // mismatch, https://crbug.com/941696
+ // std::char_traits<T>::lt is guaranteed to be constexpr in C++14:
+ // https://timsong-cpp.github.io/cppwp/n4140/char.traits.specializations#char
for (; n; --n, ++s1, ++s2) {
- if (*s1 < *s2)
+ if (std::char_traits<T>::lt(*s1, *s2))
return -1;
- if (*s1 > *s2)
+ if (std::char_traits<T>::lt(*s2, *s1))
return 1;
}
return 0;
@@ -50,42 +56,35 @@
return i;
}
-// char specialization of CharTraits that can use clang's constexpr instrinsics,
-// where available.
+// char and wchar_t specialization of CharTraits that can use clang's constexpr
+// intrinsics, where available.
+#if HAS_FEATURE(cxx_constexpr_string_builtins)
template <>
struct CharTraits<char> {
static constexpr int compare(const char* s1,
const char* s2,
- size_t n) noexcept;
- static constexpr size_t length(const char* s) noexcept;
+ size_t n) noexcept {
+ return __builtin_memcmp(s1, s2, n);
+ }
+
+ static constexpr size_t length(const char* s) noexcept {
+ return __builtin_strlen(s);
+ }
};
-constexpr int CharTraits<char>::compare(const char* s1,
- const char* s2,
- size_t n) noexcept {
-#if HAS_FEATURE(cxx_constexpr_string_builtins)
- return __builtin_memcmp(s1, s2, n);
-#else
- for (; n; --n, ++s1, ++s2) {
- if (*s1 < *s2)
- return -1;
- if (*s1 > *s2)
- return 1;
+template <>
+struct CharTraits<wchar_t> {
+ static constexpr int compare(const wchar_t* s1,
+ const wchar_t* s2,
+ size_t n) noexcept {
+ return __builtin_wmemcmp(s1, s2, n);
}
- return 0;
-#endif
-}
-constexpr size_t CharTraits<char>::length(const char* s) noexcept {
-#if defined(__clang__)
- return __builtin_strlen(s);
-#else
- size_t i = 0;
- for (; *s; ++s)
- ++i;
- return i;
+ static constexpr size_t length(const wchar_t* s) noexcept {
+ return __builtin_wcslen(s);
+ }
+};
#endif
-}
} // namespace base
diff --git a/base/strings/string16.cc b/base/strings/string16.cc
deleted file mode 100644
index 426d5b6..0000000
--- a/base/strings/string16.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/strings/string16.h"
-
-#if defined(WCHAR_T_IS_UTF16) && !defined(_AIX)
-
-#error This file should not be used on 2-byte wchar_t systems
-// If this winds up being needed on 2-byte wchar_t systems, either the
-// definitions below can be used, or the host system's wide character
-// functions like wmemcmp can be wrapped.
-
-#elif defined(WCHAR_T_IS_UTF32)
-
-#include <string.h>
-
-#include <ostream>
-
-#include "base/strings/string_piece.h"
-
-namespace gurl_base {
-
-int c16memcmp(const char16* s1, const char16* s2, size_t n) {
- // We cannot call memcmp because that changes the semantics.
- while (n-- > 0) {
- if (*s1 != *s2) {
- // We cannot use (*s1 - *s2) because char16 is unsigned.
- return ((*s1 < *s2) ? -1 : 1);
- }
- ++s1;
- ++s2;
- }
- return 0;
-}
-
-size_t c16len(const char16* s) {
- const char16 *s_orig = s;
- while (*s) {
- ++s;
- }
- return s - s_orig;
-}
-
-const char16* c16memchr(const char16* s, char16 c, size_t n) {
- while (n-- > 0) {
- if (*s == c) {
- return s;
- }
- ++s;
- }
- return nullptr;
-}
-
-char16* c16memmove(char16* s1, const char16* s2, size_t n) {
- return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16)));
-}
-
-char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
- return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16)));
-}
-
-char16* c16memset(char16* s, char16 c, size_t n) {
- char16 *s_orig = s;
- while (n-- > 0) {
- *s = c;
- ++s;
- }
- return s_orig;
-}
-
-namespace string16_internals {
-
-std::ostream& operator<<(std::ostream& out, const string16& str) {
- return out << gurl_base::StringPiece16(str);
-}
-
-void PrintTo(const string16& str, std::ostream* out) {
- *out << str;
-}
-
-} // namespace string16_internals
-
-} // namespace base
-
-template class std::
- basic_string<gurl_base::char16, gurl_base::string16_internals::string16_char_traits>;
-
-#endif // WCHAR_T_IS_UTF32
diff --git a/base/strings/string16.h b/base/strings/string16.h
index f17a57f..dc3ddc7 100644
--- a/base/strings/string16.h
+++ b/base/strings/string16.h
@@ -6,34 +6,18 @@
#define BASE_STRINGS_STRING16_H_
// WHAT:
-// A version of std::basic_string that provides 2-byte characters even when
-// wchar_t is not implemented as a 2-byte type. You can access this class as
-// string16. We also define char16, which string16 is based upon.
+// Type aliases for string and character types supporting UTF-16 data. Prior to
+// C++11 there was no standard library solution for this, which is why wstring
+// was used where possible (i.e. where wchar_t holds UTF-16 encoded data).
//
-// WHY:
-// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
-// data. Plenty of existing code operates on strings encoded as UTF-16.
-//
-// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
-// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
-// at run time, because it calls some functions (like wcslen) that come from
-// the system's native C library -- which was built with a 4-byte wchar_t!
-// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
-// entirely improper on those systems where the encoding of wchar_t is defined
-// as UTF-32.
-//
-// Here, we define string16, which is similar to std::wstring but replaces all
-// libc functions with custom, 2-byte-char compatible routines. It is capable
-// of carrying UTF-16-encoded data.
+// In C++11 we gained std::u16string, which is a cross-platform solution for
+// UTF-16 strings. This is now the string16 type wherever wchar_t does not
+// hold UTF-16 data (i.e. commonly non-Windows platforms). Eventually this
+// should be used everywhere, at which point this type alias and this file
+// should be removed. https://crbug.com/911896 tracks the migration effort.
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#include <functional>
#include <string>
-#include "polyfills/base/base_export.h"
#include "build/build_config.h"
#if defined(WCHAR_T_IS_UTF16)
@@ -42,188 +26,24 @@
// a literal string. This indirection allows for an easier migration of
// gurl_base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one
// character change to the macro will be necessary.
-// This macro does not exist when WCHAR_T_IS_UTF32, as it is currently not
-// possible to create a char array form a literal in this case.
// TODO(https://crbug.com/911896): Remove this macro once gurl_base::char16 is
// char16_t on all platforms.
#define STRING16_LITERAL(x) L##x
namespace gurl_base {
-
-typedef wchar_t char16;
-typedef std::wstring string16;
-
+using char16 = wchar_t;
+using string16 = std::wstring;
} // namespace base
-#elif defined(WCHAR_T_IS_UTF32)
+#else
-#include <wchar.h> // for mbstate_t
+#define STRING16_LITERAL(x) u##x
namespace gurl_base {
-
-typedef uint16_t char16;
-
-// char16 versions of the functions required by string16_char_traits; these
-// are based on the wide character functions of similar names ("w" or "wcs"
-// instead of "c16").
-BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
-BASE_EXPORT size_t c16len(const char16* s);
-BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);
-BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);
-BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
-BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
-
-// This namespace contains the implementation of gurl_base::string16 along with
-// things that need to be found via argument-dependent lookup from a
-// gurl_base::string16.
-namespace string16_internals {
-
-struct string16_char_traits {
- typedef char16 char_type;
- typedef int int_type;
-
- // int_type needs to be able to hold each possible value of char_type, and in
- // addition, the distinct value of eof().
- static_assert(sizeof(int_type) > sizeof(char_type),
- "int must be larger than 16 bits wide");
-
- typedef std::streamoff off_type;
- typedef mbstate_t state_type;
- typedef std::fpos<state_type> pos_type;
-
- static void assign(char_type& c1, const char_type& c2) {
- c1 = c2;
- }
-
- static bool eq(const char_type& c1, const char_type& c2) {
- return c1 == c2;
- }
- static bool lt(const char_type& c1, const char_type& c2) {
- return c1 < c2;
- }
-
- static int compare(const char_type* s1, const char_type* s2, size_t n) {
- return c16memcmp(s1, s2, n);
- }
-
- static size_t length(const char_type* s) {
- return c16len(s);
- }
-
- static const char_type* find(const char_type* s, size_t n,
- const char_type& a) {
- return c16memchr(s, a, n);
- }
-
- static char_type* move(char_type* s1, const char_type* s2, size_t n) {
- return c16memmove(s1, s2, n);
- }
-
- static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
- return c16memcpy(s1, s2, n);
- }
-
- static char_type* assign(char_type* s, size_t n, char_type a) {
- return c16memset(s, a, n);
- }
-
- static int_type not_eof(const int_type& c) {
- return eq_int_type(c, eof()) ? 0 : c;
- }
-
- static char_type to_char_type(const int_type& c) {
- return char_type(c);
- }
-
- static int_type to_int_type(const char_type& c) {
- return int_type(c);
- }
-
- static bool eq_int_type(const int_type& c1, const int_type& c2) {
- return c1 == c2;
- }
-
- static int_type eof() {
- return static_cast<int_type>(EOF);
- }
-};
-
-} // namespace string16_internals
-
-typedef std::basic_string<char16,
- gurl_base::string16_internals::string16_char_traits>
- string16;
-
-namespace string16_internals {
-
-BASE_EXPORT extern std::ostream& operator<<(std::ostream& out,
- const string16& str);
-
-// This is required by googletest to print a readable output on test failures.
-BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out);
-
-} // namespace string16_internals
-
+using char16 = char16_t;
+using string16 = std::u16string;
} // namespace base
-// The string class will be explicitly instantiated only once, in string16.cc.
-//
-// std::basic_string<> in GNU libstdc++ contains a static data member,
-// _S_empty_rep_storage, to represent empty strings. When an operation such
-// as assignment or destruction is performed on a string, causing its existing
-// data member to be invalidated, it must not be freed if this static data
-// member is being used. Otherwise, it counts as an attempt to free static
-// (and not allocated) data, which is a memory error.
-//
-// Generally, due to C++ template magic, _S_empty_rep_storage will be marked
-// as a coalesced symbol, meaning that the linker will combine multiple
-// instances into a single one when generating output.
-//
-// If a string class is used by multiple shared libraries, a problem occurs.
-// Each library will get its own copy of _S_empty_rep_storage. When strings
-// are passed across a library boundary for alteration or destruction, memory
-// errors will result. GNU libstdc++ contains a configuration option,
-// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
-// disables the static data member optimization, but it's a good optimization
-// and non-STL code is generally at the mercy of the system's STL
-// configuration. Fully-dynamic strings are not the default for GNU libstdc++
-// libstdc++ itself or for the libstdc++ installations on the systems we care
-// about, such as Mac OS X and relevant flavors of Linux.
-//
-// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
-//
-// To avoid problems, string classes need to be explicitly instantiated only
-// once, in exactly one library. All other string users see it via an "extern"
-// declaration. This is precisely how GNU libstdc++ handles
-// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
-//
-// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
-// in which the linker does not fully coalesce symbols when dead code
-// stripping is enabled. This bug causes the memory errors described above
-// to occur even when a std::basic_string<> does not cross shared library
-// boundaries, such as in statically-linked executables.
-//
-// TODO(mark): File this bug with Apple and update this note with a bug number.
-
-extern template class BASE_EXPORT
- std::basic_string<gurl_base::char16,
- gurl_base::string16_internals::string16_char_traits>;
-
-// Specialize std::hash for gurl_base::string16. Although the style guide forbids
-// this in general, it is necessary for consistency with WCHAR_T_IS_UTF16
-// platforms, where gurl_base::string16 is a type alias for std::wstring.
-namespace std {
-template <>
-struct hash<gurl_base::string16> {
- std::size_t operator()(const gurl_base::string16& s) const {
- std::size_t result = 0;
- for (gurl_base::char16 c : s)
- result = (result * 131) + c;
- return result;
- }
-};
-} // namespace std
-
-#endif // WCHAR_T_IS_UTF32
+#endif // WCHAR_T_IS_UTF16
#endif // BASE_STRINGS_STRING16_H_
diff --git a/base/strings/string16_unittest.cc b/base/strings/string16_unittest.cc
index a9aecef..9cdb075 100644
--- a/base/strings/string16_unittest.cc
+++ b/base/strings/string16_unittest.cc
@@ -2,74 +2,22 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <sstream>
-#include <unordered_set>
-
#include "base/strings/string16.h"
-#include "base/strings/utf_string_conversions.h"
-#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace gurl_base {
-#if defined(WCHAR_T_IS_UTF16)
+// Ensure that STRING16_LITERAL can be used to instantiate constants of type
+// char16 and char16[], respectively.
TEST(String16Test, String16Literal) {
- static constexpr char16 kHelloWorld[] = STRING16_LITERAL("Hello, World");
- string16 hello_world = kHelloWorld;
- EXPECT_EQ(kHelloWorld, hello_world);
-}
-#endif
+ static constexpr char16 kHelloChars[] = {
+ STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'),
+ STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'),
+ };
-// We define a custom operator<< for string16 so we can use it with logging.
-// This tests that conversion.
-TEST(String16Test, OutputStream) {
- // Basic stream test.
- {
- std::ostringstream stream;
- stream << "Empty '" << string16() << "' standard '"
- << string16(ASCIIToUTF16("Hello, world")) << "'";
- EXPECT_STREQ("Empty '' standard 'Hello, world'",
- stream.str().c_str());
- }
-
- // Interesting edge cases.
- {
- // These should each get converted to the invalid character: EF BF BD.
- string16 initial_surrogate;
- initial_surrogate.push_back(0xd800);
- string16 final_surrogate;
- final_surrogate.push_back(0xdc00);
-
- // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'.
- string16 surrogate_pair;
- surrogate_pair.push_back(0xd800);
- surrogate_pair.push_back(0xdf00);
- surrogate_pair.push_back('z');
-
- // Will get converted to the invalid char + 's': EF BF BD 's'.
- string16 unterminated_surrogate;
- unterminated_surrogate.push_back(0xd800);
- unterminated_surrogate.push_back('s');
-
- std::ostringstream stream;
- stream << initial_surrogate << "," << final_surrogate << ","
- << surrogate_pair << "," << unterminated_surrogate;
-
- EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds",
- stream.str().c_str());
- }
-}
-
-TEST(String16Test, Hash) {
- string16 str1 = ASCIIToUTF16("hello");
- string16 str2 = ASCIIToUTF16("world");
-
- std::unordered_set<string16> set;
-
- set.insert(str1);
- EXPECT_EQ(1u, set.count(str1));
- EXPECT_EQ(0u, set.count(str2));
+ static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello");
+ EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0);
}
} // namespace base
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
index 62ba11f..fe9be5f 100644
--- a/base/strings/string_piece.cc
+++ b/base/strings/string_piece.cc
@@ -12,6 +12,7 @@
#include <ostream>
#include "base/strings/utf_string_conversions.h"
+#include "build/build_config.h"
namespace gurl_base {
namespace {
@@ -24,8 +25,7 @@
// the possible values of an unsigned char. Thus it should be be declared
// as follows:
// bool table[UCHAR_MAX + 1]
-inline void BuildLookupTable(const StringPiece& characters_wanted,
- bool* table) {
+inline void BuildLookupTable(StringPiece characters_wanted, bool* table) {
const size_t length = characters_wanted.length();
const char* const data = characters_wanted.data();
for (size_t i = 0; i < length; ++i) {
@@ -41,39 +41,25 @@
template class BasicStringPiece<string16>;
#endif
-std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
+std::ostream& operator<<(std::ostream& o, StringPiece piece) {
o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
return o;
}
-std::ostream& operator<<(std::ostream& o, const StringPiece16& piece) {
+std::ostream& operator<<(std::ostream& o, StringPiece16 piece) {
return o << UTF16ToUTF8(piece);
}
+#if !defined(WCHAR_T_IS_UTF16)
+std::ostream& operator<<(std::ostream& o, WStringPiece piece) {
+ return o << WideToUTF8(piece);
+}
+#endif
+
namespace internal {
-template<typename STR>
-size_t copyT(const BasicStringPiece<STR>& self,
- typename STR::value_type* buf,
- size_t n,
- size_t pos) {
- size_t ret = std::min(self.size() - pos, n);
- memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type));
- return ret;
-}
-
-size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) {
- return copyT(self, buf, n, pos);
-}
-
-size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) {
- return copyT(self, buf, n, pos);
-}
-
-template<typename STR>
-size_t findT(const BasicStringPiece<STR>& self,
- const BasicStringPiece<STR>& s,
- size_t pos) {
+template <typename STR>
+size_t findT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) {
if (pos > self.size())
return BasicStringPiece<STR>::npos;
@@ -84,16 +70,16 @@
return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos;
}
-size_t find(const StringPiece& self, const StringPiece& s, size_t pos) {
+size_t find(StringPiece self, StringPiece s, size_t pos) {
return findT(self, s, pos);
}
-size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) {
+size_t find(StringPiece16 self, StringPiece16 s, size_t pos) {
return findT(self, s, pos);
}
-template<typename STR>
-size_t findT(const BasicStringPiece<STR>& self,
+template <typename STR>
+size_t findT(BasicStringPiece<STR> self,
typename STR::value_type c,
size_t pos) {
if (pos >= self.size())
@@ -105,18 +91,16 @@
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
}
-size_t find(const StringPiece& self, char c, size_t pos) {
+size_t find(StringPiece self, char c, size_t pos) {
return findT(self, c, pos);
}
-size_t find(const StringPiece16& self, char16 c, size_t pos) {
+size_t find(StringPiece16 self, char16 c, size_t pos) {
return findT(self, c, pos);
}
-template<typename STR>
-size_t rfindT(const BasicStringPiece<STR>& self,
- const BasicStringPiece<STR>& s,
- size_t pos) {
+template <typename STR>
+size_t rfindT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) {
if (self.size() < s.size())
return BasicStringPiece<STR>::npos;
@@ -131,16 +115,16 @@
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
}
-size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) {
+size_t rfind(StringPiece self, StringPiece s, size_t pos) {
return rfindT(self, s, pos);
}
-size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) {
+size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos) {
return rfindT(self, s, pos);
}
-template<typename STR>
-size_t rfindT(const BasicStringPiece<STR>& self,
+template <typename STR>
+size_t rfindT(BasicStringPiece<STR> self,
typename STR::value_type c,
size_t pos) {
if (self.size() == 0)
@@ -156,18 +140,16 @@
return BasicStringPiece<STR>::npos;
}
-size_t rfind(const StringPiece& self, char c, size_t pos) {
+size_t rfind(StringPiece self, char c, size_t pos) {
return rfindT(self, c, pos);
}
-size_t rfind(const StringPiece16& self, char16 c, size_t pos) {
+size_t rfind(StringPiece16 self, char16 c, size_t pos) {
return rfindT(self, c, pos);
}
// 8-bit version using lookup table.
-size_t find_first_of(const StringPiece& self,
- const StringPiece& s,
- size_t pos) {
+size_t find_first_of(StringPiece self, StringPiece s, size_t pos) {
if (self.size() == 0 || s.size() == 0)
return StringPiece::npos;
@@ -186,9 +168,7 @@
}
// 16-bit brute force version.
-size_t find_first_of(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos) {
+size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {
// Use the faster std::find() if searching for a single character.
StringPiece16::const_iterator found =
s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
@@ -200,9 +180,7 @@
}
// 8-bit version using lookup table.
-size_t find_first_not_of(const StringPiece& self,
- const StringPiece& s,
- size_t pos) {
+size_t find_first_not_of(StringPiece self, StringPiece s, size_t pos) {
if (self.size() == 0)
return StringPiece::npos;
@@ -224,8 +202,8 @@
}
// 16-bit brute-force version.
-BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
- const StringPiece16& s,
+BASE_EXPORT size_t find_first_not_of(StringPiece16 self,
+ StringPiece16 s,
size_t pos) {
if (self.size() == 0)
return StringPiece16::npos;
@@ -244,8 +222,8 @@
return StringPiece16::npos;
}
-template<typename STR>
-size_t find_first_not_ofT(const BasicStringPiece<STR>& self,
+template <typename STR>
+size_t find_first_not_ofT(BasicStringPiece<STR> self,
typename STR::value_type c,
size_t pos) {
if (self.size() == 0)
@@ -259,20 +237,16 @@
return BasicStringPiece<STR>::npos;
}
-size_t find_first_not_of(const StringPiece& self,
- char c,
- size_t pos) {
+size_t find_first_not_of(StringPiece self, char c, size_t pos) {
return find_first_not_ofT(self, c, pos);
}
-size_t find_first_not_of(const StringPiece16& self,
- char16 c,
- size_t pos) {
+size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos) {
return find_first_not_ofT(self, c, pos);
}
// 8-bit version using lookup table.
-size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) {
+size_t find_last_of(StringPiece self, StringPiece s, size_t pos) {
if (self.size() == 0 || s.size() == 0)
return StringPiece::npos;
@@ -292,9 +266,7 @@
}
// 16-bit brute-force version.
-size_t find_last_of(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos) {
+size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) {
if (self.size() == 0)
return StringPiece16::npos;
@@ -311,9 +283,7 @@
}
// 8-bit version using lookup table.
-size_t find_last_not_of(const StringPiece& self,
- const StringPiece& s,
- size_t pos) {
+size_t find_last_not_of(StringPiece self, StringPiece s, size_t pos) {
if (self.size() == 0)
return StringPiece::npos;
@@ -337,9 +307,7 @@
}
// 16-bit brute-force version.
-size_t find_last_not_of(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos) {
+size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
if (self.size() == 0)
return StringPiece::npos;
@@ -359,8 +327,8 @@
return StringPiece16::npos;
}
-template<typename STR>
-size_t find_last_not_ofT(const BasicStringPiece<STR>& self,
+template <typename STR>
+size_t find_last_not_ofT(BasicStringPiece<STR> self,
typename STR::value_type c,
size_t pos) {
if (self.size() == 0)
@@ -375,15 +343,11 @@
return BasicStringPiece<STR>::npos;
}
-size_t find_last_not_of(const StringPiece& self,
- char c,
- size_t pos) {
+size_t find_last_not_of(StringPiece self, char c, size_t pos) {
return find_last_not_ofT(self, c, pos);
}
-size_t find_last_not_of(const StringPiece16& self,
- char16 c,
- size_t pos) {
+size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos) {
return find_last_not_ofT(self, c, pos);
}
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index de54e94..dea85e3 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -34,6 +34,7 @@
#include "base/strings/char_traits.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece_forward.h"
+#include "build/build_config.h"
namespace gurl_base {
@@ -48,86 +49,45 @@
// template internal to the .cc file.
namespace internal {
-BASE_EXPORT size_t copy(const StringPiece& self,
- char* buf,
- size_t n,
- size_t pos);
-BASE_EXPORT size_t copy(const StringPiece16& self,
- char16* buf,
- size_t n,
- size_t pos);
+BASE_EXPORT size_t find(StringPiece self, StringPiece s, size_t pos);
+BASE_EXPORT size_t find(StringPiece16 self, StringPiece16 s, size_t pos);
+BASE_EXPORT size_t find(StringPiece self, char c, size_t pos);
+BASE_EXPORT size_t find(StringPiece16 self, char16 c, size_t pos);
-BASE_EXPORT size_t find(const StringPiece& self,
- const StringPiece& s,
- size_t pos);
-BASE_EXPORT size_t find(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos);
-BASE_EXPORT size_t find(const StringPiece& self,
- char c,
- size_t pos);
-BASE_EXPORT size_t find(const StringPiece16& self,
- char16 c,
- size_t pos);
+BASE_EXPORT size_t rfind(StringPiece self, StringPiece s, size_t pos);
+BASE_EXPORT size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos);
+BASE_EXPORT size_t rfind(StringPiece self, char c, size_t pos);
+BASE_EXPORT size_t rfind(StringPiece16 self, char16 c, size_t pos);
-BASE_EXPORT size_t rfind(const StringPiece& self,
- const StringPiece& s,
- size_t pos);
-BASE_EXPORT size_t rfind(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos);
-BASE_EXPORT size_t rfind(const StringPiece& self,
- char c,
- size_t pos);
-BASE_EXPORT size_t rfind(const StringPiece16& self,
- char16 c,
- size_t pos);
-
-BASE_EXPORT size_t find_first_of(const StringPiece& self,
- const StringPiece& s,
- size_t pos);
-BASE_EXPORT size_t find_first_of(const StringPiece16& self,
- const StringPiece16& s,
+BASE_EXPORT size_t find_first_of(StringPiece self, StringPiece s, size_t pos);
+BASE_EXPORT size_t find_first_of(StringPiece16 self,
+ StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
- const StringPiece& s,
+BASE_EXPORT size_t find_first_not_of(StringPiece self,
+ StringPiece s,
size_t pos);
-BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
- const StringPiece16& s,
+BASE_EXPORT size_t find_first_not_of(StringPiece16 self,
+ StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
- char c,
- size_t pos);
-BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
- char16 c,
- size_t pos);
+BASE_EXPORT size_t find_first_not_of(StringPiece self, char c, size_t pos);
+BASE_EXPORT size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos);
-BASE_EXPORT size_t find_last_of(const StringPiece& self,
- const StringPiece& s,
+BASE_EXPORT size_t find_last_of(StringPiece self, StringPiece s, size_t pos);
+BASE_EXPORT size_t find_last_of(StringPiece16 self,
+ StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_last_of(const StringPiece16& self,
- const StringPiece16& s,
- size_t pos);
-BASE_EXPORT size_t find_last_of(const StringPiece& self,
- char c,
- size_t pos);
-BASE_EXPORT size_t find_last_of(const StringPiece16& self,
- char16 c,
- size_t pos);
+BASE_EXPORT size_t find_last_of(StringPiece self, char c, size_t pos);
+BASE_EXPORT size_t find_last_of(StringPiece16 self, char16 c, size_t pos);
-BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
- const StringPiece& s,
+BASE_EXPORT size_t find_last_not_of(StringPiece self,
+ StringPiece s,
size_t pos);
-BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
- const StringPiece16& s,
+BASE_EXPORT size_t find_last_not_of(StringPiece16 self,
+ StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
- char16 c,
- size_t pos);
-BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
- char c,
- size_t pos);
+BASE_EXPORT size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos);
+BASE_EXPORT size_t find_last_not_of(StringPiece self, char c, size_t pos);
} // namespace internal
@@ -181,15 +141,6 @@
: ptr_(str.data()), length_(str.size()) {}
constexpr BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
- BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
- const typename STRING_TYPE::const_iterator& end) {
- GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
- length_ = static_cast<size_t>(std::distance(begin, end));
-
- // The length test before assignment is to avoid dereferencing an iterator
- // that may point to the end() of a string.
- ptr_ = length_ > 0 ? &*begin : nullptr;
- }
// data() may return a pointer to a buffer with embedded NULs, and the
// returned buffer may or may not be null terminated. Therefore it is
@@ -226,16 +177,6 @@
length_ -= n;
}
- constexpr int compare(BasicStringPiece x) const noexcept {
- int r = CharTraits<value_type>::compare(
- ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_));
- if (r == 0) {
- if (length_ < x.length_) r = -1;
- else if (length_ > x.length_) r = +1;
- }
- return r;
- }
-
// This is the style of conversion preferred by std::string_view in C++17.
explicit operator STRING_TYPE() const {
return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
@@ -261,72 +202,187 @@
size_type max_size() const { return length_; }
size_type capacity() const { return length_; }
- size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
- return internal::copy(*this, buf, n, pos);
+ // String operations, see https://wg21.link/string.view.ops.
+ constexpr size_type copy(value_type* s,
+ size_type n,
+ size_type pos = 0) const {
+ GURL_CHECK_LE(pos, size());
+ size_type rlen = std::min(n, size() - pos);
+ traits_type::copy(s, data() + pos, rlen);
+ return rlen;
}
+ constexpr BasicStringPiece substr(size_type pos = 0,
+ size_type n = npos) const {
+ GURL_CHECK_LE(pos, size());
+ return {data() + pos, std::min(n, size() - pos)};
+ }
+
+ constexpr int compare(BasicStringPiece str) const noexcept {
+ size_type rlen = std::min(size(), str.size());
+ int result = CharTraits<value_type>::compare(data(), str.data(), rlen);
+ if (result == 0)
+ result = size() == str.size() ? 0 : (size() < str.size() ? -1 : 1);
+ return result;
+ }
+
+ constexpr int compare(size_type pos,
+ size_type n,
+ BasicStringPiece str) const {
+ return substr(pos, n).compare(str);
+ }
+
+ constexpr int compare(size_type pos1,
+ size_type n1,
+ BasicStringPiece str,
+ size_type pos2,
+ size_type n2) const {
+ return substr(pos1, n1).compare(str.substr(pos2, n2));
+ }
+
+ constexpr int compare(const value_type* s) const {
+ return compare(BasicStringPiece(s));
+ }
+
+ constexpr int compare(size_type pos, size_type n, const value_type* s) const {
+ return substr(pos, n).compare(BasicStringPiece(s));
+ }
+
+ constexpr int compare(size_type pos,
+ size_type n1,
+ const value_type* s,
+ size_type n2) const {
+ return substr(pos, n1).compare(BasicStringPiece(s, n2));
+ }
+
+ // Searching, see https://wg21.link/string.view.find.
+
// find: Search for a character or substring at a given offset.
- size_type find(const BasicStringPiece<STRING_TYPE>& s,
- size_type pos = 0) const {
+ constexpr size_type find(BasicStringPiece s,
+ size_type pos = 0) const noexcept {
return internal::find(*this, s, pos);
}
- size_type find(value_type c, size_type pos = 0) const {
+
+ constexpr size_type find(value_type c, size_type pos = 0) const noexcept {
return internal::find(*this, c, pos);
}
+ constexpr size_type find(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return find(BasicStringPiece(s, n), pos);
+ }
+
+ constexpr size_type find(const value_type* s, size_type pos = 0) const {
+ return find(BasicStringPiece(s), pos);
+ }
+
// rfind: Reverse find.
- size_type rfind(const BasicStringPiece& s,
- size_type pos = BasicStringPiece::npos) const {
+ constexpr size_type rfind(BasicStringPiece s,
+ size_type pos = npos) const noexcept {
return internal::rfind(*this, s, pos);
}
- size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const {
+
+ constexpr size_type rfind(value_type c, size_type pos = npos) const noexcept {
return internal::rfind(*this, c, pos);
}
- // find_first_of: Find the first occurence of one of a set of characters.
- size_type find_first_of(const BasicStringPiece& s,
- size_type pos = 0) const {
+ constexpr size_type rfind(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return rfind(BasicStringPiece(s, n), pos);
+ }
+
+ constexpr size_type rfind(const value_type* s, size_type pos = npos) const {
+ return rfind(BasicStringPiece(s), pos);
+ }
+
+ // find_first_of: Find the first occurrence of one of a set of characters.
+ constexpr size_type find_first_of(BasicStringPiece s,
+ size_type pos = 0) const noexcept {
return internal::find_first_of(*this, s, pos);
}
- size_type find_first_of(value_type c, size_type pos = 0) const {
+
+ constexpr size_type find_first_of(value_type c,
+ size_type pos = 0) const noexcept {
return find(c, pos);
}
- // find_first_not_of: Find the first occurence not of a set of characters.
- size_type find_first_not_of(const BasicStringPiece& s,
- size_type pos = 0) const {
- return internal::find_first_not_of(*this, s, pos);
- }
- size_type find_first_not_of(value_type c, size_type pos = 0) const {
- return internal::find_first_not_of(*this, c, pos);
+ constexpr size_type find_first_of(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return find_first_of(BasicStringPiece(s, n), pos);
}
- // find_last_of: Find the last occurence of one of a set of characters.
- size_type find_last_of(const BasicStringPiece& s,
- size_type pos = BasicStringPiece::npos) const {
+ constexpr size_type find_first_of(const value_type* s,
+ size_type pos = 0) const {
+ return find_first_of(BasicStringPiece(s), pos);
+ }
+
+ // find_last_of: Find the last occurrence of one of a set of characters.
+ constexpr size_type find_last_of(BasicStringPiece s,
+ size_type pos = npos) const noexcept {
return internal::find_last_of(*this, s, pos);
}
- size_type find_last_of(value_type c,
- size_type pos = BasicStringPiece::npos) const {
+
+ constexpr size_type find_last_of(value_type c,
+ size_type pos = npos) const noexcept {
return rfind(c, pos);
}
- // find_last_not_of: Find the last occurence not of a set of characters.
- size_type find_last_not_of(const BasicStringPiece& s,
- size_type pos = BasicStringPiece::npos) const {
+ constexpr size_type find_last_of(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return find_last_of(BasicStringPiece(s, n), pos);
+ }
+
+ constexpr size_type find_last_of(const value_type* s,
+ size_type pos = npos) const {
+ return find_last_of(BasicStringPiece(s), pos);
+ }
+
+ // find_first_not_of: Find the first occurrence not of a set of characters.
+ constexpr size_type find_first_not_of(BasicStringPiece s,
+ size_type pos = 0) const noexcept {
+ return internal::find_first_not_of(*this, s, pos);
+ }
+
+ constexpr size_type find_first_not_of(value_type c,
+ size_type pos = 0) const noexcept {
+ return internal::find_first_not_of(*this, c, pos);
+ }
+
+ constexpr size_type find_first_not_of(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return find_first_not_of(BasicStringPiece(s, n), pos);
+ }
+
+ constexpr size_type find_first_not_of(const value_type* s,
+ size_type pos = 0) const {
+ return find_first_not_of(BasicStringPiece(s), pos);
+ }
+
+ // find_last_not_of: Find the last occurrence not of a set of characters.
+ constexpr size_type find_last_not_of(BasicStringPiece s,
+ size_type pos = npos) const noexcept {
return internal::find_last_not_of(*this, s, pos);
}
- size_type find_last_not_of(value_type c,
- size_type pos = BasicStringPiece::npos) const {
+
+ constexpr size_type find_last_not_of(value_type c,
+ size_type pos = npos) const noexcept {
return internal::find_last_not_of(*this, c, pos);
}
- // substr.
- constexpr BasicStringPiece substr(
- size_type pos,
- size_type n = BasicStringPiece::npos) const {
- GURL_CHECK_LE(pos, size());
- return {data() + pos, std::min(n, size() - pos)};
+ constexpr size_type find_last_not_of(const value_type* s,
+ size_type pos,
+ size_type n) const {
+ return find_last_not_of(BasicStringPiece(s, n), pos);
+ }
+
+ constexpr size_type find_last_not_of(const value_type* s,
+ size_type pos = npos) const {
+ return find_last_not_of(BasicStringPiece(s), pos);
}
protected:
@@ -472,11 +528,12 @@
return !(lhs < rhs);
}
-BASE_EXPORT std::ostream& operator<<(std::ostream& o,
- const StringPiece& piece);
+BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece piece);
+BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece16 piece);
-BASE_EXPORT std::ostream& operator<<(std::ostream& o,
- const StringPiece16& piece);
+#if !defined(WCHAR_T_IS_UTF16)
+BASE_EXPORT std::ostream& operator<<(std::ostream& o, WStringPiece piece);
+#endif
// Hashing ---------------------------------------------------------------------
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index e0d812b..76de8f7 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc
@@ -269,12 +269,13 @@
ASSERT_EQ(e.find(d, 4), std::string().find(std::string(), 4));
ASSERT_EQ(e.find(e, 4), std::string().find(std::string(), 4));
+ constexpr typename TypeParam::value_type kNul = '\0';
ASSERT_EQ(a.find('a'), 0U);
ASSERT_EQ(a.find('c'), 2U);
ASSERT_EQ(a.find('z'), 25U);
ASSERT_EQ(a.find('$'), Piece::npos);
- ASSERT_EQ(a.find('\0'), Piece::npos);
- ASSERT_EQ(f.find('\0'), 3U);
+ ASSERT_EQ(a.find(kNul), Piece::npos);
+ ASSERT_EQ(f.find(kNul), 3U);
ASSERT_EQ(f.find('3'), 2U);
ASSERT_EQ(f.find('5'), 5U);
ASSERT_EQ(g.find('o'), 4U);
@@ -282,15 +283,44 @@
ASSERT_EQ(g.find('o', 5), 8U);
ASSERT_EQ(a.find('b', 5), Piece::npos);
// empty string nonsense
- ASSERT_EQ(d.find('\0'), Piece::npos);
- ASSERT_EQ(e.find('\0'), Piece::npos);
- ASSERT_EQ(d.find('\0', 4), Piece::npos);
- ASSERT_EQ(e.find('\0', 7), Piece::npos);
+ ASSERT_EQ(d.find(kNul), Piece::npos);
+ ASSERT_EQ(e.find(kNul), Piece::npos);
+ ASSERT_EQ(d.find(kNul, 4), Piece::npos);
+ ASSERT_EQ(e.find(kNul, 7), Piece::npos);
ASSERT_EQ(d.find('x'), Piece::npos);
ASSERT_EQ(e.find('x'), Piece::npos);
ASSERT_EQ(d.find('x', 4), Piece::npos);
ASSERT_EQ(e.find('x', 7), Piece::npos);
+ ASSERT_EQ(a.find(b.data(), 1, 0), 1U);
+ ASSERT_EQ(a.find(c.data(), 9, 0), 9U);
+ ASSERT_EQ(a.find(c.data(), Piece::npos, 0), Piece::npos);
+ ASSERT_EQ(b.find(c.data(), Piece::npos, 0), Piece::npos);
+ ASSERT_EQ(a.find(d.data(), 12, 0), 12U);
+ ASSERT_EQ(a.find(e.data(), 17, 0), 17U);
+ // empty string nonsense
+ ASSERT_EQ(d.find(b.data(), 4, 0), Piece::npos);
+ ASSERT_EQ(e.find(b.data(), 7, 0), Piece::npos);
+
+ ASSERT_EQ(a.find(b.data(), 1), Piece::npos);
+ ASSERT_EQ(a.find(c.data(), 9), 23U);
+ ASSERT_EQ(a.find(c.data(), Piece::npos), Piece::npos);
+ ASSERT_EQ(b.find(c.data(), Piece::npos), Piece::npos);
+ ASSERT_EQ(a.find(d.data(), 12), 12U);
+ ASSERT_EQ(a.find(e.data(), 17), 17U);
+ // empty string nonsense
+ ASSERT_EQ(d.find(b.data(), 4), Piece::npos);
+ ASSERT_EQ(e.find(b.data(), 7), Piece::npos);
+
+ ASSERT_EQ(d.find(d.data(), 4, 0),
+ std::string().find(std::string().data(), 4, 0));
+ ASSERT_EQ(d.find(e.data(), 4, 1),
+ std::string().find(std::string().data(), 4, 1));
+ ASSERT_EQ(e.find(d.data(), 4, 2),
+ std::string().find(std::string().data(), 4, 2));
+ ASSERT_EQ(e.find(e.data(), 4, 3),
+ std::string().find(std::string().data(), 4, 3));
+
ASSERT_EQ(a.rfind(b), 0U);
ASSERT_EQ(a.rfind(b, 1), 0U);
ASSERT_EQ(a.rfind(c), 23U);
@@ -325,8 +355,8 @@
ASSERT_EQ(g.rfind('o', 8), 8U);
ASSERT_EQ(g.rfind('o', 7), 4U);
ASSERT_EQ(g.rfind('o', 3), Piece::npos);
- ASSERT_EQ(f.rfind('\0'), 3U);
- ASSERT_EQ(f.rfind('\0', 12), 3U);
+ ASSERT_EQ(f.rfind(kNul), 3U);
+ ASSERT_EQ(f.rfind(kNul, 12), 3U);
ASSERT_EQ(f.rfind('3'), 2U);
ASSERT_EQ(f.rfind('5'), 5U);
// empty string nonsense
@@ -335,6 +365,22 @@
ASSERT_EQ(d.rfind('o', 4), Piece::npos);
ASSERT_EQ(e.rfind('o', 7), Piece::npos);
+ ASSERT_EQ(a.rfind(b.data(), 1, 0), 1U);
+ ASSERT_EQ(a.rfind(c.data(), 22U, 0), 22U);
+ ASSERT_EQ(a.rfind(c.data(), 1U, 0), 1U);
+ ASSERT_EQ(a.rfind(c.data(), 0U, 0), 0U);
+ ASSERT_EQ(b.rfind(c.data(), 0U, 0), 0U);
+ ASSERT_EQ(a.rfind(d.data(), 12, 0), 12U);
+ ASSERT_EQ(a.rfind(e.data(), 17, 0), 17U);
+ ASSERT_EQ(d.rfind(b.data(), 4, 0), 0U);
+ ASSERT_EQ(e.rfind(b.data(), 7, 0), 0U);
+
+ // empty string nonsense
+ ASSERT_EQ(d.rfind(d.data(), 4), std::string().rfind(std::string()));
+ ASSERT_EQ(e.rfind(d.data(), 7), std::string().rfind(std::string()));
+ ASSERT_EQ(d.rfind(e.data(), 4), std::string().rfind(std::string()));
+ ASSERT_EQ(e.rfind(e.data(), 7), std::string().rfind(std::string()));
+
TypeParam one_two_three_four(TestFixture::as_string("one,two:three;four"));
TypeParam comma_colon(TestFixture::as_string(",:"));
ASSERT_EQ(3U, Piece(one_two_three_four).find_first_of(comma_colon));
@@ -382,16 +428,16 @@
Piece h(equals);
ASSERT_EQ(h.find_first_not_of('='), Piece::npos);
ASSERT_EQ(h.find_first_not_of('=', 3), Piece::npos);
- ASSERT_EQ(h.find_first_not_of('\0'), 0U);
+ ASSERT_EQ(h.find_first_not_of(kNul), 0U);
ASSERT_EQ(g.find_first_not_of('x'), 2U);
- ASSERT_EQ(f.find_first_not_of('\0'), 0U);
- ASSERT_EQ(f.find_first_not_of('\0', 3), 4U);
- ASSERT_EQ(f.find_first_not_of('\0', 2), 2U);
+ ASSERT_EQ(f.find_first_not_of(kNul), 0U);
+ ASSERT_EQ(f.find_first_not_of(kNul, 3), 4U);
+ ASSERT_EQ(f.find_first_not_of(kNul, 2), 2U);
// empty string nonsense
ASSERT_EQ(d.find_first_not_of('x'), Piece::npos);
ASSERT_EQ(e.find_first_not_of('x'), Piece::npos);
- ASSERT_EQ(d.find_first_not_of('\0'), Piece::npos);
- ASSERT_EQ(e.find_first_not_of('\0'), Piece::npos);
+ ASSERT_EQ(d.find_first_not_of(kNul), Piece::npos);
+ ASSERT_EQ(e.find_first_not_of(kNul), Piece::npos);
// Piece g("xx not found bb");
TypeParam fifty_six(TestFixture::as_string("56"));
@@ -465,13 +511,14 @@
// empty string nonsense
ASSERT_EQ(d.find_last_not_of('x'), Piece::npos);
ASSERT_EQ(e.find_last_not_of('x'), Piece::npos);
- ASSERT_EQ(d.find_last_not_of('\0'), Piece::npos);
- ASSERT_EQ(e.find_last_not_of('\0'), Piece::npos);
+ ASSERT_EQ(d.find_last_not_of(kNul), Piece::npos);
+ ASSERT_EQ(e.find_last_not_of(kNul), Piece::npos);
ASSERT_EQ(a.substr(0, 3), b);
ASSERT_EQ(a.substr(23), c);
ASSERT_EQ(a.substr(23, 3), c);
ASSERT_EQ(a.substr(23, 99), c);
+ ASSERT_EQ(a.substr(), a);
ASSERT_EQ(a.substr(0), a);
ASSERT_EQ(a.substr(3, 2), TestFixture::as_string("de"));
ASSERT_EQ(d.substr(0, 99), e);
@@ -626,10 +673,7 @@
nullptr,
static_cast<typename BasicStringPiece<TypeParam>::size_type>(0)));
ASSERT_EQ(empty, BasicStringPiece<TypeParam>());
- ASSERT_EQ(str, BasicStringPiece<TypeParam>(str.begin(), str.end()));
- ASSERT_EQ(empty, BasicStringPiece<TypeParam>(str.begin(), str.begin()));
ASSERT_EQ(empty, BasicStringPiece<TypeParam>(empty));
- ASSERT_EQ(empty, BasicStringPiece<TypeParam>(empty.begin(), empty.end()));
}
TEST(StringPieceTest, ConstexprCtor) {
@@ -677,6 +721,11 @@
{
StringPiece piece;
+ ASSERT_DEATH_IF_SUPPORTED(piece.copy(nullptr, 0, 1), "");
+ }
+
+ {
+ StringPiece piece;
ASSERT_DEATH_IF_SUPPORTED(piece.substr(1), "");
}
}
@@ -738,6 +787,57 @@
static_assert(piece.compare("gh") == -1, "");
static_assert(piece.compare("ghi") == -1, "");
static_assert(piece.compare("ghij") == -1, "");
+
+ static_assert(piece.compare(0, 0, "") == 0, "");
+ static_assert(piece.compare(0, 1, "d") == 0, "");
+ static_assert(piece.compare(0, 2, "de") == 0, "");
+ static_assert(piece.compare(0, 3, "def") == 0, "");
+ static_assert(piece.compare(1, 0, "") == 0, "");
+ static_assert(piece.compare(1, 1, "e") == 0, "");
+ static_assert(piece.compare(1, 2, "ef") == 0, "");
+ static_assert(piece.compare(1, 3, "ef") == 0, "");
+ static_assert(piece.compare(2, 0, "") == 0, "");
+ static_assert(piece.compare(2, 1, "f") == 0, "");
+ static_assert(piece.compare(2, 2, "f") == 0, "");
+ static_assert(piece.compare(2, 3, "f") == 0, "");
+ static_assert(piece.compare(3, 0, "") == 0, "");
+ static_assert(piece.compare(3, 1, "") == 0, "");
+ static_assert(piece.compare(3, 2, "") == 0, "");
+ static_assert(piece.compare(3, 3, "") == 0, "");
+
+ static_assert(piece.compare(0, 0, "def", 0) == 0, "");
+ static_assert(piece.compare(0, 1, "def", 1) == 0, "");
+ static_assert(piece.compare(0, 2, "def", 2) == 0, "");
+ static_assert(piece.compare(0, 3, "def", 3) == 0, "");
+ static_assert(piece.compare(1, 0, "ef", 0) == 0, "");
+ static_assert(piece.compare(1, 1, "ef", 1) == 0, "");
+ static_assert(piece.compare(1, 2, "ef", 2) == 0, "");
+ static_assert(piece.compare(1, 3, "ef", 2) == 0, "");
+ static_assert(piece.compare(2, 0, "f", 0) == 0, "");
+ static_assert(piece.compare(2, 1, "f", 1) == 0, "");
+ static_assert(piece.compare(2, 2, "f", 1) == 0, "");
+ static_assert(piece.compare(2, 3, "f", 1) == 0, "");
+ static_assert(piece.compare(3, 0, "", 0) == 0, "");
+ static_assert(piece.compare(3, 1, "", 0) == 0, "");
+ static_assert(piece.compare(3, 2, "", 0) == 0, "");
+ static_assert(piece.compare(3, 3, "", 0) == 0, "");
+
+ static_assert(piece.compare(0, 0, "def", 0, 0) == 0, "");
+ static_assert(piece.compare(0, 1, "def", 0, 1) == 0, "");
+ static_assert(piece.compare(0, 2, "def", 0, 2) == 0, "");
+ static_assert(piece.compare(0, 3, "def", 0, 3) == 0, "");
+ static_assert(piece.compare(1, 0, "def", 1, 0) == 0, "");
+ static_assert(piece.compare(1, 1, "def", 1, 1) == 0, "");
+ static_assert(piece.compare(1, 2, "def", 1, 2) == 0, "");
+ static_assert(piece.compare(1, 3, "def", 1, 3) == 0, "");
+ static_assert(piece.compare(2, 0, "def", 2, 0) == 0, "");
+ static_assert(piece.compare(2, 1, "def", 2, 1) == 0, "");
+ static_assert(piece.compare(2, 2, "def", 2, 2) == 0, "");
+ static_assert(piece.compare(2, 3, "def", 2, 3) == 0, "");
+ static_assert(piece.compare(3, 0, "def", 3, 0) == 0, "");
+ static_assert(piece.compare(3, 1, "def", 3, 1) == 0, "");
+ static_assert(piece.compare(3, 2, "def", 3, 2) == 0, "");
+ static_assert(piece.compare(3, 3, "def", 3, 3) == 0, "");
}
TEST(StringPieceTest, Substr) {
@@ -751,6 +851,7 @@
static_assert(piece.substr(23) == "xyz", "");
static_assert(piece.substr(23, 3) == "xyz", "");
static_assert(piece.substr(23, 99) == "xyz", "");
+ static_assert(piece.substr() == piece, "");
static_assert(piece.substr(0) == piece, "");
static_assert(piece.substr(0, 99) == piece, "");
}
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index f43a8ac..a1e5c59 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -84,6 +84,31 @@
// This function is intended to be called from gurl_base::vswprintf.
BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
+// Simplified implementation of C++20's std::basic_string_view(It, End).
+// Requires begin <= end. Reference: https://wg21.link/string.view.cons
+template <typename StringT, typename Iter>
+constexpr BasicStringPiece<StringT> MakeBasicStringPiece(Iter begin, Iter end) {
+ GURL_DCHECK_GE(end - begin, 0);
+ return {gurl_base::to_address(begin), static_cast<size_t>(end - begin)};
+}
+
+// Convenience wrappers around MakeBasicStringPiece for the BasicStringPiece
+// aliases defined in base/strings/string_piece_forward.h.
+template <typename Iter>
+constexpr StringPiece MakeStringPiece(Iter begin, Iter end) {
+ return MakeBasicStringPiece<std::string>(begin, end);
+}
+
+template <typename Iter>
+constexpr StringPiece16 MakeStringPiece16(Iter begin, Iter end) {
+ return MakeBasicStringPiece<string16>(begin, end);
+}
+
+template <typename Iter>
+constexpr WStringPiece MakeWStringPiece(Iter begin, Iter end) {
+ return MakeBasicStringPiece<std::wstring>(begin, end);
+}
+
// ASCII-specific tolower. The standard library's tolower is locale sensitive,
// so we don't want to use it here.
template <typename CharT,
@@ -291,9 +316,9 @@
// Compare the lower-case form of the given string against the given
// previously-lower-cased ASCII string (typically a constant).
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
- StringPiece lowecase_ascii);
+ StringPiece lowercase_ascii);
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
- StringPiece lowecase_ascii);
+ StringPiece lowercase_ascii);
// Performs a case-sensitive string compare of the given 16-bit string against
// the given 8-bit ASCII string (typically a constant). The behavior is
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h
index 006aeb0..ccc1367 100644
--- a/base/strings/string_util_internal.h
+++ b/base/strings/string_util_internal.h
@@ -228,7 +228,7 @@
}
template <bool (*Validator)(uint32_t)>
-inline static bool DoIsStringUTF8(StringPiece str) {
+inline bool DoIsStringUTF8(StringPiece str) {
const char* src = str.data();
int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0;
@@ -258,8 +258,8 @@
// string piece gives additional flexibility for the caller (doesn't have to be
// null terminated) so we choose the StringPiece route.
template <typename Str>
-static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
- StringPiece lowercase_ascii) {
+inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
+ StringPiece lowercase_ascii) {
return std::equal(
str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(),
[](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; });
diff --git a/base/strings/string_util_posix.h b/base/strings/string_util_posix.h
index 7d5a67b..91cf7a6 100644
--- a/base/strings/string_util_posix.h
+++ b/base/strings/string_util_posix.h
@@ -33,17 +33,6 @@
return ::vswprintf(buffer, size, format, arguments);
}
-// These mirror the APIs in string_util_win.h. Since gurl_base::StringPiece is
-// already the native string type on POSIX platforms these APIs are simple
-// no-ops.
-inline StringPiece AsCrossPlatformPiece(StringPiece str) {
- return str;
-}
-
-inline StringPiece AsNativeStringPiece(StringPiece str) {
- return str;
-}
-
} // namespace base
#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index f73b895..beb99e2 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc
@@ -1310,6 +1310,47 @@
EXPECT_EQ(i.portable, IsWprintfFormatPortable(i.input));
}
+TEST(StringUtilTest, MakeBasicStringPieceTest) {
+ constexpr char kFoo[] = "Foo";
+ static_assert(MakeStringPiece(kFoo, kFoo + 3) == kFoo, "");
+ static_assert(MakeStringPiece(kFoo, kFoo + 3).data() == kFoo, "");
+ static_assert(MakeStringPiece(kFoo, kFoo + 3).size() == 3, "");
+ static_assert(MakeStringPiece(kFoo + 3, kFoo + 3).empty(), "");
+ static_assert(MakeStringPiece(kFoo + 4, kFoo + 4).empty(), "");
+
+ std::string foo = kFoo;
+ EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()), foo);
+ EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).data(), foo.data());
+ EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).size(), foo.size());
+ EXPECT_TRUE(MakeStringPiece(foo.end(), foo.end()).empty());
+
+ constexpr char16 kBar[] = STRING16_LITERAL("Bar");
+ static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar, "");
+ static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar, "");
+ static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3, "");
+ static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty(), "");
+ static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty(), "");
+
+ string16 bar = kBar;
+ EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()), bar);
+ EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).data(), bar.data());
+ EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).size(), bar.size());
+ EXPECT_TRUE(MakeStringPiece16(bar.end(), bar.end()).empty());
+
+ constexpr wchar_t kBaz[] = L"Baz";
+ static_assert(MakeWStringPiece(kBaz, kBaz + 3) == kBaz, "");
+ static_assert(MakeWStringPiece(kBaz, kBaz + 3).data() == kBaz, "");
+ static_assert(MakeWStringPiece(kBaz, kBaz + 3).size() == 3, "");
+ static_assert(MakeWStringPiece(kBaz + 3, kBaz + 3).empty(), "");
+ static_assert(MakeWStringPiece(kBaz + 4, kBaz + 4).empty(), "");
+
+ std::wstring baz = kBaz;
+ EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()), baz);
+ EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()).data(), baz.data());
+ EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()).size(), baz.size());
+ EXPECT_TRUE(MakeWStringPiece(baz.end(), baz.end()).empty());
+}
+
TEST(StringUtilTest, RemoveChars) {
const char kRemoveChars[] = "-/+*";
std::string input = "A-+bc/d!*";
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h
index 51a6a2b..3ddbc92 100644
--- a/base/strings/string_util_win.h
+++ b/base/strings/string_util_win.h
@@ -107,18 +107,6 @@
return string16(as_u16cstr(str.data()), str.size());
}
-// Compatibility shim for cross-platform code that passes a StringPieceType to a
-// cross platform string utility function. Most of these functions are only
-// implemented for gurl_base::StringPiece and gurl_base::StringPiece16, which is why
-// gurl_base::WStringPieces need to be converted on API boundaries.
-inline StringPiece16 AsCrossPlatformPiece(WStringPiece str) {
- return AsStringPiece16(str);
-}
-
-inline WStringPiece AsNativeStringPiece(StringPiece16 str) {
- return AsWStringPiece(str);
-}
-
// The following section contains overloads of the cross-platform APIs for
// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring
// and gurl_base::string16 are distinct types, as otherwise this would result in an
@@ -167,7 +155,7 @@
BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters);
BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str,
- StringPiece lowecase_ascii);
+ StringPiece lowercase_ascii);
BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
diff --git a/build_config/build_config.bzl b/build_config/build_config.bzl
index 9357584..117bc96 100644
--- a/build_config/build_config.bzl
+++ b/build_config/build_config.bzl
@@ -10,11 +10,6 @@
],
})
-_strings_srcs = select({
- "//build_config:windows_x86_64": [],
- "//conditions:default": ["string16.cc"],
-})
-
_strings_hdrs = select({
"//build_config:windows_x86_64": ["string_util_win.h"],
"//conditions:default": ["string_util_posix.h"],
@@ -35,7 +30,6 @@
build_config = struct(
default_copts = _default_copts,
url_linkopts = _url_linkopts,
- strings_srcs = _strings_srcs,
strings_hdrs = _strings_hdrs,
icuuc_deps = _icuuc_deps,
)
diff --git a/copy.bara.sky b/copy.bara.sky
index fefc63d..2ed782d 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -13,10 +13,12 @@
"AUTHORS",
"LICENSE",
"base/compiler_specific.h",
+ "base/containers/contains.h",
"base/containers/checked_iterators.h",
"base/containers/contiguous_iterator.h",
"base/containers/span.h",
"base/containers/util.h",
+ "base/i18n/uchar.h",
"base/functional/*.h",
"base/ranges/*.h",
"base/macros.h",
@@ -82,10 +84,10 @@
# Ugly hack. In Chromium, ICU is built with UChar = uint16_t. We can't
# really do that with the system ICU, so we have to work this around with a
# cast.
- core.replace(
- "src, src_len, output->data(),",
- "(UChar*)src, src_len, (UChar*)output->data(),",
- ),
+ #core.replace(
+ # "src, src_len, output->data(),",
+ # "(UChar*)src, src_len, (UChar*)output->data(),",
+ #),
# Use system ICU.
core.replace(
diff --git a/url/gurl.cc b/url/gurl.cc
index 68f3f8c..3b7d9f5 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -485,17 +485,23 @@
if (has_host() || has_username() || has_password() || has_port())
return false;
- if (!gurl_base::StartsWith(path_piece(), allowed_path))
+ return IsAboutPath(path_piece(), allowed_path);
+}
+
+// static
+bool GURL::IsAboutPath(gurl_base::StringPiece actual_path,
+ gurl_base::StringPiece allowed_path) {
+ if (!gurl_base::StartsWith(actual_path, allowed_path))
return false;
- if (path_piece().size() == allowed_path.size()) {
- GURL_DCHECK_EQ(path_piece(), allowed_path);
+ if (actual_path.size() == allowed_path.size()) {
+ GURL_DCHECK_EQ(actual_path, allowed_path);
return true;
}
- if ((path_piece().size() == allowed_path.size() + 1) &&
- path_piece().back() == '/') {
- GURL_DCHECK_EQ(path_piece(), allowed_path.as_string() + '/');
+ if ((actual_path.size() == allowed_path.size() + 1) &&
+ actual_path.back() == '/') {
+ GURL_DCHECK_EQ(actual_path, allowed_path.as_string() + '/');
return true;
}
diff --git a/url/gurl.h b/url/gurl.h
index aa33094..37e1c8d 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -434,6 +434,10 @@
// See base/trace_event/memory_usage_estimator.h for more info.
size_t EstimateMemoryUsage() const;
+ // Helper used by GURL::IsAboutUrl and KURL::IsAboutURL.
+ static bool IsAboutPath(gurl_base::StringPiece actual_path,
+ gurl_base::StringPiece allowed_path);
+
private:
// Variant of the string parsing constructor that allows the caller to elect
// retain trailing whitespace, if any, on the passed URL spec, but only if
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index f0f72cd..6295d98 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -9,6 +9,7 @@
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
+#include "url/gurl_abstract_tests.h"
#include "url/origin.h"
#include "url/url_canon.h"
#include "url/url_test_utils.h"
@@ -883,44 +884,6 @@
}
}
-TEST(GURLTest, IsAboutBlank) {
- const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo",
- "about:blank/#foo",
- "about:blank?foo#foo"};
- for (const auto& url : kAboutBlankUrls)
- EXPECT_TRUE(GURL(url).IsAboutBlank()) << url;
-
- const std::string kNotAboutBlankUrls[] = {
- "http:blank", "about:blan", "about://blank",
- "about:blank/foo", "about://:8000/blank", "about://foo:foo@/blank",
- "foo@about:blank", "foo:bar@about:blank", "about:blank:8000",
- "about:blANk"};
- for (const auto& url : kNotAboutBlankUrls)
- EXPECT_FALSE(GURL(url).IsAboutBlank()) << url;
-}
-
-TEST(GURLTest, IsAboutSrcdoc) {
- const std::string kAboutSrcdocUrls[] = {
- "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo",
- "about:srcdoc?foo#foo"};
- for (const auto& url : kAboutSrcdocUrls)
- EXPECT_TRUE(GURL(url).IsAboutSrcdoc()) << url;
-
- const std::string kNotAboutSrcdocUrls[] = {"http:srcdoc",
- "about:srcdo",
- "about://srcdoc",
- "about://srcdoc\\",
- "about:srcdoc/foo",
- "about://:8000/srcdoc",
- "about://foo:foo@/srcdoc",
- "foo@about:srcdoc",
- "foo:bar@about:srcdoc",
- "about:srcdoc:8000",
- "about:srCDOc"};
- for (const auto& url : kNotAboutSrcdocUrls)
- EXPECT_FALSE(GURL(url).IsAboutSrcdoc()) << url;
-}
-
TEST(GURLTest, EqualsIgnoringRef) {
const struct {
const char* url_a;
@@ -1029,4 +992,18 @@
EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin));
}
+class GURLTestTraits {
+ public:
+ using UrlType = GURL;
+
+ static UrlType CreateUrlFromString(gurl_base::StringPiece s) { return GURL(s); }
+ static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
+ static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
+
+ // Only static members.
+ GURLTestTraits() = delete;
+};
+
+INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);
+
} // namespace url
diff --git a/url/origin.cc b/url/origin.cc
index d04e557..ca37428 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -11,9 +11,9 @@
#include "base/base64.h"
#include "polyfills/base/check_op.h"
+#include "base/containers/contains.h"
#include "base/containers/span.h"
#include "base/pickle.h"
-#include "base/stl_util.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 7793b9a..2f342c5 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -10,32 +10,11 @@
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
+#include "url/origin_abstract_tests.h"
#include "url/url_util.h"
namespace url {
-void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
- EXPECT_EQ(a, b);
- const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
- const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
- EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
- EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
- EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
- EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
- EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
- EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
- EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
- EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
- EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
- EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
- EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
- EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
- EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
- EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
- EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
- EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
-}
-
class OriginTest : public ::testing::Test {
public:
void SetUp() override {
@@ -197,68 +176,6 @@
EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:srcdoc"), opaque_b));
EXPECT_EQ(opaque_b,
url::Origin::Resolve(GURL("about:blank?hello#whee"), opaque_b));
-
- const char* const urls[] = {
- "data:text/html,Hello!",
- "javascript:alert(1)",
- "about:blank",
- "file://example.com:443/etc/passwd",
- "unknown-scheme:foo",
- "unknown-scheme://bar",
- "http",
- "http:",
- "http:/",
- "http://",
- "http://:",
- "http://:1",
- "yay",
- "http::///invalid.example.com/",
- "blob:null/foo", // blob:null (actually a valid URL)
- "blob:data:foo", // blob + data (which is nonstandard)
- "blob:about://blank/", // blob + about (which is nonstandard)
- "blob:about:blank/", // blob + about (which is nonstandard)
- "filesystem:http://example.com/", // Invalid (missing /type/)
- "filesystem:local-but-nonstandard:baz./type/", // fs requires standard
- "filesystem:local-but-nonstandard://hostname/type/",
- "filesystem:unknown-scheme://hostname/type/",
- "local-but-nonstandar:foo", // Prefix of registered scheme.
- "but-nonstandard:foo", // Suffix of registered scheme.
- "local-and-standard:", // Standard scheme needs a hostname.
- "standard-but-noaccess:", // Standard scheme needs a hostname.
- "blob:blob:http://www.example.com/guid-goes-here", // Double blob.
- };
-
- for (auto* test_url : urls) {
- SCOPED_TRACE(test_url);
- GURL url(test_url);
- const url::Origin opaque_origin;
-
- // Opaque origins returned by Origin::Create().
- {
- Origin origin = Origin::Create(url);
- EXPECT_EQ("", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
- EXPECT_TRUE(origin.opaque());
- // An origin is always same-origin with itself.
- EXPECT_EQ(origin, origin);
- EXPECT_NE(origin, url::Origin());
- EXPECT_EQ(SchemeHostPort(), origin.GetTupleOrPrecursorTupleIfOpaque());
- // A copy of |origin| should be same-origin as well.
- Origin origin_copy = origin;
- EXPECT_EQ("", origin_copy.scheme());
- EXPECT_EQ("", origin_copy.host());
- EXPECT_EQ(0, origin_copy.port());
- EXPECT_TRUE(origin_copy.opaque());
- EXPECT_EQ(origin, origin_copy);
- // And it should always be cross-origin to another opaque Origin.
- EXPECT_NE(origin, opaque_origin);
- // Re-creating from the URL should also be cross-origin.
- EXPECT_NE(origin, Origin::Create(url));
-
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
- }
- }
}
TEST_F(OriginTest, ConstructFromTuple) {
@@ -286,132 +203,6 @@
}
}
-TEST_F(OriginTest, ConstructFromGURL) {
- Origin different_origin =
- Origin::Create(GURL("https://not-in-the-list.test/"));
-
- struct TestCases {
- const char* const url;
- const char* const expected_scheme;
- const char* const expected_host;
- const uint16_t expected_port;
- } cases[] = {
- // IP Addresses
- {"http://192.168.9.1/", "http", "192.168.9.1", 80},
- {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
- {"http://1/", "http", "0.0.0.1", 80},
- {"http://1:1/", "http", "0.0.0.1", 1},
- {"http://3232237825/", "http", "192.168.9.1", 80},
-
- // Punycode
- {"http://☃.net/", "http", "xn--n3h.net", 80},
- {"blob:http://☃.net/", "http", "xn--n3h.net", 80},
-
- // Generic URLs
- {"http://example.com/", "http", "example.com", 80},
- {"http://example.com:123/", "http", "example.com", 123},
- {"https://example.com/", "https", "example.com", 443},
- {"https://example.com:123/", "https", "example.com", 123},
- {"http://user:pass@example.com/", "http", "example.com", 80},
- {"http://example.com:123/?query", "http", "example.com", 123},
- {"https://example.com/#1234", "https", "example.com", 443},
- {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123},
-
- // Registered URLs
- {"ftp://example.com/", "ftp", "example.com", 21},
- {"ws://example.com/", "ws", "example.com", 80},
- {"wss://example.com/", "wss", "example.com", 443},
- {"wss://user:pass@example.com/", "wss", "example.com", 443},
-
- // Scheme (registered in SetUp()) that's both local and standard.
- // TODO: Is it really appropriate to do network-host canonicalization of
- // schemes without ports?
- {"local-and-standard:20", "local-and-standard", "0.0.0.20", 0},
- {"local-and-standard:20.", "local-and-standard", "0.0.0.20", 0},
- {"local-and-standard:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg", "local-and-standard",
- "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0},
- {"local-and-standard:foo", "local-and-standard", "foo", 0},
- {"local-and-standard://bar:20", "local-and-standard", "bar", 0},
- {"local-and-standard:baz.", "local-and-standard", "baz.", 0},
- {"local-and-standard:baz..", "local-and-standard", "baz..", 0},
- {"local-and-standard:baz..bar", "local-and-standard", "baz..bar", 0},
- {"local-and-standard:baz...", "local-and-standard", "baz...", 0},
-
- // Scheme (registered in SetUp()) that's local but nonstandard. These
- // always have empty hostnames, but are allowed to be url::Origins.
- {"local-but-nonstandard:", "local-but-nonstandard", "", 0},
- {"local-but-nonstandard:foo", "local-but-nonstandard", "", 0},
- {"local-but-nonstandard://bar", "local-but-nonstandard", "", 0},
- {"also-local-but-nonstandard://bar", "also-local-but-nonstandard", "", 0},
-
- // Scheme (registered in SetUp()) that's standard but marked as noaccess.
- // url::Origin doesn't currently take the noaccess property into account,
- // so these aren't expected to result in opaque origins.
- {"standard-but-noaccess:foo", "standard-but-noaccess", "foo", 0},
- {"standard-but-noaccess://bar", "standard-but-noaccess", "bar", 0},
-
- // file: URLs
- {"file:///etc/passwd", "file", "", 0},
- {"file://example.com/etc/passwd", "file", "example.com", 0},
-
- // Filesystem:
- {"filesystem:http://example.com/type/", "http", "example.com", 80},
- {"filesystem:http://example.com:123/type/", "http", "example.com", 123},
- {"filesystem:https://example.com/type/", "https", "example.com", 443},
- {"filesystem:https://example.com:123/type/", "https", "example.com", 123},
- {"filesystem:local-and-standard:baz./type/", "local-and-standard", "baz.",
- 0},
-
- // Blob:
- {"blob:http://example.com/guid-goes-here", "http", "example.com", 80},
- {"blob:http://example.com:123/guid-goes-here", "http", "example.com",
- 123},
- {"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
- {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
- };
-
- for (const auto& test_case : cases) {
- SCOPED_TRACE(test_case.url);
- GURL url(test_case.url);
- EXPECT_TRUE(url.is_valid());
- Origin origin = Origin::Create(url);
- EXPECT_EQ(test_case.expected_scheme, origin.scheme());
- EXPECT_EQ(test_case.expected_host, origin.host());
- EXPECT_EQ(test_case.expected_port, origin.port());
- EXPECT_FALSE(origin.opaque());
- EXPECT_EQ(origin, origin);
- EXPECT_NE(different_origin, origin);
- EXPECT_NE(origin, different_origin);
- EXPECT_EQ(origin, Origin::Resolve(GURL("about:blank"), origin));
- EXPECT_EQ(origin, Origin::Resolve(GURL("about:blank?bar#foo"), origin));
-
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
-
- url::Origin derived_opaque =
- Origin::Resolve(GURL("about:blank?bar#foo"), origin)
- .DeriveNewOpaqueOrigin();
- EXPECT_TRUE(derived_opaque.opaque());
- EXPECT_NE(origin, derived_opaque);
- EXPECT_TRUE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsValid());
- EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(),
- derived_opaque.GetTupleOrPrecursorTupleIfOpaque());
- EXPECT_EQ(derived_opaque, derived_opaque);
-
- url::Origin derived_opaque_via_data_url =
- Origin::Resolve(GURL("data:text/html,baz"), origin);
- EXPECT_TRUE(derived_opaque_via_data_url.opaque());
- EXPECT_NE(origin, derived_opaque_via_data_url);
- EXPECT_TRUE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque()
- .IsValid());
- EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(),
- derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque());
- EXPECT_NE(derived_opaque, derived_opaque_via_data_url);
- EXPECT_NE(derived_opaque_via_data_url, derived_opaque);
- EXPECT_NE(derived_opaque.DeriveNewOpaqueOrigin(), derived_opaque);
- EXPECT_EQ(derived_opaque_via_data_url, derived_opaque_via_data_url);
- }
-}
-
TEST_F(OriginTest, Serialization) {
struct TestCases {
const char* const url;
@@ -666,20 +457,6 @@
EXPECT_STREQ("https://foo.com", origin1_debug_alias);
}
-TEST_F(OriginTest, NonStandardScheme) {
- Origin origin = Origin::Create(GURL("cow://"));
- EXPECT_TRUE(origin.opaque());
-}
-
-TEST_F(OriginTest, NonStandardSchemeWithAndroidWebViewHack) {
- EnableNonStandardSchemesForAndroidWebView();
- Origin origin = Origin::Create(GURL("cow://"));
- EXPECT_FALSE(origin.opaque());
- EXPECT_EQ("cow", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
-}
-
TEST_F(OriginTest, CanBeDerivedFrom) {
AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST);
Origin opaque_unique_origin = Origin();
@@ -818,10 +595,10 @@
{"standard-but-noaccess://a.com/foo", &regular_origin, false},
{"standard-but-noaccess://a.com/foo", &opaque_precursor_origin, false},
{"standard-but-noaccess://a.com/foo", &opaque_unique_origin, true},
- {"standard-but-noaccess://a.com/foo", &no_access_origin, false},
+ {"standard-but-noaccess://a.com/foo", &no_access_origin, true},
{"standard-but-noaccess://a.com/foo", &no_access_opaque_precursor_origin,
- false},
- {"standard-but-noaccess://b.com/foo", &no_access_origin, false},
+ true},
+ {"standard-but-noaccess://b.com/foo", &no_access_origin, true},
{"standard-but-noaccess://b.com/foo", &no_access_opaque_precursor_origin,
true},
@@ -967,4 +744,8 @@
EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
}
+INSTANTIATE_TYPED_TEST_SUITE_P(UrlOrigin,
+ AbstractOriginTest,
+ UrlOriginTestTraits);
+
} // namespace url
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 265761b..21b473d 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -10,9 +10,9 @@
#include <tuple>
#include "polyfills/base/check_op.h"
+#include "base/containers/contains.h"
#include "polyfills/base/notreached.h"
#include "base/numerics/safe_conversions.h"
-#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -49,6 +49,10 @@
return host == canon_host;
}
+// Note: When changing IsValidInput, consider also updating
+// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
+// behavior between these 2 layers, but we should avoid introducing new
+// differences).
bool IsValidInput(const gurl_base::StringPiece& scheme,
const gurl_base::StringPiece& host,
uint16_t port,
@@ -57,15 +61,21 @@
if (scheme.empty())
return false;
+ // about:blank and other no-access schemes translate into an opaque origin.
+ // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
+ if (gurl_base::Contains(GetNoAccessSchemes(), scheme))
+ return false;
+
SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
bool is_standard = GetStandardSchemeType(
scheme.data(),
Component(0, gurl_base::checked_cast<int>(scheme.length())),
&scheme_type);
if (!is_standard) {
- // To be consistent with blink, local non-standard schemes are currently
- // allowed to be tuple origins. Nonstandard schemes don't have hostnames,
- // so their tuple is just ("protocol", "", 0).
+ // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
+ // non-standard schemes are currently allowed to be tuple origins.
+ // Nonstandard schemes don't have hostnames, so their tuple is just
+ // ("protocol", "", 0).
//
// TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
// remove this local scheme exception.
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
index a4cbdb3..a1f415d 100644
--- a/url/scheme_host_port_unittest.cc
+++ b/url/scheme_host_port_unittest.cc
@@ -55,8 +55,15 @@
EXPECT_EQ(invalid, invalid);
const char* urls[] = {
- "data:text/html,Hello!", "javascript:alert(1)",
- "file://example.com:443/etc/passwd",
+ // about:, data:, javascript: and other no-access schemes translate into
+ // an invalid SchemeHostPort.
+ "about:blank", "about:blank#ref", "about:blank?query=123", "about:srcdoc",
+ "about:srcdoc#ref", "about:srcdoc?query=123", "data:text/html,Hello!",
+ "javascript:alert(1)",
+
+ // GURLs where GURL::is_valid returns false translate into an invalid
+ // SchemeHostPort.
+ "file://example.com:443/etc/passwd", "#!^%!$!&*",
// These schemes do not follow the generic URL syntax, so make sure we
// treat them as invalid (scheme, host, port) tuples (even though such
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc
index 614e338..93c9247 100644
--- a/url/url_canon_icu.cc
+++ b/url/url_canon_icu.cc
@@ -9,6 +9,7 @@
#include <string.h>
#include "polyfills/base/check.h"
+#include "base/i18n/uchar.h"
#include <unicode/ucnv.h>
#include <unicode/ucnv_cb.h>
#include <unicode/utypes.h>
@@ -94,8 +95,9 @@
do {
UErrorCode err = U_ZERO_ERROR;
char* dest = &output->data()[begin_offset];
- int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
- input, input_len, &err);
+ int required_capacity =
+ ucnv_fromUChars(converter_, dest, dest_capacity,
+ gurl_base::i18n::ToUCharPtr(input), input_len, &err);
if (err != U_BUFFER_OVERFLOW_ERROR) {
output->set_length(begin_offset + required_capacity);
return;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 22002b5..da32bd8 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -5,6 +5,7 @@
#include <limits.h>
#include "polyfills/base/check.h"
+#include "polyfills/base/check_op.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_parse_internal.h"
@@ -261,6 +262,7 @@
bool success = true;
for (int i = path.begin; i < end; i++) {
+ GURL_DCHECK_LT(last_invalid_percent_index, output->length());
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
// We only need to test wide input for having non-ASCII characters. For
@@ -303,6 +305,9 @@
break;
case DIRECTORY_UP:
BackUpToPreviousSlash(path_begin_in_output, output);
+ if (last_invalid_percent_index >= output->length()) {
+ last_invalid_percent_index = INT_MIN;
+ }
i += dotlen + consumed_len - 1;
break;
}
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 55ef089..e2469ca 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -1423,6 +1423,9 @@
{"ws:)W\x1eW\xef\xb9\xaa"
"81:80/",
"ws://%29w%1ew%81/", false},
+ // Regression test for the last_invalid_percent_index bug described in
+ // https://crbug.com/1080890#c10.
+ {R"(HTTP:S/5%\../>%41)", "http://s/%3EA", true},
};
for (size_t i = 0; i < gurl_base::size(cases); i++) {
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index 18e1895..d9256a2 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -11,6 +11,7 @@
#include <ostream>
#include "polyfills/base/check_op.h"
+#include "base/i18n/uchar.h"
#include "base/no_destructor.h"
#include <unicode/uidna.h>
#include <unicode/utypes.h>
@@ -90,8 +91,10 @@
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- int output_length = uidna_nameToASCII(uidna, (UChar*)src, src_len, (UChar*)output->data(),
- output->capacity(), &info, &err);
+ int output_length =
+ uidna_nameToASCII(uidna, gurl_base::i18n::ToUCharPtr(src), src_len,
+ gurl_base::i18n::ToUCharPtr(output->data()),
+ output->capacity(), &info, &err);
if (U_SUCCESS(err) && info.errors == 0) {
output->set_length(output_length);
return true;