Update googleurl from upstream. The revision used is 9418d10b236dffce823870895b0e0b672c37fd86, from Mon Feb 1 19:47:01 2021 +0000.
diff --git a/AUTHORS b/AUTHORS index a619990..ce38168 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -54,6 +54,7 @@ Alexander Zhirov <ciberst@gmail.com> Alexandre Abreu <wiss1976@gmail.com> Alexandru Chiculita <achicu@adobe.com> +Alexey Knyazev <lexa.knyazev@gmail.com> Alexey Korepanov <alexkorep@gmail.com> Alexey Kuts <kruntuid@gmail.com> Alexey Kuzmin <alex.s.kuzmin@gmail.com> @@ -89,6 +90,7 @@ Andrew Hung <andrhung@amazon.com> Andrew Jorgensen <ajorgens@amazon.com> Andrew MacPherson <andrew.macpherson@soundtrap.com> +Andrew Nicols <andrewrn@gmail.com> Andrew Tulloch <andrew@tullo.ch> Andriy Rysin <arysin@gmail.com> Anish Patankar <anish.p@samsung.com> @@ -327,6 +329,7 @@ Frédéric Jacob <frederic.jacob.78@gmail.com> Frédéric Wang <fred.wang@free.fr> Fu Junwei <junwei.fu@intel.com> +Gabriel Campana <gabriel.campana@ledger.fr> Gabor Rapcsanyi <g.rapcsanyi@samsung.com> Gaetano Mendola <mendola@gmail.com> Gajendra N <gajendra.n@samsung.com> @@ -441,6 +444,7 @@ James Vega <vega.james@gmail.com> James Wei <james.wei@intel.com> James Willcox <jwillcox@litl.com> +Jan Grulich <grulja@gmail.com> Jan Rucka <ruckajan10@gmail.com> Jan Sauer <jan@jansauer.de> Janusz Majnert <jmajnert@gmail.com> @@ -449,6 +453,7 @@ Jared Sohn <jared.sohn@gmail.com> Jared Wein <weinjared@gmail.com> Jari Karppanen <jkarp@amazon.com> +Jason Gronn <jasontopia03@gmail.com> Jay Oster <jay@kodewerx.org> Jay Soffian <jaysoffian@gmail.com> Jeado Ko <haibane84@gmail.com> @@ -461,6 +466,7 @@ Jeremy Spiegel <jeremysspiegel@gmail.com> Jeroen Van den Berghe <vandenberghe.jeroen@gmail.com> Jerry Lin <wahahab11@gmail.com> +Jerry Zhang <zhj8407@gmail.com> Jesper Storm Bache <jsbache@gmail.com> Jesse Miller <jesse@jmiller.biz> Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com> @@ -492,6 +498,7 @@ Jinsong Fan <fanjinsong@sogou-inc.com> Jinsong Fan <jinsong.van@gmail.com> Jinwoo Song <jinwoo7.song@samsung.com> +Jinyoung Hur <hur.ims@navercorp.com> Jinyoung Hur <hurims@gmail.com> Jitendra Kumar Sahoo <jitendra.ks@samsung.com> Joachim Bauch <jbauch@webrtc.org> @@ -500,6 +507,7 @@ Joe Knoll 
<joe.knoll@workday.com> Joe Thomas <mhx348@motorola.com> Joel Stanley <joel@jms.id.au> +Joey Jiao <joeyjiao0810@gmail.com> Johannes Rudolph <johannes.rudolph@googlemail.com> John Kleinschmidt <kleinschmidtorama@gmail.com> John Yani <vanuan@gmail.com> @@ -556,6 +564,7 @@ Karan Thakkar <karanjthakkar@gmail.com> Kartikey Bhatt <kartikey@amazon.com> Kaspar Brand <googlecontrib@velox.ch> +Kaushalendra Mishra <k.mishra@samsung.com> Kaustubh Atrawalkar <kaustubh.a@samsung.com> Kaustubh Atrawalkar <kaustubh.ra@gmail.com> Ke He <ke.he@intel.com> @@ -611,6 +620,7 @@ Le Hoang Quyen <le.hoang.q@gmail.com> Legend Lee <guanxian.li@intel.com> Leith Bade <leith@leithalweapon.geek.nz> +Lei Gao <leigao@huawei.com> Lei Li <lli.kernel.kvm@gmail.com> Lenny Khazan <lenny.khazan@gmail.com> Leo Wolf <jclw@ymail.com> @@ -660,6 +670,7 @@ Mariusz Mlynski <marius.mlynski@gmail.com> Mark Hahnenberg <mhahnenb@andrew.cmu.edu> Mark Seaborn <mrs@mythic-beasts.com> +Mark Winter <wintermarkedward@gmail.com> Martijn Croonen <martijn@martijnc.be> Martin Bednorz <m.s.bednorz@gmail.com> Martin Persson <mnpn03@gmail.com> @@ -696,6 +707,7 @@ Michael Constant <mconst@gmail.com> Michael Forney <mforney@mforney.org> Michael Gilbert <floppymaster@gmail.com> +Michael Kolomeytsev <michael.kolomeytsev@gmail.com> Michael Lopez <lopes92290@gmail.com> Michael Morrison <codebythepound@gmail.com> Michael Müller <michael@fds-team.de> @@ -868,6 +880,7 @@ Robert O'Callahan <rocallahan@gmail.com> Robert Nagy <robert.nagy@gmail.com> Robert Sesek <rsesek@bluestatic.org> +Roee Kasher <roee91@gmail.com> Roger Zanoni <rogerzanoni@gmail.com> Roland Takacs <rtakacs.u-szeged@partner.samsung.com> Romain Pokrzywka <romain.pokrzywka@gmail.com> @@ -965,10 +978,12 @@ Siddharth Shankar <funkysidd@gmail.com> Simeon Kuran <simeon.kuran@gmail.com> Simon Arlott <simon.arlott@gmail.com> +Simon Jackson <simon.jackson@sonocent.com> Simon La Macchia <smacchia@amazon.com> Siva Kumar Gunturi <siva.gunturi@samsung.com> Sohan Jyoti Ghosh 
<sohan.jyoti@huawei.com> Sohan Jyoti Ghosh <sohan.jyoti@samsung.com> +Song Fangzhen <songfangzhen@bytedance.com> Song YeWen <ffmpeg@gmail.com> Sooho Park <sooho1000@gmail.com> Soojung Choi <crystal2840@gmail.com> @@ -1077,6 +1092,7 @@ Wojciech Bielawski <wojciech.bielawski@gmail.com> Wanming Lin <wanming.lin@intel.com> Wei Li <wei.c.li@intel.com> +Wen Fan <fanwen1@huawei.com> Wenxiang Qian <leonwxqian@gmail.com> WenSheng He <wensheng.he@samsung.com> Wesley Lancel <wesleylancel@gmail.com>
diff --git a/base/BUILD b/base/BUILD index 2ab77e8..63787b7 100644 --- a/base/BUILD +++ b/base/BUILD
@@ -9,6 +9,7 @@ hdrs = [ "compiler_specific.h", "containers/checked_iterators.h", + "containers/contains.h", "containers/contiguous_iterator.h", "containers/span.h", "containers/util.h", @@ -16,6 +17,7 @@ "functional/identity.h", "functional/invoke.h", "functional/not_fn.h", + "i18n/uchar.h", "macros.h", "no_destructor.h", "optional.h",
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index fe3d499..fa961b0 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -65,7 +65,7 @@ // To provide the complementary behavior (prevent the annotated function from // being omitted) look at NOINLINE. Also note that this doesn't prevent code // folding of multiple identical caller functions into a single signature. To -// prevent code folding, see gurl_base::debug::Alias. +// prevent code folding, see NO_CODE_FOLDING() in base/debug/alias.h. // Use like: // void NOT_TAIL_CALLED FooBar(); #if defined(__clang__) && __has_attribute(not_tail_called)
diff --git a/base/containers/contiguous_iterator.h b/base/containers/contiguous_iterator.h index a1c1f9b..48b2755 100644 --- a/base/containers/contiguous_iterator.h +++ b/base/containers/contiguous_iterator.h
@@ -26,13 +26,19 @@ template <typename T> struct IsPointer : std::is_pointer<T> {}; +template <typename T, typename StringT = std::basic_string<iter_value_t<T>>> +struct IsStringIterImpl + : disjunction<std::is_same<T, typename StringT::const_iterator>, + std::is_same<T, typename StringT::iterator>> {}; + // An iterator to std::basic_string is contiguous. // Reference: https://wg21.link/basic.string.general#2 -template <typename T, typename StringT = std::basic_string<iter_value_t<T>>> +// +// Note: Requires indirection via `IsStringIterImpl` to avoid triggering a +// `static_assert(is_trivial_v<value_type>)` inside libc++'s std::basic_string. +template <typename T> struct IsStringIter - : conjunction<std::is_trivial<iter_value_t<T>>, - disjunction<std::is_same<T, typename StringT::const_iterator>, - std::is_same<T, typename StringT::iterator>>> {}; + : conjunction<std::is_trivial<iter_value_t<T>>, IsStringIterImpl<T>> {}; // An iterator to std::array is contiguous. // Reference: https://wg21.link/array.overview#1
diff --git a/base/containers/util.h b/base/containers/util.h index 14f012a..7a65b6a 100644 --- a/base/containers/util.h +++ b/base/containers/util.h
@@ -12,7 +12,7 @@ // TODO(crbug.com/817982): What we really need is for checked_math.h to be // able to do checked arithmetic on pointers. template <typename T> -static inline uintptr_t get_uintptr(const T* t) { +inline uintptr_t get_uintptr(const T* t) { return reinterpret_cast<uintptr_t>(t); }
diff --git a/base/macros.h b/base/macros.h index c67bdbd..19d15ca 100644 --- a/base/macros.h +++ b/base/macros.h
@@ -14,21 +14,20 @@ // Use explicit deletions instead. See the section on copyability/movability in // //styleguide/c++/c++-dos-and-donts.md for more information. -// Put this in the declarations for a class to be uncopyable. +// DEPRECATED: See above. Makes a class uncopyable. #define DISALLOW_COPY(TypeName) \ TypeName(const TypeName&) = delete -// Put this in the declarations for a class to be unassignable. +// DEPRECATED: See above. Makes a class unassignable. #define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete -// Put this in the declarations for a class to be uncopyable and unassignable. +// DEPRECATED: See above. Makes a class uncopyable and unassignable. #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ DISALLOW_COPY(TypeName); \ DISALLOW_ASSIGN(TypeName) -// A macro to disallow all the implicit constructors, namely the +// DEPRECATED: See above. Disallow all implicit constructors, namely the // default constructor, copy constructor and operator= functions. -// This is especially useful for classes containing only static methods. #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ TypeName() = delete; \ DISALLOW_COPY_AND_ASSIGN(TypeName)
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h index 646b733..e6432f8 100644 --- a/base/ranges/algorithm.h +++ b/base/ranges/algorithm.h
@@ -419,11 +419,12 @@ InputIterator last, const T& value, Proj proj = {}) { - // Note: In order to be able to apply `proj` to each element in [first, last) - // we are dispatching to std::find_if instead of std::find. - return std::find_if(first, last, [&proj, &value](auto&& lhs) { - return gurl_base::invoke(proj, std::forward<decltype(lhs)>(lhs)) == value; - }); + for (; first != last; ++first) { + if (gurl_base::invoke(proj, *first) == value) + break; + } + + return first; } // Let `E(i)` be `bool(invoke(proj, *i) == value)`. @@ -4299,7 +4300,7 @@ // // Returns: `last`. // -// Complexity: At most `3 log(last - first)` comparisons and twice as many +// Complexity: At most `3 * (last - first)` comparisons and twice as many // projections. // // Reference: https://wg21.link/make.heap#:~:text=ranges::make_heap(I @@ -4323,7 +4324,7 @@ // // Returns: `end(range)`. // -// Complexity: At most `3 log(size(range))` comparisons and twice as many +// Complexity: At most `3 * size(range)` comparisons and twice as many // projections. // // Reference: https://wg21.link/make.heap#:~:text=ranges::make_heap(R
diff --git a/base/stl_util.h b/base/stl_util.h index f3d86ef..29f200e 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -24,6 +24,7 @@ #include <vector> #include "polyfills/base/check.h" +#include "base/containers/contains.h" #include "base/optional.h" #include "base/ranges/algorithm.h" #include "base/template_util.h" @@ -51,38 +52,6 @@ std::is_same<typename std::iterator_traits<Iter>::iterator_category, std::random_access_iterator_tag>::value; -// Utility type traits used for specializing gurl_base::Contains() below. -template <typename Container, typename Element, typename = void> -struct HasFindWithNpos : std::false_type {}; - -template <typename Container, typename Element> -struct HasFindWithNpos< - Container, - Element, - void_t<decltype(std::declval<const Container&>().find( - std::declval<const Element&>()) != Container::npos)>> - : std::true_type {}; - -template <typename Container, typename Element, typename = void> -struct HasFindWithEnd : std::false_type {}; - -template <typename Container, typename Element> -struct HasFindWithEnd<Container, - Element, - void_t<decltype(std::declval<const Container&>().find( - std::declval<const Element&>()) != - std::declval<const Container&>().end())>> - : std::true_type {}; - -template <typename Container, typename Element, typename = void> -struct HasContains : std::false_type {}; - -template <typename Container, typename Element> -struct HasContains<Container, - Element, - void_t<decltype(std::declval<const Container&>().contains( - std::declval<const Element&>()))>> : std::true_type {}; - } // namespace internal // C++14 implementation of C++17's std::size(): @@ -219,51 +188,6 @@ return std::count(container.begin(), container.end(), val); } -// General purpose implementation to check if |container| contains |value|. 
-template <typename Container, - typename Value, - std::enable_if_t< - !internal::HasFindWithNpos<Container, Value>::value && - !internal::HasFindWithEnd<Container, Value>::value && - !internal::HasContains<Container, Value>::value>* = nullptr> -bool Contains(const Container& container, const Value& value) { - using std::begin; - using std::end; - return std::find(begin(container), end(container), value) != end(container); -} - -// Specialized Contains() implementation for when |container| has a find() -// member function and a static npos member, but no contains() member function. -template <typename Container, - typename Value, - std::enable_if_t<internal::HasFindWithNpos<Container, Value>::value && - !internal::HasContains<Container, Value>::value>* = - nullptr> -bool Contains(const Container& container, const Value& value) { - return container.find(value) != Container::npos; -} - -// Specialized Contains() implementation for when |container| has a find() -// and end() member function, but no contains() member function. -template <typename Container, - typename Value, - std::enable_if_t<internal::HasFindWithEnd<Container, Value>::value && - !internal::HasContains<Container, Value>::value>* = - nullptr> -bool Contains(const Container& container, const Value& value) { - return container.find(value) != container.end(); -} - -// Specialized Contains() implementation for when |container| has a contains() -// member function. -template < - typename Container, - typename Value, - std::enable_if_t<internal::HasContains<Container, Value>::value>* = nullptr> -bool Contains(const Container& container, const Value& value) { - return container.contains(value); -} - // O(1) implementation of const casting an iterator for any sequence, // associative or unordered associative container in the STL. //
diff --git a/base/strings/BUILD b/base/strings/BUILD index c76b35a..65f3293 100644 --- a/base/strings/BUILD +++ b/base/strings/BUILD
@@ -12,7 +12,7 @@ "string_util_constants.cc", "utf_string_conversion_utils.cc", "utf_string_conversions.cc", - ] + build_config.strings_srcs, + ], hdrs = [ "char_traits.h", "string16.h",
diff --git a/base/strings/char_traits.h b/base/strings/char_traits.h index 0fe9f26..13f5833 100644 --- a/base/strings/char_traits.h +++ b/base/strings/char_traits.h
@@ -7,6 +7,8 @@ #include <stddef.h> +#include <string> + #include "base/compiler_specific.h" namespace gurl_base { @@ -33,10 +35,14 @@ constexpr int CharTraits<T>::compare(const T* s1, const T* s2, size_t n) noexcept { + // Comparison with operator < fails, because of signed/unsigned + // mismatch, https://crbug.com/941696 + // std::char_traits<T>::lt is guaranteed to be constexpr in C++14: + // https://timsong-cpp.github.io/cppwp/n4140/char.traits.specializations#char for (; n; --n, ++s1, ++s2) { - if (*s1 < *s2) + if (std::char_traits<T>::lt(*s1, *s2)) return -1; - if (*s1 > *s2) + if (std::char_traits<T>::lt(*s2, *s1)) return 1; } return 0; @@ -50,42 +56,35 @@ return i; } -// char specialization of CharTraits that can use clang's constexpr instrinsics, -// where available. +// char and wchar_t specialization of CharTraits that can use clang's constexpr +// instrinsics, where available. +#if HAS_FEATURE(cxx_constexpr_string_builtins) template <> struct CharTraits<char> { static constexpr int compare(const char* s1, const char* s2, - size_t n) noexcept; - static constexpr size_t length(const char* s) noexcept; + size_t n) noexcept { + return __builtin_memcmp(s1, s2, n); + } + + static constexpr size_t length(const char* s) noexcept { + return __builtin_strlen(s); + } }; -constexpr int CharTraits<char>::compare(const char* s1, - const char* s2, - size_t n) noexcept { -#if HAS_FEATURE(cxx_constexpr_string_builtins) - return __builtin_memcmp(s1, s2, n); -#else - for (; n; --n, ++s1, ++s2) { - if (*s1 < *s2) - return -1; - if (*s1 > *s2) - return 1; +template <> +struct CharTraits<wchar_t> { + static constexpr int compare(const wchar_t* s1, + const wchar_t* s2, + size_t n) noexcept { + return __builtin_wmemcmp(s1, s2, n); } - return 0; -#endif -} -constexpr size_t CharTraits<char>::length(const char* s) noexcept { -#if defined(__clang__) - return __builtin_strlen(s); -#else - size_t i = 0; - for (; *s; ++s) - ++i; - return i; + static constexpr size_t length(const 
wchar_t* s) noexcept { + return __builtin_wcslen(s); + } +}; #endif -} } // namespace base
diff --git a/base/strings/string16.cc b/base/strings/string16.cc deleted file mode 100644 index 426d5b6..0000000 --- a/base/strings/string16.cc +++ /dev/null
@@ -1,89 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/strings/string16.h" - -#if defined(WCHAR_T_IS_UTF16) && !defined(_AIX) - -#error This file should not be used on 2-byte wchar_t systems -// If this winds up being needed on 2-byte wchar_t systems, either the -// definitions below can be used, or the host system's wide character -// functions like wmemcmp can be wrapped. - -#elif defined(WCHAR_T_IS_UTF32) - -#include <string.h> - -#include <ostream> - -#include "base/strings/string_piece.h" - -namespace gurl_base { - -int c16memcmp(const char16* s1, const char16* s2, size_t n) { - // We cannot call memcmp because that changes the semantics. - while (n-- > 0) { - if (*s1 != *s2) { - // We cannot use (*s1 - *s2) because char16 is unsigned. - return ((*s1 < *s2) ? -1 : 1); - } - ++s1; - ++s2; - } - return 0; -} - -size_t c16len(const char16* s) { - const char16 *s_orig = s; - while (*s) { - ++s; - } - return s - s_orig; -} - -const char16* c16memchr(const char16* s, char16 c, size_t n) { - while (n-- > 0) { - if (*s == c) { - return s; - } - ++s; - } - return nullptr; -} - -char16* c16memmove(char16* s1, const char16* s2, size_t n) { - return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16))); -} - -char16* c16memcpy(char16* s1, const char16* s2, size_t n) { - return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16))); -} - -char16* c16memset(char16* s, char16 c, size_t n) { - char16 *s_orig = s; - while (n-- > 0) { - *s = c; - ++s; - } - return s_orig; -} - -namespace string16_internals { - -std::ostream& operator<<(std::ostream& out, const string16& str) { - return out << gurl_base::StringPiece16(str); -} - -void PrintTo(const string16& str, std::ostream* out) { - *out << str; -} - -} // namespace string16_internals - -} // namespace base - -template class std:: - basic_string<gurl_base::char16, 
gurl_base::string16_internals::string16_char_traits>; - -#endif // WCHAR_T_IS_UTF32
diff --git a/base/strings/string16.h b/base/strings/string16.h index f17a57f..dc3ddc7 100644 --- a/base/strings/string16.h +++ b/base/strings/string16.h
@@ -6,34 +6,18 @@ #define BASE_STRINGS_STRING16_H_ // WHAT: -// A version of std::basic_string that provides 2-byte characters even when -// wchar_t is not implemented as a 2-byte type. You can access this class as -// string16. We also define char16, which string16 is based upon. +// Type aliases for string and character types supporting UTF-16 data. Prior to +// C++11 there was no standard library solution for this, which is why wstring +// was used where possible (i.e. where wchar_t holds UTF-16 encoded data). // -// WHY: -// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 -// data. Plenty of existing code operates on strings encoded as UTF-16. -// -// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make -// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails -// at run time, because it calls some functions (like wcslen) that come from -// the system's native C library -- which was built with a 4-byte wchar_t! -// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's -// entirely improper on those systems where the encoding of wchar_t is defined -// as UTF-32. -// -// Here, we define string16, which is similar to std::wstring but replaces all -// libc functions with custom, 2-byte-char compatible routines. It is capable -// of carrying UTF-16-encoded data. +// In C++11 we gained std::u16string, which is a cross-platform solution for +// UTF-16 strings. This is now the string16 type where ever wchar_t does not +// hold UTF16 data (i.e. commonly non-Windows platforms). Eventually this should +// be used everywhere, at which point this type alias and this file should be +// removed. https://crbug.com/911896 tracks the migration effort. 
-#include <stddef.h> -#include <stdint.h> -#include <stdio.h> - -#include <functional> #include <string> -#include "polyfills/base/base_export.h" #include "build/build_config.h" #if defined(WCHAR_T_IS_UTF16) @@ -42,188 +26,24 @@ // a literal string. This indirection allows for an easier migration of // gurl_base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one // character change to the macro will be necessary. -// This macro does not exist when WCHAR_T_IS_UTF32, as it is currently not -// possible to create a char array form a literal in this case. // TODO(https://crbug.com/911896): Remove this macro once gurl_base::char16 is // char16_t on all platforms. #define STRING16_LITERAL(x) L##x namespace gurl_base { - -typedef wchar_t char16; -typedef std::wstring string16; - +using char16 = wchar_t; +using string16 = std::wstring; } // namespace base -#elif defined(WCHAR_T_IS_UTF32) +#else -#include <wchar.h> // for mbstate_t +#define STRING16_LITERAL(x) u##x namespace gurl_base { - -typedef uint16_t char16; - -// char16 versions of the functions required by string16_char_traits; these -// are based on the wide character functions of similar names ("w" or "wcs" -// instead of "c16"). -BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); -BASE_EXPORT size_t c16len(const char16* s); -BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); -BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); -BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); -BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); - -// This namespace contains the implementation of gurl_base::string16 along with -// things that need to be found via argument-dependent lookup from a -// gurl_base::string16. 
-namespace string16_internals { - -struct string16_char_traits { - typedef char16 char_type; - typedef int int_type; - - // int_type needs to be able to hold each possible value of char_type, and in - // addition, the distinct value of eof(). - static_assert(sizeof(int_type) > sizeof(char_type), - "int must be larger than 16 bits wide"); - - typedef std::streamoff off_type; - typedef mbstate_t state_type; - typedef std::fpos<state_type> pos_type; - - static void assign(char_type& c1, const char_type& c2) { - c1 = c2; - } - - static bool eq(const char_type& c1, const char_type& c2) { - return c1 == c2; - } - static bool lt(const char_type& c1, const char_type& c2) { - return c1 < c2; - } - - static int compare(const char_type* s1, const char_type* s2, size_t n) { - return c16memcmp(s1, s2, n); - } - - static size_t length(const char_type* s) { - return c16len(s); - } - - static const char_type* find(const char_type* s, size_t n, - const char_type& a) { - return c16memchr(s, a, n); - } - - static char_type* move(char_type* s1, const char_type* s2, size_t n) { - return c16memmove(s1, s2, n); - } - - static char_type* copy(char_type* s1, const char_type* s2, size_t n) { - return c16memcpy(s1, s2, n); - } - - static char_type* assign(char_type* s, size_t n, char_type a) { - return c16memset(s, a, n); - } - - static int_type not_eof(const int_type& c) { - return eq_int_type(c, eof()) ? 
0 : c; - } - - static char_type to_char_type(const int_type& c) { - return char_type(c); - } - - static int_type to_int_type(const char_type& c) { - return int_type(c); - } - - static bool eq_int_type(const int_type& c1, const int_type& c2) { - return c1 == c2; - } - - static int_type eof() { - return static_cast<int_type>(EOF); - } -}; - -} // namespace string16_internals - -typedef std::basic_string<char16, - gurl_base::string16_internals::string16_char_traits> - string16; - -namespace string16_internals { - -BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, - const string16& str); - -// This is required by googletest to print a readable output on test failures. -BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); - -} // namespace string16_internals - +using char16 = char16_t; +using string16 = std::u16string; } // namespace base -// The string class will be explicitly instantiated only once, in string16.cc. -// -// std::basic_string<> in GNU libstdc++ contains a static data member, -// _S_empty_rep_storage, to represent empty strings. When an operation such -// as assignment or destruction is performed on a string, causing its existing -// data member to be invalidated, it must not be freed if this static data -// member is being used. Otherwise, it counts as an attempt to free static -// (and not allocated) data, which is a memory error. -// -// Generally, due to C++ template magic, _S_empty_rep_storage will be marked -// as a coalesced symbol, meaning that the linker will combine multiple -// instances into a single one when generating output. -// -// If a string class is used by multiple shared libraries, a problem occurs. -// Each library will get its own copy of _S_empty_rep_storage. When strings -// are passed across a library boundary for alteration or destruction, memory -// errors will result. 
GNU libstdc++ contains a configuration option, -// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which -// disables the static data member optimization, but it's a good optimization -// and non-STL code is generally at the mercy of the system's STL -// configuration. Fully-dynamic strings are not the default for GNU libstdc++ -// libstdc++ itself or for the libstdc++ installations on the systems we care -// about, such as Mac OS X and relevant flavors of Linux. -// -// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . -// -// To avoid problems, string classes need to be explicitly instantiated only -// once, in exactly one library. All other string users see it via an "extern" -// declaration. This is precisely how GNU libstdc++ handles -// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring). -// -// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), -// in which the linker does not fully coalesce symbols when dead code -// stripping is enabled. This bug causes the memory errors described above -// to occur even when a std::basic_string<> does not cross shared library -// boundaries, such as in statically-linked executables. -// -// TODO(mark): File this bug with Apple and update this note with a bug number. - -extern template class BASE_EXPORT - std::basic_string<gurl_base::char16, - gurl_base::string16_internals::string16_char_traits>; - -// Specialize std::hash for gurl_base::string16. Although the style guide forbids -// this in general, it is necessary for consistency with WCHAR_T_IS_UTF16 -// platforms, where gurl_base::string16 is a type alias for std::wstring. 
-namespace std { -template <> -struct hash<gurl_base::string16> { - std::size_t operator()(const gurl_base::string16& s) const { - std::size_t result = 0; - for (gurl_base::char16 c : s) - result = (result * 131) + c; - return result; - } -}; -} // namespace std - -#endif // WCHAR_T_IS_UTF32 +#endif // WCHAR_T_IS_UTF16 #endif // BASE_STRINGS_STRING16_H_
diff --git a/base/strings/string16_unittest.cc b/base/strings/string16_unittest.cc index a9aecef..9cdb075 100644 --- a/base/strings/string16_unittest.cc +++ b/base/strings/string16_unittest.cc
@@ -2,74 +2,22 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <sstream> -#include <unordered_set> - #include "base/strings/string16.h" -#include "base/strings/utf_string_conversions.h" -#include "build/build_config.h" #include "testing/gtest/include/gtest/gtest.h" namespace gurl_base { -#if defined(WCHAR_T_IS_UTF16) +// Ensure that STRING16_LITERAL can be used to instantiate constants of type +// char16 and char16[], respectively. TEST(String16Test, String16Literal) { - static constexpr char16 kHelloWorld[] = STRING16_LITERAL("Hello, World"); - string16 hello_world = kHelloWorld; - EXPECT_EQ(kHelloWorld, hello_world); -} -#endif + static constexpr char16 kHelloChars[] = { + STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'), + STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'), + }; -// We define a custom operator<< for string16 so we can use it with logging. -// This tests that conversion. -TEST(String16Test, OutputStream) { - // Basic stream test. - { - std::ostringstream stream; - stream << "Empty '" << string16() << "' standard '" - << string16(ASCIIToUTF16("Hello, world")) << "'"; - EXPECT_STREQ("Empty '' standard 'Hello, world'", - stream.str().c_str()); - } - - // Interesting edge cases. - { - // These should each get converted to the invalid character: EF BF BD. - string16 initial_surrogate; - initial_surrogate.push_back(0xd800); - string16 final_surrogate; - final_surrogate.push_back(0xdc00); - - // Old italic A = U+10300, will get converted to: F0 90 8C 80 'z'. - string16 surrogate_pair; - surrogate_pair.push_back(0xd800); - surrogate_pair.push_back(0xdf00); - surrogate_pair.push_back('z'); - - // Will get converted to the invalid char + 's': EF BF BD 's'. 
- string16 unterminated_surrogate; - unterminated_surrogate.push_back(0xd800); - unterminated_surrogate.push_back('s'); - - std::ostringstream stream; - stream << initial_surrogate << "," << final_surrogate << "," - << surrogate_pair << "," << unterminated_surrogate; - - EXPECT_STREQ("\xef\xbf\xbd,\xef\xbf\xbd,\xf0\x90\x8c\x80z,\xef\xbf\xbds", - stream.str().c_str()); - } -} - -TEST(String16Test, Hash) { - string16 str1 = ASCIIToUTF16("hello"); - string16 str2 = ASCIIToUTF16("world"); - - std::unordered_set<string16> set; - - set.insert(str1); - EXPECT_EQ(1u, set.count(str1)); - EXPECT_EQ(0u, set.count(str2)); + static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello"); + EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0); } } // namespace base
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc index 62ba11f..fe9be5f 100644 --- a/base/strings/string_piece.cc +++ b/base/strings/string_piece.cc
@@ -12,6 +12,7 @@ #include <ostream> #include "base/strings/utf_string_conversions.h" +#include "build/build_config.h" namespace gurl_base { namespace { @@ -24,8 +25,7 @@ // the possible values of an unsigned char. Thus it should be be declared // as follows: // bool table[UCHAR_MAX + 1] -inline void BuildLookupTable(const StringPiece& characters_wanted, - bool* table) { +inline void BuildLookupTable(StringPiece characters_wanted, bool* table) { const size_t length = characters_wanted.length(); const char* const data = characters_wanted.data(); for (size_t i = 0; i < length; ++i) { @@ -41,39 +41,25 @@ template class BasicStringPiece<string16>; #endif -std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { +std::ostream& operator<<(std::ostream& o, StringPiece piece) { o.write(piece.data(), static_cast<std::streamsize>(piece.size())); return o; } -std::ostream& operator<<(std::ostream& o, const StringPiece16& piece) { +std::ostream& operator<<(std::ostream& o, StringPiece16 piece) { return o << UTF16ToUTF8(piece); } +#if !defined(WCHAR_T_IS_UTF16) +std::ostream& operator<<(std::ostream& o, WStringPiece piece) { + return o << WideToUTF8(piece); +} +#endif + namespace internal { -template<typename STR> -size_t copyT(const BasicStringPiece<STR>& self, - typename STR::value_type* buf, - size_t n, - size_t pos) { - size_t ret = std::min(self.size() - pos, n); - memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type)); - return ret; -} - -size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) { - return copyT(self, buf, n, pos); -} - -size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) { - return copyT(self, buf, n, pos); -} - -template<typename STR> -size_t findT(const BasicStringPiece<STR>& self, - const BasicStringPiece<STR>& s, - size_t pos) { +template <typename STR> +size_t findT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) { if (pos > self.size()) return 
BasicStringPiece<STR>::npos; @@ -84,16 +70,16 @@ return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos; } -size_t find(const StringPiece& self, const StringPiece& s, size_t pos) { +size_t find(StringPiece self, StringPiece s, size_t pos) { return findT(self, s, pos); } -size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) { +size_t find(StringPiece16 self, StringPiece16 s, size_t pos) { return findT(self, s, pos); } -template<typename STR> -size_t findT(const BasicStringPiece<STR>& self, +template <typename STR> +size_t findT(BasicStringPiece<STR> self, typename STR::value_type c, size_t pos) { if (pos >= self.size()) @@ -105,18 +91,16 @@ static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; } -size_t find(const StringPiece& self, char c, size_t pos) { +size_t find(StringPiece self, char c, size_t pos) { return findT(self, c, pos); } -size_t find(const StringPiece16& self, char16 c, size_t pos) { +size_t find(StringPiece16 self, char16 c, size_t pos) { return findT(self, c, pos); } -template<typename STR> -size_t rfindT(const BasicStringPiece<STR>& self, - const BasicStringPiece<STR>& s, - size_t pos) { +template <typename STR> +size_t rfindT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) { if (self.size() < s.size()) return BasicStringPiece<STR>::npos; @@ -131,16 +115,16 @@ static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; } -size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) { +size_t rfind(StringPiece self, StringPiece s, size_t pos) { return rfindT(self, s, pos); } -size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) { +size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos) { return rfindT(self, s, pos); } -template<typename STR> -size_t rfindT(const BasicStringPiece<STR>& self, +template <typename STR> +size_t rfindT(BasicStringPiece<STR> self, typename STR::value_type c, size_t pos) { if (self.size() == 
0) @@ -156,18 +140,16 @@ return BasicStringPiece<STR>::npos; } -size_t rfind(const StringPiece& self, char c, size_t pos) { +size_t rfind(StringPiece self, char c, size_t pos) { return rfindT(self, c, pos); } -size_t rfind(const StringPiece16& self, char16 c, size_t pos) { +size_t rfind(StringPiece16 self, char16 c, size_t pos) { return rfindT(self, c, pos); } // 8-bit version using lookup table. -size_t find_first_of(const StringPiece& self, - const StringPiece& s, - size_t pos) { +size_t find_first_of(StringPiece self, StringPiece s, size_t pos) { if (self.size() == 0 || s.size() == 0) return StringPiece::npos; @@ -186,9 +168,7 @@ } // 16-bit brute force version. -size_t find_first_of(const StringPiece16& self, - const StringPiece16& s, - size_t pos) { +size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) { // Use the faster std::find() if searching for a single character. StringPiece16::const_iterator found = s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0]) @@ -200,9 +180,7 @@ } // 8-bit version using lookup table. -size_t find_first_not_of(const StringPiece& self, - const StringPiece& s, - size_t pos) { +size_t find_first_not_of(StringPiece self, StringPiece s, size_t pos) { if (self.size() == 0) return StringPiece::npos; @@ -224,8 +202,8 @@ } // 16-bit brute-force version. 
-BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, - const StringPiece16& s, +BASE_EXPORT size_t find_first_not_of(StringPiece16 self, + StringPiece16 s, size_t pos) { if (self.size() == 0) return StringPiece16::npos; @@ -244,8 +222,8 @@ return StringPiece16::npos; } -template<typename STR> -size_t find_first_not_ofT(const BasicStringPiece<STR>& self, +template <typename STR> +size_t find_first_not_ofT(BasicStringPiece<STR> self, typename STR::value_type c, size_t pos) { if (self.size() == 0) @@ -259,20 +237,16 @@ return BasicStringPiece<STR>::npos; } -size_t find_first_not_of(const StringPiece& self, - char c, - size_t pos) { +size_t find_first_not_of(StringPiece self, char c, size_t pos) { return find_first_not_ofT(self, c, pos); } -size_t find_first_not_of(const StringPiece16& self, - char16 c, - size_t pos) { +size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos) { return find_first_not_ofT(self, c, pos); } // 8-bit version using lookup table. -size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) { +size_t find_last_of(StringPiece self, StringPiece s, size_t pos) { if (self.size() == 0 || s.size() == 0) return StringPiece::npos; @@ -292,9 +266,7 @@ } // 16-bit brute-force version. -size_t find_last_of(const StringPiece16& self, - const StringPiece16& s, - size_t pos) { +size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) { if (self.size() == 0) return StringPiece16::npos; @@ -311,9 +283,7 @@ } // 8-bit version using lookup table. -size_t find_last_not_of(const StringPiece& self, - const StringPiece& s, - size_t pos) { +size_t find_last_not_of(StringPiece self, StringPiece s, size_t pos) { if (self.size() == 0) return StringPiece::npos; @@ -337,9 +307,7 @@ } // 16-bit brute-force version. 
-size_t find_last_not_of(const StringPiece16& self, - const StringPiece16& s, - size_t pos) { +size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) { if (self.size() == 0) return StringPiece::npos; @@ -359,8 +327,8 @@ return StringPiece16::npos; } -template<typename STR> -size_t find_last_not_ofT(const BasicStringPiece<STR>& self, +template <typename STR> +size_t find_last_not_ofT(BasicStringPiece<STR> self, typename STR::value_type c, size_t pos) { if (self.size() == 0) @@ -375,15 +343,11 @@ return BasicStringPiece<STR>::npos; } -size_t find_last_not_of(const StringPiece& self, - char c, - size_t pos) { +size_t find_last_not_of(StringPiece self, char c, size_t pos) { return find_last_not_ofT(self, c, pos); } -size_t find_last_not_of(const StringPiece16& self, - char16 c, - size_t pos) { +size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos) { return find_last_not_ofT(self, c, pos); }
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h index de54e94..dea85e3 100644 --- a/base/strings/string_piece.h +++ b/base/strings/string_piece.h
@@ -34,6 +34,7 @@ #include "base/strings/char_traits.h" #include "base/strings/string16.h" #include "base/strings/string_piece_forward.h" +#include "build/build_config.h" namespace gurl_base { @@ -48,86 +49,45 @@ // template internal to the .cc file. namespace internal { -BASE_EXPORT size_t copy(const StringPiece& self, - char* buf, - size_t n, - size_t pos); -BASE_EXPORT size_t copy(const StringPiece16& self, - char16* buf, - size_t n, - size_t pos); +BASE_EXPORT size_t find(StringPiece self, StringPiece s, size_t pos); +BASE_EXPORT size_t find(StringPiece16 self, StringPiece16 s, size_t pos); +BASE_EXPORT size_t find(StringPiece self, char c, size_t pos); +BASE_EXPORT size_t find(StringPiece16 self, char16 c, size_t pos); -BASE_EXPORT size_t find(const StringPiece& self, - const StringPiece& s, - size_t pos); -BASE_EXPORT size_t find(const StringPiece16& self, - const StringPiece16& s, - size_t pos); -BASE_EXPORT size_t find(const StringPiece& self, - char c, - size_t pos); -BASE_EXPORT size_t find(const StringPiece16& self, - char16 c, - size_t pos); +BASE_EXPORT size_t rfind(StringPiece self, StringPiece s, size_t pos); +BASE_EXPORT size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos); +BASE_EXPORT size_t rfind(StringPiece self, char c, size_t pos); +BASE_EXPORT size_t rfind(StringPiece16 self, char16 c, size_t pos); -BASE_EXPORT size_t rfind(const StringPiece& self, - const StringPiece& s, - size_t pos); -BASE_EXPORT size_t rfind(const StringPiece16& self, - const StringPiece16& s, - size_t pos); -BASE_EXPORT size_t rfind(const StringPiece& self, - char c, - size_t pos); -BASE_EXPORT size_t rfind(const StringPiece16& self, - char16 c, - size_t pos); - -BASE_EXPORT size_t find_first_of(const StringPiece& self, - const StringPiece& s, - size_t pos); -BASE_EXPORT size_t find_first_of(const StringPiece16& self, - const StringPiece16& s, +BASE_EXPORT size_t find_first_of(StringPiece self, StringPiece s, size_t pos); +BASE_EXPORT size_t 
find_first_of(StringPiece16 self, + StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_first_not_of(const StringPiece& self, - const StringPiece& s, +BASE_EXPORT size_t find_first_not_of(StringPiece self, + StringPiece s, size_t pos); -BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, - const StringPiece16& s, +BASE_EXPORT size_t find_first_not_of(StringPiece16 self, + StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_first_not_of(const StringPiece& self, - char c, - size_t pos); -BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, - char16 c, - size_t pos); +BASE_EXPORT size_t find_first_not_of(StringPiece self, char c, size_t pos); +BASE_EXPORT size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos); -BASE_EXPORT size_t find_last_of(const StringPiece& self, - const StringPiece& s, +BASE_EXPORT size_t find_last_of(StringPiece self, StringPiece s, size_t pos); +BASE_EXPORT size_t find_last_of(StringPiece16 self, + StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_last_of(const StringPiece16& self, - const StringPiece16& s, - size_t pos); -BASE_EXPORT size_t find_last_of(const StringPiece& self, - char c, - size_t pos); -BASE_EXPORT size_t find_last_of(const StringPiece16& self, - char16 c, - size_t pos); +BASE_EXPORT size_t find_last_of(StringPiece self, char c, size_t pos); +BASE_EXPORT size_t find_last_of(StringPiece16 self, char16 c, size_t pos); -BASE_EXPORT size_t find_last_not_of(const StringPiece& self, - const StringPiece& s, +BASE_EXPORT size_t find_last_not_of(StringPiece self, + StringPiece s, size_t pos); -BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, - const StringPiece16& s, +BASE_EXPORT size_t find_last_not_of(StringPiece16 self, + StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, - char16 c, - size_t pos); -BASE_EXPORT size_t find_last_not_of(const StringPiece& self, - char c, - size_t pos); +BASE_EXPORT size_t 
find_last_not_of(StringPiece16 self, char16 c, size_t pos); +BASE_EXPORT size_t find_last_not_of(StringPiece self, char c, size_t pos); } // namespace internal @@ -181,15 +141,6 @@ : ptr_(str.data()), length_(str.size()) {} constexpr BasicStringPiece(const value_type* offset, size_type len) : ptr_(offset), length_(len) {} - BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, - const typename STRING_TYPE::const_iterator& end) { - GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; - length_ = static_cast<size_t>(std::distance(begin, end)); - - // The length test before assignment is to avoid dereferencing an iterator - // that may point to the end() of a string. - ptr_ = length_ > 0 ? &*begin : nullptr; - } // data() may return a pointer to a buffer with embedded NULs, and the // returned buffer may or may not be null terminated. Therefore it is @@ -226,16 +177,6 @@ length_ -= n; } - constexpr int compare(BasicStringPiece x) const noexcept { - int r = CharTraits<value_type>::compare( - ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_)); - if (r == 0) { - if (length_ < x.length_) r = -1; - else if (length_ > x.length_) r = +1; - } - return r; - } - // This is the style of conversion preferred by std::string_view in C++17. explicit operator STRING_TYPE() const { return empty() ? STRING_TYPE() : STRING_TYPE(data(), size()); @@ -261,72 +202,187 @@ size_type max_size() const { return length_; } size_type capacity() const { return length_; } - size_type copy(value_type* buf, size_type n, size_type pos = 0) const { - return internal::copy(*this, buf, n, pos); + // String operations, see https://wg21.link/string.view.ops. 
+ constexpr size_type copy(value_type* s, + size_type n, + size_type pos = 0) const { + GURL_CHECK_LE(pos, size()); + size_type rlen = std::min(n, size() - pos); + traits_type::copy(s, data() + pos, rlen); + return rlen; } + constexpr BasicStringPiece substr(size_type pos = 0, + size_type n = npos) const { + GURL_CHECK_LE(pos, size()); + return {data() + pos, std::min(n, size() - pos)}; + } + + constexpr int compare(BasicStringPiece str) const noexcept { + size_type rlen = std::min(size(), str.size()); + int result = CharTraits<value_type>::compare(data(), str.data(), rlen); + if (result == 0) + result = size() == str.size() ? 0 : (size() < str.size() ? -1 : 1); + return result; + } + + constexpr int compare(size_type pos, + size_type n, + BasicStringPiece str) const { + return substr(pos, n).compare(str); + } + + constexpr int compare(size_type pos1, + size_type n1, + BasicStringPiece str, + size_type pos2, + size_type n2) const { + return substr(pos1, n1).compare(str.substr(pos2, n2)); + } + + constexpr int compare(const value_type* s) const { + return compare(BasicStringPiece(s)); + } + + constexpr int compare(size_type pos, size_type n, const value_type* s) const { + return substr(pos, n).compare(BasicStringPiece(s)); + } + + constexpr int compare(size_type pos, + size_type n1, + const value_type* s, + size_type n2) const { + return substr(pos, n1).compare(BasicStringPiece(s, n2)); + } + + // Searching, see https://wg21.link/string.view.find. + // find: Search for a character or substring at a given offset. 
- size_type find(const BasicStringPiece<STRING_TYPE>& s, - size_type pos = 0) const { + constexpr size_type find(BasicStringPiece s, + size_type pos = 0) const noexcept { return internal::find(*this, s, pos); } - size_type find(value_type c, size_type pos = 0) const { + + constexpr size_type find(value_type c, size_type pos = 0) const noexcept { return internal::find(*this, c, pos); } + constexpr size_type find(const value_type* s, + size_type pos, + size_type n) const { + return find(BasicStringPiece(s, n), pos); + } + + constexpr size_type find(const value_type* s, size_type pos = 0) const { + return find(BasicStringPiece(s), pos); + } + // rfind: Reverse find. - size_type rfind(const BasicStringPiece& s, - size_type pos = BasicStringPiece::npos) const { + constexpr size_type rfind(BasicStringPiece s, + size_type pos = npos) const noexcept { return internal::rfind(*this, s, pos); } - size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const { + + constexpr size_type rfind(value_type c, size_type pos = npos) const noexcept { return internal::rfind(*this, c, pos); } - // find_first_of: Find the first occurence of one of a set of characters. - size_type find_first_of(const BasicStringPiece& s, - size_type pos = 0) const { + constexpr size_type rfind(const value_type* s, + size_type pos, + size_type n) const { + return rfind(BasicStringPiece(s, n), pos); + } + + constexpr size_type rfind(const value_type* s, size_type pos = npos) const { + return rfind(BasicStringPiece(s), pos); + } + + // find_first_of: Find the first occurrence of one of a set of characters. 
+ constexpr size_type find_first_of(BasicStringPiece s, + size_type pos = 0) const noexcept { return internal::find_first_of(*this, s, pos); } - size_type find_first_of(value_type c, size_type pos = 0) const { + + constexpr size_type find_first_of(value_type c, + size_type pos = 0) const noexcept { return find(c, pos); } - // find_first_not_of: Find the first occurence not of a set of characters. - size_type find_first_not_of(const BasicStringPiece& s, - size_type pos = 0) const { - return internal::find_first_not_of(*this, s, pos); - } - size_type find_first_not_of(value_type c, size_type pos = 0) const { - return internal::find_first_not_of(*this, c, pos); + constexpr size_type find_first_of(const value_type* s, + size_type pos, + size_type n) const { + return find_first_of(BasicStringPiece(s, n), pos); } - // find_last_of: Find the last occurence of one of a set of characters. - size_type find_last_of(const BasicStringPiece& s, - size_type pos = BasicStringPiece::npos) const { + constexpr size_type find_first_of(const value_type* s, + size_type pos = 0) const { + return find_first_of(BasicStringPiece(s), pos); + } + + // find_last_of: Find the last occurrence of one of a set of characters. + constexpr size_type find_last_of(BasicStringPiece s, + size_type pos = npos) const noexcept { return internal::find_last_of(*this, s, pos); } - size_type find_last_of(value_type c, - size_type pos = BasicStringPiece::npos) const { + + constexpr size_type find_last_of(value_type c, + size_type pos = npos) const noexcept { return rfind(c, pos); } - // find_last_not_of: Find the last occurence not of a set of characters. 
- size_type find_last_not_of(const BasicStringPiece& s, - size_type pos = BasicStringPiece::npos) const { + constexpr size_type find_last_of(const value_type* s, + size_type pos, + size_type n) const { + return find_last_of(BasicStringPiece(s, n), pos); + } + + constexpr size_type find_last_of(const value_type* s, + size_type pos = npos) const { + return find_last_of(BasicStringPiece(s), pos); + } + + // find_first_not_of: Find the first occurrence not of a set of characters. + constexpr size_type find_first_not_of(BasicStringPiece s, + size_type pos = 0) const noexcept { + return internal::find_first_not_of(*this, s, pos); + } + + constexpr size_type find_first_not_of(value_type c, + size_type pos = 0) const noexcept { + return internal::find_first_not_of(*this, c, pos); + } + + constexpr size_type find_first_not_of(const value_type* s, + size_type pos, + size_type n) const { + return find_first_not_of(BasicStringPiece(s, n), pos); + } + + constexpr size_type find_first_not_of(const value_type* s, + size_type pos = 0) const { + return find_first_not_of(BasicStringPiece(s), pos); + } + + // find_last_not_of: Find the last occurrence not of a set of characters. + constexpr size_type find_last_not_of(BasicStringPiece s, + size_type pos = npos) const noexcept { return internal::find_last_not_of(*this, s, pos); } - size_type find_last_not_of(value_type c, - size_type pos = BasicStringPiece::npos) const { + + constexpr size_type find_last_not_of(value_type c, + size_type pos = npos) const noexcept { return internal::find_last_not_of(*this, c, pos); } - // substr. 
- constexpr BasicStringPiece substr( - size_type pos, - size_type n = BasicStringPiece::npos) const { - GURL_CHECK_LE(pos, size()); - return {data() + pos, std::min(n, size() - pos)}; + constexpr size_type find_last_not_of(const value_type* s, + size_type pos, + size_type n) const { + return find_last_not_of(BasicStringPiece(s, n), pos); + } + + constexpr size_type find_last_not_of(const value_type* s, + size_type pos = npos) const { + return find_last_not_of(BasicStringPiece(s), pos); } protected: @@ -472,11 +528,12 @@ return !(lhs < rhs); } -BASE_EXPORT std::ostream& operator<<(std::ostream& o, - const StringPiece& piece); +BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece piece); +BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece16 piece); -BASE_EXPORT std::ostream& operator<<(std::ostream& o, - const StringPiece16& piece); +#if !defined(WCHAR_T_IS_UTF16) +BASE_EXPORT std::ostream& operator<<(std::ostream& o, WStringPiece piece); +#endif // Hashing ---------------------------------------------------------------------
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc index e0d812b..76de8f7 100644 --- a/base/strings/string_piece_unittest.cc +++ b/base/strings/string_piece_unittest.cc
@@ -269,12 +269,13 @@ ASSERT_EQ(e.find(d, 4), std::string().find(std::string(), 4)); ASSERT_EQ(e.find(e, 4), std::string().find(std::string(), 4)); + constexpr typename TypeParam::value_type kNul = '\0'; ASSERT_EQ(a.find('a'), 0U); ASSERT_EQ(a.find('c'), 2U); ASSERT_EQ(a.find('z'), 25U); ASSERT_EQ(a.find('$'), Piece::npos); - ASSERT_EQ(a.find('\0'), Piece::npos); - ASSERT_EQ(f.find('\0'), 3U); + ASSERT_EQ(a.find(kNul), Piece::npos); + ASSERT_EQ(f.find(kNul), 3U); ASSERT_EQ(f.find('3'), 2U); ASSERT_EQ(f.find('5'), 5U); ASSERT_EQ(g.find('o'), 4U); @@ -282,15 +283,44 @@ ASSERT_EQ(g.find('o', 5), 8U); ASSERT_EQ(a.find('b', 5), Piece::npos); // empty string nonsense - ASSERT_EQ(d.find('\0'), Piece::npos); - ASSERT_EQ(e.find('\0'), Piece::npos); - ASSERT_EQ(d.find('\0', 4), Piece::npos); - ASSERT_EQ(e.find('\0', 7), Piece::npos); + ASSERT_EQ(d.find(kNul), Piece::npos); + ASSERT_EQ(e.find(kNul), Piece::npos); + ASSERT_EQ(d.find(kNul, 4), Piece::npos); + ASSERT_EQ(e.find(kNul, 7), Piece::npos); ASSERT_EQ(d.find('x'), Piece::npos); ASSERT_EQ(e.find('x'), Piece::npos); ASSERT_EQ(d.find('x', 4), Piece::npos); ASSERT_EQ(e.find('x', 7), Piece::npos); + ASSERT_EQ(a.find(b.data(), 1, 0), 1U); + ASSERT_EQ(a.find(c.data(), 9, 0), 9U); + ASSERT_EQ(a.find(c.data(), Piece::npos, 0), Piece::npos); + ASSERT_EQ(b.find(c.data(), Piece::npos, 0), Piece::npos); + ASSERT_EQ(a.find(d.data(), 12, 0), 12U); + ASSERT_EQ(a.find(e.data(), 17, 0), 17U); + // empty string nonsense + ASSERT_EQ(d.find(b.data(), 4, 0), Piece::npos); + ASSERT_EQ(e.find(b.data(), 7, 0), Piece::npos); + + ASSERT_EQ(a.find(b.data(), 1), Piece::npos); + ASSERT_EQ(a.find(c.data(), 9), 23U); + ASSERT_EQ(a.find(c.data(), Piece::npos), Piece::npos); + ASSERT_EQ(b.find(c.data(), Piece::npos), Piece::npos); + ASSERT_EQ(a.find(d.data(), 12), 12U); + ASSERT_EQ(a.find(e.data(), 17), 17U); + // empty string nonsense + ASSERT_EQ(d.find(b.data(), 4), Piece::npos); + ASSERT_EQ(e.find(b.data(), 7), Piece::npos); + + 
ASSERT_EQ(d.find(d.data(), 4, 0), + std::string().find(std::string().data(), 4, 0)); + ASSERT_EQ(d.find(e.data(), 4, 1), + std::string().find(std::string().data(), 4, 1)); + ASSERT_EQ(e.find(d.data(), 4, 2), + std::string().find(std::string().data(), 4, 2)); + ASSERT_EQ(e.find(e.data(), 4, 3), + std::string().find(std::string().data(), 4, 3)); + ASSERT_EQ(a.rfind(b), 0U); ASSERT_EQ(a.rfind(b, 1), 0U); ASSERT_EQ(a.rfind(c), 23U); @@ -325,8 +355,8 @@ ASSERT_EQ(g.rfind('o', 8), 8U); ASSERT_EQ(g.rfind('o', 7), 4U); ASSERT_EQ(g.rfind('o', 3), Piece::npos); - ASSERT_EQ(f.rfind('\0'), 3U); - ASSERT_EQ(f.rfind('\0', 12), 3U); + ASSERT_EQ(f.rfind(kNul), 3U); + ASSERT_EQ(f.rfind(kNul, 12), 3U); ASSERT_EQ(f.rfind('3'), 2U); ASSERT_EQ(f.rfind('5'), 5U); // empty string nonsense @@ -335,6 +365,22 @@ ASSERT_EQ(d.rfind('o', 4), Piece::npos); ASSERT_EQ(e.rfind('o', 7), Piece::npos); + ASSERT_EQ(a.rfind(b.data(), 1, 0), 1U); + ASSERT_EQ(a.rfind(c.data(), 22U, 0), 22U); + ASSERT_EQ(a.rfind(c.data(), 1U, 0), 1U); + ASSERT_EQ(a.rfind(c.data(), 0U, 0), 0U); + ASSERT_EQ(b.rfind(c.data(), 0U, 0), 0U); + ASSERT_EQ(a.rfind(d.data(), 12, 0), 12U); + ASSERT_EQ(a.rfind(e.data(), 17, 0), 17U); + ASSERT_EQ(d.rfind(b.data(), 4, 0), 0U); + ASSERT_EQ(e.rfind(b.data(), 7, 0), 0U); + + // empty string nonsense + ASSERT_EQ(d.rfind(d.data(), 4), std::string().rfind(std::string())); + ASSERT_EQ(e.rfind(d.data(), 7), std::string().rfind(std::string())); + ASSERT_EQ(d.rfind(e.data(), 4), std::string().rfind(std::string())); + ASSERT_EQ(e.rfind(e.data(), 7), std::string().rfind(std::string())); + TypeParam one_two_three_four(TestFixture::as_string("one,two:three;four")); TypeParam comma_colon(TestFixture::as_string(",:")); ASSERT_EQ(3U, Piece(one_two_three_four).find_first_of(comma_colon)); @@ -382,16 +428,16 @@ Piece h(equals); ASSERT_EQ(h.find_first_not_of('='), Piece::npos); ASSERT_EQ(h.find_first_not_of('=', 3), Piece::npos); - ASSERT_EQ(h.find_first_not_of('\0'), 0U); + 
ASSERT_EQ(h.find_first_not_of(kNul), 0U); ASSERT_EQ(g.find_first_not_of('x'), 2U); - ASSERT_EQ(f.find_first_not_of('\0'), 0U); - ASSERT_EQ(f.find_first_not_of('\0', 3), 4U); - ASSERT_EQ(f.find_first_not_of('\0', 2), 2U); + ASSERT_EQ(f.find_first_not_of(kNul), 0U); + ASSERT_EQ(f.find_first_not_of(kNul, 3), 4U); + ASSERT_EQ(f.find_first_not_of(kNul, 2), 2U); // empty string nonsense ASSERT_EQ(d.find_first_not_of('x'), Piece::npos); ASSERT_EQ(e.find_first_not_of('x'), Piece::npos); - ASSERT_EQ(d.find_first_not_of('\0'), Piece::npos); - ASSERT_EQ(e.find_first_not_of('\0'), Piece::npos); + ASSERT_EQ(d.find_first_not_of(kNul), Piece::npos); + ASSERT_EQ(e.find_first_not_of(kNul), Piece::npos); // Piece g("xx not found bb"); TypeParam fifty_six(TestFixture::as_string("56")); @@ -465,13 +511,14 @@ // empty string nonsense ASSERT_EQ(d.find_last_not_of('x'), Piece::npos); ASSERT_EQ(e.find_last_not_of('x'), Piece::npos); - ASSERT_EQ(d.find_last_not_of('\0'), Piece::npos); - ASSERT_EQ(e.find_last_not_of('\0'), Piece::npos); + ASSERT_EQ(d.find_last_not_of(kNul), Piece::npos); + ASSERT_EQ(e.find_last_not_of(kNul), Piece::npos); ASSERT_EQ(a.substr(0, 3), b); ASSERT_EQ(a.substr(23), c); ASSERT_EQ(a.substr(23, 3), c); ASSERT_EQ(a.substr(23, 99), c); + ASSERT_EQ(a.substr(), a); ASSERT_EQ(a.substr(0), a); ASSERT_EQ(a.substr(3, 2), TestFixture::as_string("de")); ASSERT_EQ(d.substr(0, 99), e); @@ -626,10 +673,7 @@ nullptr, static_cast<typename BasicStringPiece<TypeParam>::size_type>(0))); ASSERT_EQ(empty, BasicStringPiece<TypeParam>()); - ASSERT_EQ(str, BasicStringPiece<TypeParam>(str.begin(), str.end())); - ASSERT_EQ(empty, BasicStringPiece<TypeParam>(str.begin(), str.begin())); ASSERT_EQ(empty, BasicStringPiece<TypeParam>(empty)); - ASSERT_EQ(empty, BasicStringPiece<TypeParam>(empty.begin(), empty.end())); } TEST(StringPieceTest, ConstexprCtor) { @@ -677,6 +721,11 @@ { StringPiece piece; + ASSERT_DEATH_IF_SUPPORTED(piece.copy(nullptr, 0, 1), ""); + } + + { + StringPiece piece; 
ASSERT_DEATH_IF_SUPPORTED(piece.substr(1), ""); } } @@ -738,6 +787,57 @@ static_assert(piece.compare("gh") == -1, ""); static_assert(piece.compare("ghi") == -1, ""); static_assert(piece.compare("ghij") == -1, ""); + + static_assert(piece.compare(0, 0, "") == 0, ""); + static_assert(piece.compare(0, 1, "d") == 0, ""); + static_assert(piece.compare(0, 2, "de") == 0, ""); + static_assert(piece.compare(0, 3, "def") == 0, ""); + static_assert(piece.compare(1, 0, "") == 0, ""); + static_assert(piece.compare(1, 1, "e") == 0, ""); + static_assert(piece.compare(1, 2, "ef") == 0, ""); + static_assert(piece.compare(1, 3, "ef") == 0, ""); + static_assert(piece.compare(2, 0, "") == 0, ""); + static_assert(piece.compare(2, 1, "f") == 0, ""); + static_assert(piece.compare(2, 2, "f") == 0, ""); + static_assert(piece.compare(2, 3, "f") == 0, ""); + static_assert(piece.compare(3, 0, "") == 0, ""); + static_assert(piece.compare(3, 1, "") == 0, ""); + static_assert(piece.compare(3, 2, "") == 0, ""); + static_assert(piece.compare(3, 3, "") == 0, ""); + + static_assert(piece.compare(0, 0, "def", 0) == 0, ""); + static_assert(piece.compare(0, 1, "def", 1) == 0, ""); + static_assert(piece.compare(0, 2, "def", 2) == 0, ""); + static_assert(piece.compare(0, 3, "def", 3) == 0, ""); + static_assert(piece.compare(1, 0, "ef", 0) == 0, ""); + static_assert(piece.compare(1, 1, "ef", 1) == 0, ""); + static_assert(piece.compare(1, 2, "ef", 2) == 0, ""); + static_assert(piece.compare(1, 3, "ef", 2) == 0, ""); + static_assert(piece.compare(2, 0, "f", 0) == 0, ""); + static_assert(piece.compare(2, 1, "f", 1) == 0, ""); + static_assert(piece.compare(2, 2, "f", 1) == 0, ""); + static_assert(piece.compare(2, 3, "f", 1) == 0, ""); + static_assert(piece.compare(3, 0, "", 0) == 0, ""); + static_assert(piece.compare(3, 1, "", 0) == 0, ""); + static_assert(piece.compare(3, 2, "", 0) == 0, ""); + static_assert(piece.compare(3, 3, "", 0) == 0, ""); + + static_assert(piece.compare(0, 0, "def", 0, 0) == 0, ""); + 
static_assert(piece.compare(0, 1, "def", 0, 1) == 0, ""); + static_assert(piece.compare(0, 2, "def", 0, 2) == 0, ""); + static_assert(piece.compare(0, 3, "def", 0, 3) == 0, ""); + static_assert(piece.compare(1, 0, "def", 1, 0) == 0, ""); + static_assert(piece.compare(1, 1, "def", 1, 1) == 0, ""); + static_assert(piece.compare(1, 2, "def", 1, 2) == 0, ""); + static_assert(piece.compare(1, 3, "def", 1, 3) == 0, ""); + static_assert(piece.compare(2, 0, "def", 2, 0) == 0, ""); + static_assert(piece.compare(2, 1, "def", 2, 1) == 0, ""); + static_assert(piece.compare(2, 2, "def", 2, 2) == 0, ""); + static_assert(piece.compare(2, 3, "def", 2, 3) == 0, ""); + static_assert(piece.compare(3, 0, "def", 3, 0) == 0, ""); + static_assert(piece.compare(3, 1, "def", 3, 1) == 0, ""); + static_assert(piece.compare(3, 2, "def", 3, 2) == 0, ""); + static_assert(piece.compare(3, 3, "def", 3, 3) == 0, ""); } TEST(StringPieceTest, Substr) { @@ -751,6 +851,7 @@ static_assert(piece.substr(23) == "xyz", ""); static_assert(piece.substr(23, 3) == "xyz", ""); static_assert(piece.substr(23, 99) == "xyz", ""); + static_assert(piece.substr() == piece, ""); static_assert(piece.substr(0) == piece, ""); static_assert(piece.substr(0, 99) == piece, ""); }
diff --git a/base/strings/string_util.h b/base/strings/string_util.h index f43a8ac..a1e5c59 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h
@@ -84,6 +84,31 @@ // This function is intended to be called from gurl_base::vswprintf. BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); +// Simplified implementation of C++20's std::basic_string_view(It, End). +// Reference: https://wg21.link/string.view.cons +template <typename StringT, typename Iter> +constexpr BasicStringPiece<StringT> MakeBasicStringPiece(Iter begin, Iter end) { + GURL_DCHECK_GE(end - begin, 0); + return {gurl_base::to_address(begin), end - begin}; +} + +// Explicit instantiations of MakeBasicStringPiece for the BasicStringPiece +// aliases defined in base/strings/string_piece_forward.h +template <typename Iter> +constexpr StringPiece MakeStringPiece(Iter begin, Iter end) { + return MakeBasicStringPiece<std::string>(begin, end); +} + +template <typename Iter> +constexpr StringPiece16 MakeStringPiece16(Iter begin, Iter end) { + return MakeBasicStringPiece<string16>(begin, end); +} + +template <typename Iter> +constexpr WStringPiece MakeWStringPiece(Iter begin, Iter end) { + return MakeBasicStringPiece<std::wstring>(begin, end); +} + // ASCII-specific tolower. The standard library's tolower is locale sensitive, // so we don't want to use it here. template <typename CharT, @@ -291,9 +316,9 @@ // Compare the lower-case form of the given string against the given // previously-lower-cased ASCII string (typically a constant). BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str, - StringPiece lowecase_ascii); + StringPiece lowercase_ascii); BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str, - StringPiece lowecase_ascii); + StringPiece lowercase_ascii); // Performs a case-sensitive string compare of the given 16-bit string against // the given 8-bit ASCII string (typically a constant). The behavior is
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h index 006aeb0..ccc1367 100644 --- a/base/strings/string_util_internal.h +++ b/base/strings/string_util_internal.h
@@ -228,7 +228,7 @@ } template <bool (*Validator)(uint32_t)> -inline static bool DoIsStringUTF8(StringPiece str) { +inline bool DoIsStringUTF8(StringPiece str) { const char* src = str.data(); int32_t src_len = static_cast<int32_t>(str.length()); int32_t char_index = 0; @@ -258,8 +258,8 @@ // string piece gives additional flexibility for the caller (doesn't have to be // null terminated) so we choose the StringPiece route. template <typename Str> -static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, - StringPiece lowercase_ascii) { +inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, + StringPiece lowercase_ascii) { return std::equal( str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(), [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; });
diff --git a/base/strings/string_util_posix.h b/base/strings/string_util_posix.h index 7d5a67b..91cf7a6 100644 --- a/base/strings/string_util_posix.h +++ b/base/strings/string_util_posix.h
@@ -33,17 +33,6 @@ return ::vswprintf(buffer, size, format, arguments); } -// These mirror the APIs in string_util_win.h. Since gurl_base::StringPiece is -// already the native string type on POSIX platforms these APIs are simple -// no-ops. -inline StringPiece AsCrossPlatformPiece(StringPiece str) { - return str; -} - -inline StringPiece AsNativeStringPiece(StringPiece str) { - return str; -} - } // namespace base #endif // BASE_STRINGS_STRING_UTIL_POSIX_H_
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc index f73b895..beb99e2 100644 --- a/base/strings/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc
@@ -1310,6 +1310,47 @@ EXPECT_EQ(i.portable, IsWprintfFormatPortable(i.input)); } +TEST(StringUtilTest, MakeBasicStringPieceTest) { + constexpr char kFoo[] = "Foo"; + static_assert(MakeStringPiece(kFoo, kFoo + 3) == kFoo, ""); + static_assert(MakeStringPiece(kFoo, kFoo + 3).data() == kFoo, ""); + static_assert(MakeStringPiece(kFoo, kFoo + 3).size() == 3, ""); + static_assert(MakeStringPiece(kFoo + 3, kFoo + 3).empty(), ""); + static_assert(MakeStringPiece(kFoo + 4, kFoo + 4).empty(), ""); + + std::string foo = kFoo; + EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()), foo); + EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).data(), foo.data()); + EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).size(), foo.size()); + EXPECT_TRUE(MakeStringPiece(foo.end(), foo.end()).empty()); + + constexpr char16 kBar[] = STRING16_LITERAL("Bar"); + static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar, ""); + static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar, ""); + static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3, ""); + static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty(), ""); + static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty(), ""); + + string16 bar = kBar; + EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()), bar); + EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).data(), bar.data()); + EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).size(), bar.size()); + EXPECT_TRUE(MakeStringPiece16(bar.end(), bar.end()).empty()); + + constexpr wchar_t kBaz[] = L"Baz"; + static_assert(MakeWStringPiece(kBaz, kBaz + 3) == kBaz, ""); + static_assert(MakeWStringPiece(kBaz, kBaz + 3).data() == kBaz, ""); + static_assert(MakeWStringPiece(kBaz, kBaz + 3).size() == 3, ""); + static_assert(MakeWStringPiece(kBaz + 3, kBaz + 3).empty(), ""); + static_assert(MakeWStringPiece(kBaz + 4, kBaz + 4).empty(), ""); + + std::wstring baz = kBaz; + EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()), baz); + EXPECT_EQ(MakeWStringPiece(baz.begin(), 
baz.end()).data(), baz.data()); + EXPECT_EQ(MakeWStringPiece(baz.begin(), baz.end()).size(), baz.size()); + EXPECT_TRUE(MakeWStringPiece(baz.end(), baz.end()).empty()); +} + TEST(StringUtilTest, RemoveChars) { const char kRemoveChars[] = "-/+*"; std::string input = "A-+bc/d!*";
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h index 51a6a2b..3ddbc92 100644 --- a/base/strings/string_util_win.h +++ b/base/strings/string_util_win.h
@@ -107,18 +107,6 @@ return string16(as_u16cstr(str.data()), str.size()); } -// Compatibility shim for cross-platform code that passes a StringPieceType to a -// cross platform string utility function. Most of these functions are only -// implemented for gurl_base::StringPiece and gurl_base::StringPiece16, which is why -// gurl_base::WStringPieces need to be converted on API boundaries. -inline StringPiece16 AsCrossPlatformPiece(WStringPiece str) { - return AsStringPiece16(str); -} - -inline WStringPiece AsNativeStringPiece(StringPiece16 str) { - return AsWStringPiece(str); -} - // The following section contains overloads of the cross-platform APIs for // std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring // and gurl_base::string16 are distinct types, as otherwise this would result in an @@ -167,7 +155,7 @@ BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters); BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str, - StringPiece lowecase_ascii); + StringPiece lowercase_ascii); BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
diff --git a/build_config/build_config.bzl b/build_config/build_config.bzl index 9357584..117bc96 100644 --- a/build_config/build_config.bzl +++ b/build_config/build_config.bzl
@@ -10,11 +10,6 @@ ], }) -_strings_srcs = select({ - "//build_config:windows_x86_64": [], - "//conditions:default": ["string16.cc"], -}) - _strings_hdrs = select({ "//build_config:windows_x86_64": ["string_util_win.h"], "//conditions:default": ["string_util_posix.h"], @@ -35,7 +30,6 @@ build_config = struct( default_copts = _default_copts, url_linkopts = _url_linkopts, - strings_srcs = _strings_srcs, strings_hdrs = _strings_hdrs, icuuc_deps = _icuuc_deps, )
diff --git a/copy.bara.sky b/copy.bara.sky index fefc63d..2ed782d 100644 --- a/copy.bara.sky +++ b/copy.bara.sky
@@ -13,10 +13,12 @@ "AUTHORS", "LICENSE", "base/compiler_specific.h", + "base/containers/contains.h", "base/containers/checked_iterators.h", "base/containers/contiguous_iterator.h", "base/containers/span.h", "base/containers/util.h", + "base/i18n/uchar.h", "base/functional/*.h", "base/ranges/*.h", "base/macros.h", @@ -82,10 +84,10 @@ # Ugly hack. In Chromium, ICU is built with UChar = uint16_t. We can't # really do that with the system ICU, so we have to work this around with a # cast. - core.replace( - "src, src_len, output->data(),", - "(UChar*)src, src_len, (UChar*)output->data(),", - ), + #core.replace( + # "src, src_len, output->data(),", + # "(UChar*)src, src_len, (UChar*)output->data(),", + #), # Use system ICU. core.replace(
diff --git a/url/gurl.cc b/url/gurl.cc index 68f3f8c..3b7d9f5 100644 --- a/url/gurl.cc +++ b/url/gurl.cc
@@ -485,17 +485,23 @@ if (has_host() || has_username() || has_password() || has_port()) return false; - if (!gurl_base::StartsWith(path_piece(), allowed_path)) + return IsAboutPath(path_piece(), allowed_path); +} + +// static +bool GURL::IsAboutPath(gurl_base::StringPiece actual_path, + gurl_base::StringPiece allowed_path) { + if (!gurl_base::StartsWith(actual_path, allowed_path)) return false; - if (path_piece().size() == allowed_path.size()) { - GURL_DCHECK_EQ(path_piece(), allowed_path); + if (actual_path.size() == allowed_path.size()) { + GURL_DCHECK_EQ(actual_path, allowed_path); return true; } - if ((path_piece().size() == allowed_path.size() + 1) && - path_piece().back() == '/') { - GURL_DCHECK_EQ(path_piece(), allowed_path.as_string() + '/'); + if ((actual_path.size() == allowed_path.size() + 1) && + actual_path.back() == '/') { + GURL_DCHECK_EQ(actual_path, allowed_path.as_string() + '/'); return true; }
diff --git a/url/gurl.h b/url/gurl.h index aa33094..37e1c8d 100644 --- a/url/gurl.h +++ b/url/gurl.h
@@ -434,6 +434,10 @@ // See base/trace_event/memory_usage_estimator.h for more info. size_t EstimateMemoryUsage() const; + // Helper used by GURL::IsAboutUrl and KURL::IsAboutURL. + static bool IsAboutPath(gurl_base::StringPiece actual_path, + gurl_base::StringPiece allowed_path); + private: // Variant of the string parsing constructor that allows the caller to elect // retain trailing whitespace, if any, on the passed URL spec, but only if
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index f0f72cd..6295d98 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -9,6 +9,7 @@ #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" +#include "url/gurl_abstract_tests.h" #include "url/origin.h" #include "url/url_canon.h" #include "url/url_test_utils.h" @@ -883,44 +884,6 @@ } } -TEST(GURLTest, IsAboutBlank) { - const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo", - "about:blank/#foo", - "about:blank?foo#foo"}; - for (const auto& url : kAboutBlankUrls) - EXPECT_TRUE(GURL(url).IsAboutBlank()) << url; - - const std::string kNotAboutBlankUrls[] = { - "http:blank", "about:blan", "about://blank", - "about:blank/foo", "about://:8000/blank", "about://foo:foo@/blank", - "foo@about:blank", "foo:bar@about:blank", "about:blank:8000", - "about:blANk"}; - for (const auto& url : kNotAboutBlankUrls) - EXPECT_FALSE(GURL(url).IsAboutBlank()) << url; -} - -TEST(GURLTest, IsAboutSrcdoc) { - const std::string kAboutSrcdocUrls[] = { - "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo", - "about:srcdoc?foo#foo"}; - for (const auto& url : kAboutSrcdocUrls) - EXPECT_TRUE(GURL(url).IsAboutSrcdoc()) << url; - - const std::string kNotAboutSrcdocUrls[] = {"http:srcdoc", - "about:srcdo", - "about://srcdoc", - "about://srcdoc\\", - "about:srcdoc/foo", - "about://:8000/srcdoc", - "about://foo:foo@/srcdoc", - "foo@about:srcdoc", - "foo:bar@about:srcdoc", - "about:srcdoc:8000", - "about:srCDOc"}; - for (const auto& url : kNotAboutSrcdocUrls) - EXPECT_FALSE(GURL(url).IsAboutSrcdoc()) << url; -} - TEST(GURLTest, EqualsIgnoringRef) { const struct { const char* url_a; @@ -1029,4 +992,18 @@ EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin)); } +class GURLTestTraits { + public: + using UrlType = GURL; + + static UrlType CreateUrlFromString(gurl_base::StringPiece s) { return GURL(s); } + static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); } + static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); } + + 
// Only static members. + GURLTestTraits() = delete; +}; + +INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits); + } // namespace url
diff --git a/url/origin.cc b/url/origin.cc index d04e557..ca37428 100644 --- a/url/origin.cc +++ b/url/origin.cc
@@ -11,9 +11,9 @@ #include "base/base64.h" #include "polyfills/base/check_op.h" +#include "base/containers/contains.h" #include "base/containers/span.h" #include "base/pickle.h" -#include "base/stl_util.h" #include "base/strings/strcat.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h"
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc index 7793b9a..2f342c5 100644 --- a/url/origin_unittest.cc +++ b/url/origin_unittest.cc
@@ -10,32 +10,11 @@ #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" #include "url/origin.h" +#include "url/origin_abstract_tests.h" #include "url/url_util.h" namespace url { -void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { - EXPECT_EQ(a, b); - const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); - const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); - EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin); - EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len); - EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin); - EXPECT_EQ(a_parsed.username.len, b_parsed.username.len); - EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin); - EXPECT_EQ(a_parsed.password.len, b_parsed.password.len); - EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin); - EXPECT_EQ(a_parsed.host.len, b_parsed.host.len); - EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin); - EXPECT_EQ(a_parsed.port.len, b_parsed.port.len); - EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin); - EXPECT_EQ(a_parsed.path.len, b_parsed.path.len); - EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin); - EXPECT_EQ(a_parsed.query.len, b_parsed.query.len); - EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin); - EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len); -} - class OriginTest : public ::testing::Test { public: void SetUp() override { @@ -197,68 +176,6 @@ EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:srcdoc"), opaque_b)); EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:blank?hello#whee"), opaque_b)); - - const char* const urls[] = { - "data:text/html,Hello!", - "javascript:alert(1)", - "about:blank", - "file://example.com:443/etc/passwd", - "unknown-scheme:foo", - "unknown-scheme://bar", - "http", - "http:", - "http:/", - "http://", - "http://:", - "http://:1", - "yay", - "http::///invalid.example.com/", - "blob:null/foo", // blob:null (actually a valid URL) - "blob:data:foo", // blob + data (which is nonstandard) - 
"blob:about://blank/", // blob + about (which is nonstandard) - "blob:about:blank/", // blob + about (which is nonstandard) - "filesystem:http://example.com/", // Invalid (missing /type/) - "filesystem:local-but-nonstandard:baz./type/", // fs requires standard - "filesystem:local-but-nonstandard://hostname/type/", - "filesystem:unknown-scheme://hostname/type/", - "local-but-nonstandar:foo", // Prefix of registered scheme. - "but-nonstandard:foo", // Suffix of registered scheme. - "local-and-standard:", // Standard scheme needs a hostname. - "standard-but-noaccess:", // Standard scheme needs a hostname. - "blob:blob:http://www.example.com/guid-goes-here", // Double blob. - }; - - for (auto* test_url : urls) { - SCOPED_TRACE(test_url); - GURL url(test_url); - const url::Origin opaque_origin; - - // Opaque origins returned by Origin::Create(). - { - Origin origin = Origin::Create(url); - EXPECT_EQ("", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); - EXPECT_TRUE(origin.opaque()); - // An origin is always same-origin with itself. - EXPECT_EQ(origin, origin); - EXPECT_NE(origin, url::Origin()); - EXPECT_EQ(SchemeHostPort(), origin.GetTupleOrPrecursorTupleIfOpaque()); - // A copy of |origin| should be same-origin as well. - Origin origin_copy = origin; - EXPECT_EQ("", origin_copy.scheme()); - EXPECT_EQ("", origin_copy.host()); - EXPECT_EQ(0, origin_copy.port()); - EXPECT_TRUE(origin_copy.opaque()); - EXPECT_EQ(origin, origin_copy); - // And it should always be cross-origin to another opaque Origin. - EXPECT_NE(origin, opaque_origin); - // Re-creating from the URL should also be cross-origin. 
- EXPECT_NE(origin, Origin::Create(url)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } - } } TEST_F(OriginTest, ConstructFromTuple) { @@ -286,132 +203,6 @@ } } -TEST_F(OriginTest, ConstructFromGURL) { - Origin different_origin = - Origin::Create(GURL("https://not-in-the-list.test/")); - - struct TestCases { - const char* const url; - const char* const expected_scheme; - const char* const expected_host; - const uint16_t expected_port; - } cases[] = { - // IP Addresses - {"http://192.168.9.1/", "http", "192.168.9.1", 80}, - {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80}, - {"http://1/", "http", "0.0.0.1", 80}, - {"http://1:1/", "http", "0.0.0.1", 1}, - {"http://3232237825/", "http", "192.168.9.1", 80}, - - // Punycode - {"http://☃.net/", "http", "xn--n3h.net", 80}, - {"blob:http://☃.net/", "http", "xn--n3h.net", 80}, - - // Generic URLs - {"http://example.com/", "http", "example.com", 80}, - {"http://example.com:123/", "http", "example.com", 123}, - {"https://example.com/", "https", "example.com", 443}, - {"https://example.com:123/", "https", "example.com", 123}, - {"http://user:pass@example.com/", "http", "example.com", 80}, - {"http://example.com:123/?query", "http", "example.com", 123}, - {"https://example.com/#1234", "https", "example.com", 443}, - {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123}, - - // Registered URLs - {"ftp://example.com/", "ftp", "example.com", 21}, - {"ws://example.com/", "ws", "example.com", 80}, - {"wss://example.com/", "wss", "example.com", 443}, - {"wss://user:pass@example.com/", "wss", "example.com", 443}, - - // Scheme (registered in SetUp()) that's both local and standard. - // TODO: Is it really appropriate to do network-host canonicalization of - // schemes without ports? 
- {"local-and-standard:20", "local-and-standard", "0.0.0.20", 0}, - {"local-and-standard:20.", "local-and-standard", "0.0.0.20", 0}, - {"local-and-standard:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg", "local-and-standard", - "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0}, - {"local-and-standard:foo", "local-and-standard", "foo", 0}, - {"local-and-standard://bar:20", "local-and-standard", "bar", 0}, - {"local-and-standard:baz.", "local-and-standard", "baz.", 0}, - {"local-and-standard:baz..", "local-and-standard", "baz..", 0}, - {"local-and-standard:baz..bar", "local-and-standard", "baz..bar", 0}, - {"local-and-standard:baz...", "local-and-standard", "baz...", 0}, - - // Scheme (registered in SetUp()) that's local but nonstandard. These - // always have empty hostnames, but are allowed to be url::Origins. - {"local-but-nonstandard:", "local-but-nonstandard", "", 0}, - {"local-but-nonstandard:foo", "local-but-nonstandard", "", 0}, - {"local-but-nonstandard://bar", "local-but-nonstandard", "", 0}, - {"also-local-but-nonstandard://bar", "also-local-but-nonstandard", "", 0}, - - // Scheme (registered in SetUp()) that's standard but marked as noaccess. - // url::Origin doesn't currently take the noaccess property into account, - // so these aren't expected to result in opaque origins. 
- {"standard-but-noaccess:foo", "standard-but-noaccess", "foo", 0}, - {"standard-but-noaccess://bar", "standard-but-noaccess", "bar", 0}, - - // file: URLs - {"file:///etc/passwd", "file", "", 0}, - {"file://example.com/etc/passwd", "file", "example.com", 0}, - - // Filesystem: - {"filesystem:http://example.com/type/", "http", "example.com", 80}, - {"filesystem:http://example.com:123/type/", "http", "example.com", 123}, - {"filesystem:https://example.com/type/", "https", "example.com", 443}, - {"filesystem:https://example.com:123/type/", "https", "example.com", 123}, - {"filesystem:local-and-standard:baz./type/", "local-and-standard", "baz.", - 0}, - - // Blob: - {"blob:http://example.com/guid-goes-here", "http", "example.com", 80}, - {"blob:http://example.com:123/guid-goes-here", "http", "example.com", - 123}, - {"blob:https://example.com/guid-goes-here", "https", "example.com", 443}, - {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80}, - }; - - for (const auto& test_case : cases) { - SCOPED_TRACE(test_case.url); - GURL url(test_case.url); - EXPECT_TRUE(url.is_valid()); - Origin origin = Origin::Create(url); - EXPECT_EQ(test_case.expected_scheme, origin.scheme()); - EXPECT_EQ(test_case.expected_host, origin.host()); - EXPECT_EQ(test_case.expected_port, origin.port()); - EXPECT_FALSE(origin.opaque()); - EXPECT_EQ(origin, origin); - EXPECT_NE(different_origin, origin); - EXPECT_NE(origin, different_origin); - EXPECT_EQ(origin, Origin::Resolve(GURL("about:blank"), origin)); - EXPECT_EQ(origin, Origin::Resolve(GURL("about:blank?bar#foo"), origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - - url::Origin derived_opaque = - Origin::Resolve(GURL("about:blank?bar#foo"), origin) - .DeriveNewOpaqueOrigin(); - EXPECT_TRUE(derived_opaque.opaque()); - EXPECT_NE(origin, derived_opaque); - EXPECT_TRUE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsValid()); - EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(), - 
derived_opaque.GetTupleOrPrecursorTupleIfOpaque()); - EXPECT_EQ(derived_opaque, derived_opaque); - - url::Origin derived_opaque_via_data_url = - Origin::Resolve(GURL("data:text/html,baz"), origin); - EXPECT_TRUE(derived_opaque_via_data_url.opaque()); - EXPECT_NE(origin, derived_opaque_via_data_url); - EXPECT_TRUE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque() - .IsValid()); - EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(), - derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque()); - EXPECT_NE(derived_opaque, derived_opaque_via_data_url); - EXPECT_NE(derived_opaque_via_data_url, derived_opaque); - EXPECT_NE(derived_opaque.DeriveNewOpaqueOrigin(), derived_opaque); - EXPECT_EQ(derived_opaque_via_data_url, derived_opaque_via_data_url); - } -} - TEST_F(OriginTest, Serialization) { struct TestCases { const char* const url; @@ -666,20 +457,6 @@ EXPECT_STREQ("https://foo.com", origin1_debug_alias); } -TEST_F(OriginTest, NonStandardScheme) { - Origin origin = Origin::Create(GURL("cow://")); - EXPECT_TRUE(origin.opaque()); -} - -TEST_F(OriginTest, NonStandardSchemeWithAndroidWebViewHack) { - EnableNonStandardSchemesForAndroidWebView(); - Origin origin = Origin::Create(GURL("cow://")); - EXPECT_FALSE(origin.opaque()); - EXPECT_EQ("cow", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); -} - TEST_F(OriginTest, CanBeDerivedFrom) { AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST); Origin opaque_unique_origin = Origin(); @@ -818,10 +595,10 @@ {"standard-but-noaccess://a.com/foo", &regular_origin, false}, {"standard-but-noaccess://a.com/foo", &opaque_precursor_origin, false}, {"standard-but-noaccess://a.com/foo", &opaque_unique_origin, true}, - {"standard-but-noaccess://a.com/foo", &no_access_origin, false}, + {"standard-but-noaccess://a.com/foo", &no_access_origin, true}, {"standard-but-noaccess://a.com/foo", &no_access_opaque_precursor_origin, - false}, - {"standard-but-noaccess://b.com/foo", 
&no_access_origin, false}, + true}, + {"standard-but-noaccess://b.com/foo", &no_access_origin, true}, {"standard-but-noaccess://b.com/foo", &no_access_opaque_precursor_origin, true}, @@ -967,4 +744,8 @@ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString()); } +INSTANTIATE_TYPED_TEST_SUITE_P(UrlOrigin, + AbstractOriginTest, + UrlOriginTestTraits); + } // namespace url
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index 265761b..21b473d 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc
@@ -10,9 +10,9 @@ #include <tuple> #include "polyfills/base/check_op.h" +#include "base/containers/contains.h" #include "polyfills/base/notreached.h" #include "base/numerics/safe_conversions.h" -#include "base/stl_util.h" #include "base/strings/string_number_conversions.h" #include "url/gurl.h" #include "url/third_party/mozilla/url_parse.h" @@ -49,6 +49,10 @@ return host == canon_host; } +// Note: When changing IsValidInput, consider also updating +// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in +// behavior between these 2 layers, but we should avoid introducing new +// differences). bool IsValidInput(const gurl_base::StringPiece& scheme, const gurl_base::StringPiece& host, uint16_t port, @@ -57,15 +61,21 @@ if (scheme.empty()) return false; + // about:blank and other no-access schemes translate into an opaque origin. + // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink. + if (gurl_base::Contains(GetNoAccessSchemes(), scheme)) + return false; + SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; bool is_standard = GetStandardSchemeType( scheme.data(), Component(0, gurl_base::checked_cast<int>(scheme.length())), &scheme_type); if (!is_standard) { - // To be consistent with blink, local non-standard schemes are currently - // allowed to be tuple origins. Nonstandard schemes don't have hostnames, - // so their tuple is just ("protocol", "", 0). + // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local + // non-standard schemes are currently allowed to be tuple origins. + // Nonstandard schemes don't have hostnames, so their tuple is just + // ("protocol", "", 0). // // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and // remove this local scheme exception.
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc index a4cbdb3..a1f415d 100644 --- a/url/scheme_host_port_unittest.cc +++ b/url/scheme_host_port_unittest.cc
@@ -55,8 +55,15 @@ EXPECT_EQ(invalid, invalid); const char* urls[] = { - "data:text/html,Hello!", "javascript:alert(1)", - "file://example.com:443/etc/passwd", + // about:, data:, javascript: and other no-access schemes translate into + // an invalid SchemeHostPort + "about:blank", "about:blank#ref", "about:blank?query=123", "about:srcdoc", + "about:srcdoc#ref", "about:srcdoc?query=123", "data:text/html,Hello!", + "javascript:alert(1)", + + // GURLs where GURL::is_valid returns false translate into an invalid + // SchemeHostPort. + "file://example.com:443/etc/passwd", "#!^%!$!&*", // These schemes do not follow the generic URL syntax, so make sure we // treat them as invalid (scheme, host, port) tuples (even though such
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc index 614e338..93c9247 100644 --- a/url/url_canon_icu.cc +++ b/url/url_canon_icu.cc
@@ -9,6 +9,7 @@ #include <string.h> #include "polyfills/base/check.h" +#include "base/i18n/uchar.h" #include <unicode/ucnv.h> #include <unicode/ucnv_cb.h> #include <unicode/utypes.h> @@ -94,8 +95,9 @@ do { UErrorCode err = U_ZERO_ERROR; char* dest = &output->data()[begin_offset]; - int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity, - input, input_len, &err); + int required_capacity = + ucnv_fromUChars(converter_, dest, dest_capacity, + gurl_base::i18n::ToUCharPtr(input), input_len, &err); if (err != U_BUFFER_OVERFLOW_ERROR) { output->set_length(begin_offset + required_capacity); return;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc index 22002b5..da32bd8 100644 --- a/url/url_canon_path.cc +++ b/url/url_canon_path.cc
@@ -5,6 +5,7 @@ #include <limits.h> #include "polyfills/base/check.h" +#include "polyfills/base/check_op.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" #include "url/url_parse_internal.h" @@ -261,6 +262,7 @@ bool success = true; for (int i = path.begin; i < end; i++) { + GURL_DCHECK_LT(last_invalid_percent_index, output->length()); UCHAR uch = static_cast<UCHAR>(spec[i]); if (sizeof(CHAR) > 1 && uch >= 0x80) { // We only need to test wide input for having non-ASCII characters. For @@ -303,6 +305,9 @@ break; case DIRECTORY_UP: BackUpToPreviousSlash(path_begin_in_output, output); + if (last_invalid_percent_index >= output->length()) { + last_invalid_percent_index = INT_MIN; + } i += dotlen + consumed_len - 1; break; }
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index 55ef089..e2469ca 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -1423,6 +1423,9 @@ {"ws:)W\x1eW\xef\xb9\xaa" "81:80/", "ws://%29w%1ew%81/", false}, + // Regression test for the last_invalid_percent_index bug described in + // https://crbug.com/1080890#c10. + {R"(HTTP:S/5%\../>%41)", "http://s/%3EA", true}, }; for (size_t i = 0; i < gurl_base::size(cases); i++) {
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc index 18e1895..d9256a2 100644 --- a/url/url_idna_icu.cc +++ b/url/url_idna_icu.cc
@@ -11,6 +11,7 @@ #include <ostream> #include "polyfills/base/check_op.h" +#include "base/i18n/uchar.h" #include "base/no_destructor.h" #include <unicode/uidna.h> #include <unicode/utypes.h> @@ -90,8 +91,10 @@ while (true) { UErrorCode err = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; - int output_length = uidna_nameToASCII(uidna, (UChar*)src, src_len, (UChar*)output->data(), - output->capacity(), &info, &err); + int output_length = + uidna_nameToASCII(uidna, gurl_base::i18n::ToUCharPtr(src), src_len, + gurl_base::i18n::ToUCharPtr(output->data()), + output->capacity(), &info, &err); if (U_SUCCESS(err) && info.errors == 0) { output->set_length(output_length); return true;