Update to upstream revision d02279b46eb30b0f90a3ac18ec306b77a6d2e21e. This updates the googleurl standalone copy to the upstream version from Mon Jun 7 15:26:19 2021 +0000.
diff --git a/AUTHORS b/AUTHORS index ce38168..4bb6b20 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -33,6 +33,8 @@ Adenilson Cavalcanti <a.cavalcanti@samsung.com> Aditya Bhargava <heuristicist@gmail.com> Adrian Belgun <adrian.belgun@intel.com> +Adrian Ratiu <adrian.ratiu@collabora.corp-partner.google.com> +Adrià Vilanova Martínez <me@avm99963.com> Ahmet Emir Ercin <ahmetemiremir@gmail.com> Ajay Berwal <a.berwal@samsung.com> Ajay Berwal <ajay.berwal@samsung.com> @@ -41,6 +43,7 @@ Aku Kotkavuo <a.kotkavuo@partner.samsung.com> Aldo Culquicondor <alculquicondor@gmail.com> Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com> +Alex Chronopoulos <achronop@gmail.com> Alex Gabriel <minilogo@gmail.com> Alex Gartrell <agartrell@cmu.edu> Alex Gaynor <alex.gaynor@gmail.com> @@ -66,6 +69,7 @@ Ali Vathi <ali.akbar@gmail.com> Allan Sandfeld Jensen <allan.jensen@qt.io> Alper Çakan <alpercakan98@gmail.com> +Alvaro Silva <alvaro.fagner@gmail.com> Ambarish Rapte <ambarish.r@samsung.com> Amey Jahagirdar <jahagird@amazon.com> Amit Sarkar <amit.srkr@samsung.com> @@ -125,6 +129,7 @@ Asish Singh <asish.singh@samsung.com> Attila Dusnoki <dati91@gmail.com> Avinaash Doreswamy <avi.nitk@samsung.com> +Ayush Dubey <dubeyaayush07@gmail.com> Ayush Khandelwal <k.ayush@samsung.com> Azhar Shaikh <azhar.shaikh@intel.com> Balazs Kelemen <b.kelemen@samsung.com> @@ -150,6 +155,7 @@ Branden Archer <bma4@zips.uakron.edu> Brendan Kirby <brendan.kirby@imgtec.com> Brendan Long <self@brendanlong.com> +Brendon Tiszka <btiszka@gmail.com> Brian Clifton <clifton@brave.com> Brian G. 
Merrell <bgmerrell@gmail.com> Brian Konzman, SJ <b.g.konzman@gmail.com> @@ -171,6 +177,7 @@ Caitlin Potter <caitpotter88@gmail.com> Calvin Mei <calvimei@amazon.com> Cameron Gutman <aicommander@gmail.com> +Camille Viot <viot.camille@outlook.com> Carlos Santa <carlos.santa@intel.com> Catalin Badea <badea@adobe.com> Cathie Chen <cathiechen@tencent.com> @@ -184,6 +191,7 @@ Chang Shu <c.shu@samsung.com> Changbin Shao <changbin.shao@intel.com> Changjun Yang <changjun.yang@intel.com> +ChangSeok Lee <charlie.lee921@gmail.com> ChangSeok Oh <shivamidow@gmail.com> Changwan Hong <changwan.hong@navercorp.com> Changyeon Kim <cyzero.kim@samsung.com> @@ -235,6 +243,7 @@ Dániel Bátyai <dbatyai@inf.u-szeged.hu> Dániel Vince <vinced@inf.u-szeged.hu> Daoming Qiu <daoming.qiu@intel.com> +Darik Harter <darik.harter@gmail.com> Darshini KN <kn.darshini@samsung.com> Dave Vandyke <kzar@kzar.co.uk> David Benjamin <davidben@mit.edu> @@ -263,6 +272,7 @@ Derek Halman <d.halman@gmail.com> Devlin Cronin <rdevlin.cronin@gmail.com> Dhi Aurrahman <dio@rockybars.com> +Di Wu <meetwudi@gmail.com> Diana Suvorova <diana.suvorova@gmail.com> Diego Ferreiro Val <elfogris@gmail.com> Dillon Sellars <dill.sellars@gmail.com> @@ -458,6 +468,7 @@ Jay Soffian <jaysoffian@gmail.com> Jeado Ko <haibane84@gmail.com> Jeffrey C <jeffreyca16@gmail.com> +Jeffrey Yeung <jeffrey.yeung@poly.com> Jeongeun Kim <je_julie.kim@samsung.com> Jeongmin Kim <kimwjdalsl@gmail.com> Jeongwoo Park <jwoo.park@navercorp.com> @@ -508,7 +519,9 @@ Joe Thomas <mhx348@motorola.com> Joel Stanley <joel@jms.id.au> Joey Jiao <joeyjiao0810@gmail.com> +Joey Mou <joeymou@amazon.com> Johannes Rudolph <johannes.rudolph@googlemail.com> +John Ingve Olsen <johningveolsen@gmail.com> John Kleinschmidt <kleinschmidtorama@gmail.com> John Yani <vanuan@gmail.com> John Yoo <nearbyh13@gmail.com> @@ -562,6 +575,8 @@ Kangil Han <kangil.han@samsung.com> Kangyuan Shu <kangyuan.shu@intel.com> Karan Thakkar <karanjthakkar@gmail.com> +Karel Král <kralkareliv@gmail.com> 
+Karl <karlpolicechromium@gmail.com> Kartikey Bhatt <kartikey@amazon.com> Kaspar Brand <googlecontrib@velox.ch> Kaushalendra Mishra <k.mishra@samsung.com> @@ -629,6 +644,7 @@ Li Yin <li.yin@intel.com> Lidwine Genevet <lgenevet@cisco.com> Lin Sun <lin.sun@intel.com> +Lin Peng <penglin22@huawei.com> Lingqi Chi <someway.bit@gmail.com> Lingyun Cai <lingyun.cai@intel.com> Lionel Landwerlin <lionel.g.landwerlin@intel.com> @@ -646,6 +662,7 @@ Luke Seunghoe Gu <gulukesh@gmail.com> Luke Zarko <lukezarko@gmail.com> Luoxi Pan <l.panpax@gmail.com> +Lu Yahan <yahan@iscas.ac.cn> Maarten Lankhorst <m.b.lankhorst@gmail.com> Maciej Pawlowski <m.pawlowski@eyeo.com> Magnus Danielsson <fuzzac@gmail.com> @@ -672,6 +689,7 @@ Mark Seaborn <mrs@mythic-beasts.com> Mark Winter <wintermarkedward@gmail.com> Martijn Croonen <martijn@martijnc.be> +Martin Aberer <mail@martin-aberer.at> Martin Bednorz <m.s.bednorz@gmail.com> Martin Persson <mnpn03@gmail.com> Martin Rogalla <martin@martinrogalla.com> @@ -695,13 +713,14 @@ Matthew Willis <appamatto@gmail.com> Matthias Reitinger <reimarvin@gmail.com> Matthieu Rigolot <matthieu.rigolot@gmail.com> +Matthieu Vlad Hauglustaine <matt.hauglustaine@gmail.com> Max Perepelitsyn <pph34r@gmail.com> Max Vujovic <mvujovic@adobe.com> Mayank Gupta <mayank.g1@samsung.com> Mayur Kankanwadi <mayurk.vk@samsung.com> Md Abdullah Al Alamin <a.alamin.cse@gmail.com> Md. Hasanur Rashid <hasanur.r@samsung.com> -Md Jobed Hossain <jrony15@gmail.com> +Md Jobed Hossain <jobed.h@samsung.com> Md Sami Uddin <md.sami@samsung.com> Michael Cirone <mikecirone@gmail.com> Michael Constant <mconst@gmail.com> @@ -712,6 +731,7 @@ Michael Morrison <codebythepound@gmail.com> Michael Müller <michael@fds-team.de> Michael Schechter <mike.schechter@gmail.com> +Michael Smith <sideshowbarker@gmail.com> Michaël Zasso <mic.besace@gmail.com> Michael Zugelder <michael@zugelder.org> Michel Promonet <michel.promonet.1@gmail.com> @@ -747,6 +767,7 @@ Myeongjin Cho <myeongjin.cho@navercorp.com> Myles C. 
Maxfield <mymax@amazon.com> Myung-jong Kim <mjkim610@gmail.com> +Myunghoon Kim <asdvfrqwe@gmail.com> Nagarajan Narayanan <nagarajan.n@samsung.com> Nagarjuna Atluri <nagarjuna.a@samsung.com> Naiem Shaik <naiem.shaik@gmail.com> @@ -757,10 +778,12 @@ Naveen Kumar Devaraj <devarajn@amazon.com> Naveen Kumar S G <naveensg@samsung.com> Nayan Kumar K <qtc746@motorola.com> +Nayeem Hasan <nayeemhasan.nh01@gmail.com> Neal Gompa <ngompa13@gmail.com> Ned Williamson <nedwilliamson@gmail.com> Nedeljko Babic <nedeljko.babic@imgtec.com> Nidhi Jaju <nidhijaju127@gmail.com> +Niek van der Maas <mail@niekvandermaas.nl> Nikhil Bansal <n.bansal@samsung.com> Nikhil Sahni <nikhil.sahni@samsung.com> Nikita Ofitserov <himikof@gmail.com> @@ -779,6 +802,7 @@ Olli Raula (Old name Olli Syrjälä) <olli.raula@intel.com> Omar Sandoval <osandov@osandov.com> Owen Yuwono <owenyuwono@gmail.com> +Palash Verma <palashverma47@gmail.com> Pan Deng <pan.deng@intel.com> Parag Radke <nrqv63@motorola.com> Paritosh Kumar <paritosh.in@samsung.com> @@ -807,6 +831,7 @@ Peng Hu <penghu@tencent.com> Peng Jiang <leiyi.jp@gmail.com> Peng Xinchao <pxinchao@gmail.com> +Peng-Yu Chen <pengyu@libstarrify.so> Peter Bright <drpizza@quiscalusmexicanus.org> Peter Brophy <pbrophy@adobe.com> Peter Collingbourne <peter@pcc.me.uk> @@ -825,6 +850,7 @@ Pierre Neter <pierreneter@gmail.com> Pierre-Antoine LaFayette <pierre.lafayette@gmail.com> Po-Chun Chang <pochang0403@gmail.com> +Prakhar Shrivastav <p.shri@samsung.com> Pramod Begur Srinath <pramod.bs@samsung.com> Pranay Kumar <pranay.kumar@samsung.com> Pranjal Jumde <pranjal@brave.com> @@ -899,6 +925,7 @@ Ryan Norton <rnorton10@gmail.com> Ryan Sleevi <ryan-chromium-dev@sleevi.com> Ryan Yoakum <ryoakum@skobalt.com> +Ryo Ogawa <negibokken@gmail.com> Ryuan Choi <ryuan.choi@samsung.com> Saikrishna Arcot <saiarcot895@gmail.com> Sajal Khandelwal <skhandelwa22@bloomberg.net> @@ -942,6 +969,7 @@ Sergey Shekyan <shekyan@gmail.com> Sergey Talantov <sergey.talantov@gmail.com> Sergio Carlos 
Morales Angeles <carloschilazo@gmail.com> +Sergio Garcia Murillo <sergio.garcia.murillo@gmail.com> Sergiy Belozorov <rryk.ua@gmail.com> Seshadri Mahalingam <seshadri.mahalingam@gmail.com> Seungkyu Lee <zx6658@gmail.com> @@ -1001,11 +1029,13 @@ Steven Pennington <spenn@engr.uvic.ca> Steven Roussey <sroussey@gmail.com> Subrahmanya Praveen Munukutla <sataya.m@samsung.com> +Sucheta Saraf <suchetasaraf95@gmail.com> Suchit Agrawal <a.suchit@samsung.com> Sudarsana Babu Nagineni <sudarsana.nagineni@intel.com> Sudarshan Parthasarathy <sudarshan.p@samsung.com> Sujae Jo <sujae33.jo@gmail.com> Sujith S S <sujiths.s@samsung.com> +Sumaid Syed <sumaidsyed@gmail.com> Sunchang Li <johnstonli@tencent.com> Sundoo Kim <nerdooit@gmail.com> Suneel Kota <suneel.kota@samsung.com> @@ -1014,6 +1044,8 @@ Sungmann Cho <sungmann.cho@navercorp.com> Sunil Ratnu <sunil.ratnu@samsung.com> Sunitha Srivatsa <srivats@amazon.com> +Sunwoo Nam <jegalzz88@gmail.com> +Surya K M <suryagowda590@gmail.com> Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Suvanjan Mukherjee <suvanjanmukherjee@gmail.com> Suyambulingam R M <suyambu.rm@samsung.com> @@ -1059,9 +1091,12 @@ Tomas Popela <tomas.popela@gmail.com> Torsten Kurbad <google@tk-webart.de> Toshihito Kikuchi <leamovret@gmail.com> +Toshiaki Tanaka <zokutyou2@gmail.com> Trent Willis <trentmwillis@gmail.com> Trevor Perrin <unsafe@trevp.net> Tripta Gupta <tripta.g@samsung.com> +Tristan Fraipont <tristan.fraipont@gmail.com> +Tudor Brindus <me@tbrindus.ca> Tuukka Toivonen <tuukka.toivonen@intel.com> U. 
Artie Eoff <ullysses.a.eoff@intel.com> Umar Hansa <umar.hansa@gmail.com> @@ -1158,13 +1193,16 @@ Yunchao He <yunchao.he@intel.com> Yupei Lin <yplam@yplam.com> Yupei Wang <perryuwang@tencent.com> +Yuqing Cao <caoyuqing@huawei.com> Yura Yaroshevich <yura.yaroshevich@gmail.com> Yuri Gorobets <yuri.gorobets@gmail.com> Yuriy Taraday <yorik.sar@gmail.com> Yuta Kasai <kasai.yuta0810@gmail.com> Yuvanesh Natarajan <yuvanesh.n1@samsung.com> +Zach Bjornson <zbbjornson@gmail.com> Zeno Albisser <zeno.albisser@digia.com> Zeqin Chen <talonchen@tencent.com> +Zhang Hao <15686357310a@gmail.com> Zhaoze Zhou <zhaoze.zhou@partner.samsung.com> Zheda Chen <zheda.chen@intel.com> Zheng Chuang <zhengchuangscu@gmail.com> @@ -1201,6 +1239,7 @@ Dell Technologies Inc. <*@dell.corp-partner.google.com> Duck Duck Go, Inc. <*@duckduckgo.com> Endless Mobile, Inc. <*@endlessm.com> +EngFlow, Inc. <*@engflow.com> Estimote, Inc. <*@estimote.com> Facebook, Inc. <*@fb.com> Facebook, Inc. <*@oculus.com> @@ -1246,6 +1285,7 @@ Venture 3 Systems LLC <*@venture3systems.com> Vewd Software AS <*@vewd.com> Vivaldi Technologies AS <*@vivaldi.com> +Wacom <*@wacom.com> Yandex LLC <*@yandex-team.ru> # Please DO NOT APPEND here. See comments at the top of the file. # END organizations section.
diff --git a/WORKSPACE b/WORKSPACE index 2823b98..809e6b9 100644 --- a/WORKSPACE +++ b/WORKSPACE
@@ -3,3 +3,11 @@ # found in the LICENSE file. workspace(name = "com_google_googleurl") + +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") + +git_repository( + name = "com_google_absl", + commit = "17c954d90d5661e27db8fc5f086085690a8372d9", + remote = "https://github.com/abseil/abseil-cpp.git", +)
diff --git a/base/BUILD b/base/BUILD index 63787b7..c933421 100644 --- a/base/BUILD +++ b/base/BUILD
@@ -13,14 +13,13 @@ "containers/contiguous_iterator.h", "containers/span.h", "containers/util.h", + "cxx17_backports.h", "debug/leak_annotations.h", "functional/identity.h", "functional/invoke.h", "functional/not_fn.h", - "i18n/uchar.h", "macros.h", "no_destructor.h", - "optional.h", "ranges/algorithm.h", "ranges/functional.h", "ranges/ranges.h", @@ -32,5 +31,6 @@ deps = [ "//build:build_config", "//polyfills", + "@com_google_absl//absl/types:optional", ], )
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index fa961b0..3faca72 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -24,6 +24,13 @@ #define HAS_CPP_ATTRIBUTE(x) 0 #endif +// A wrapper around `__has_builtin`, similar to HAS_CPP_ATTRIBUTE. +#if defined(__has_builtin) +#define HAS_BUILTIN(x) __has_builtin(x) +#else +#define HAS_BUILTIN(x) 0 +#endif + // Annotate a variable indicating it's ok if the variable is not used. // (Typically used to silence a compiler warning when the assignment // is important for some other reason.) @@ -332,4 +339,59 @@ #endif // defined(__clang_analyzer__) +// Use nomerge attribute to disable optimization of merging multiple same calls. +#if defined(__clang__) && __has_attribute(nomerge) +#define NOMERGE [[clang::nomerge]] +#else +#define NOMERGE +#endif + +// Marks a type as being eligible for the "trivial" ABI despite having a +// non-trivial destructor or copy/move constructor. Such types can be relocated +// after construction by simply copying their memory, which makes them eligible +// to be passed in registers. The canonical example is std::unique_ptr. +// +// Use with caution; this has some subtle effects on constructor/destructor +// ordering and will be very incorrect if the type relies on its address +// remaining constant. When used as a function argument (by value), the value +// may be constructed in the caller's stack frame, passed in a register, and +// then used and destructed in the callee's stack frame. A similar thing can +// occur when values are returned. +// +// TRIVIAL_ABI is not needed for types which have a trivial destructor and +// copy/move constructors, such as gurl_base::TimeTicks and other POD. +// +// It is also not likely to be effective on types too large to be passed in one +// or two registers on typical target ABIs. 
+// +// See also: +// https://clang.llvm.org/docs/AttributeReference.html#trivial-abi +// https://libcxx.llvm.org/docs/DesignDocs/UniquePtrTrivialAbi.html +#if defined(__clang__) && __has_attribute(trivial_abi) +#define TRIVIAL_ABI [[clang::trivial_abi]] +#else +#define TRIVIAL_ABI +#endif + +// Marks a member function as reinitializing a moved-from variable. +// See also +// https://clang.llvm.org/extra/clang-tidy/checks/bugprone-use-after-move.html#reinitialization +#if defined(__clang__) && __has_attribute(reinitializes) +#define REINITIALIZES_AFTER_MOVE [[clang::reinitializes]] +#else +#define REINITIALIZES_AFTER_MOVE +#endif + +// Requires constant initialization. See constinit in C++20. Allows to rely on a +// variable being initialized before execution, and not requiring a global +// constructor. +#if defined(__has_attribute) +#if __has_attribute(require_constant_initialization) +#define CONSTINIT __attribute__((require_constant_initialization)) +#endif +#endif +#if !defined(CONSTINIT) +#define CONSTINIT +#endif + #endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/containers/checked_iterators.h b/base/containers/checked_iterators.h index 30c35bd..b5fe925 100644 --- a/base/containers/checked_iterators.h +++ b/base/containers/checked_iterators.h
@@ -11,6 +11,7 @@ #include "polyfills/base/check_op.h" #include "base/containers/util.h" +#include "build/build_config.h" namespace gurl_base { @@ -27,77 +28,24 @@ template <typename U> friend class CheckedContiguousIterator; - constexpr CheckedContiguousIterator() = default; - -#if defined(_LIBCPP_VERSION) - // The following using declaration, single argument implicit constructor and - // friended `__unwrap_iter` overload are required to use an optimized code - // path when using a CheckedContiguousIterator with libc++ algorithms such as - // std::copy(first, last, result), std::copy_backward(first, last, result), - // std::move(first, last, result) and std::move_backward(first, last, result). - // - // Each of these algorithms dispatches to a std::memmove if this is safe to do - // so, i.e. when all of `first`, `last` and `result` are iterators over - // contiguous storage of the same type modulo const qualifiers. - // - // libc++ implements this for its contiguous iterators by invoking the - // unqualified __unwrap_iter, which returns the underlying pointer for - // iterators over std::vector and std::string, and returns the original - // iterator otherwise. - // - // Thus in order to opt into this optimization for CCI, we need to provide our - // own __unwrap_iter, returning the underlying raw pointer if it is safe to do - // so. 
- // - // Furthermore, considering that std::copy is implemented as follows, the - // return type of __unwrap_iter(CCI) needs to be convertible to CCI, which is - // why an appropriate implicit single argument constructor is provided for the - // optimized case: - // - // template <class InIter, class OutIter> - // OutIter copy(InIter first, InIter last, OutIter result) { - // return __copy(__unwrap_iter(first), __unwrap_iter(last), - // __unwrap_iter(result)); - // } - // - // Unoptimized __copy() signature: - // template <class InIter, class OutIter> - // OutIter __copy(InIter first, InIter last, OutIter result); - // - // Optimized __copy() signature: - // template <class T, class U> - // U* __copy(T* first, T* last, U* result); - // - // Finally, this single argument constructor sets all internal fields to the - // passed in pointer. This allows the resulting CCI to be used in other - // optimized calls to std::copy (or std::move, std::copy_backward, - // std::move_backward). However, it should not be used otherwise, since - // invoking any of its public API will result in a GURL_CHECK failure. This also - // means that callers should never use the single argument constructor - // directly. - template <typename U> - using PtrIfSafeToMemmove = std::enable_if_t< - std::is_trivially_copy_assignable<std::remove_const_t<U>>::value, - U*>; - - template <int&... ExplicitArgumentBarrier, typename U = T> - constexpr CheckedContiguousIterator(PtrIfSafeToMemmove<U> ptr) - : start_(ptr), current_(ptr), end_(ptr) {} - - template <int&... ExplicitArgumentBarrier, typename U = T> - friend constexpr PtrIfSafeToMemmove<U> __unwrap_iter( - CheckedContiguousIterator iter) { - return iter.current_; - } + // Required for certain libc++ algorithm optimizations that are not available + // for NaCl. 
+#if defined(_LIBCPP_VERSION) && !defined(OS_NACL) + template <typename Ptr> + friend struct std::pointer_traits; #endif + constexpr CheckedContiguousIterator() = default; + constexpr CheckedContiguousIterator(T* start, const T* end) : CheckedContiguousIterator(start, start, end) {} + constexpr CheckedContiguousIterator(const T* start, T* current, const T* end) : start_(start), current_(current), end_(end) { GURL_CHECK_LE(start, current); GURL_CHECK_LE(current, end); } + constexpr CheckedContiguousIterator(const CheckedContiguousIterator& other) = default; @@ -269,4 +217,49 @@ } // namespace base +#if defined(_LIBCPP_VERSION) && !defined(OS_NACL) +// Specialize both std::__is_cpp17_contiguous_iterator and std::pointer_traits +// for CCI in case we compile with libc++ outside of NaCl. The former is +// required to enable certain algorithm optimizations (e.g. std::copy can be a +// simple std::memmove under certain circumstances), and is a precursor to +// C++20's std::contiguous_iterator concept [1]. Once we actually use C++20 it +// will be enough to add `using iterator_concept = std::contiguous_iterator_tag` +// to the iterator class [2], and we can get rid of this non-standard +// specialization. +// +// The latter is required to obtain the underlying raw pointer without resulting +// in GURL_CHECK failures. The important bit is the `to_address(pointer)` overload, +// which is the standard blessed way to customize `std::to_address(pointer)` in +// C++20 [3]. 
+// +// [1] https://wg21.link/iterator.concept.contiguous +// [2] https://wg21.link/std.iterator.tags +// [3] https://wg21.link/pointer.traits.optmem +namespace std { + +template <typename T> +struct __is_cpp17_contiguous_iterator<::gurl_base::CheckedContiguousIterator<T>> + : true_type {}; + +template <typename T> +struct pointer_traits<::gurl_base::CheckedContiguousIterator<T>> { + using pointer = ::gurl_base::CheckedContiguousIterator<T>; + using element_type = T; + using difference_type = ptrdiff_t; + + template <typename U> + using rebind = ::gurl_base::CheckedContiguousIterator<U>; + + static constexpr pointer pointer_to(element_type& r) noexcept { + return pointer(&r, &r); + } + + static constexpr element_type* to_address(pointer p) noexcept { + return p.current_; + } +}; + +} // namespace std +#endif + #endif // BASE_CONTAINERS_CHECKED_ITERATORS_H_
diff --git a/base/containers/contains.h b/base/containers/contains.h new file mode 100644 index 0000000..55b1fb5 --- /dev/null +++ b/base/containers/contains.h
@@ -0,0 +1,98 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_CONTAINERS_CONTAINS_H_ +#define BASE_CONTAINERS_CONTAINS_H_ + +#include <type_traits> +#include <utility> + +#include "base/ranges/algorithm.h" +#include "base/ranges/ranges.h" +#include "base/template_util.h" + +namespace gurl_base { + +namespace internal { + +// Small helper to detect whether a given type has a nested `key_type` typedef. +// Used below to catch misuses of the API for associative containers. +template <typename T, typename SFINAE = void> +struct HasKeyType : std::false_type {}; + +template <typename T> +struct HasKeyType<T, void_t<typename T::key_type>> : std::true_type {}; + +// Probe whether a `contains` member function exists and return the result of +// `container.contains(value)` if this is a valid expression. This is the +// highest priority option. +template <typename Container, typename Value> +constexpr auto ContainsImpl(const Container& container, + const Value& value, + priority_tag<2>) + -> decltype(container.contains(value)) { + return container.contains(value); +} + +// Probe whether a `find` member function exists and whether its return value +// can be compared with `container.end()`. Intended for STL style maps and sets +// that lack a `contains` member function. +template <typename Container, typename Value> +constexpr auto ContainsImpl(const Container& container, + const Value& value, + priority_tag<1>) + -> decltype(container.find(value) != container.end()) { + return container.find(value) != container.end(); +} + +// Probe whether a `find` member function exists and whether its return value +// can be compared with `Container::npos`. Intended for STL style strings that +// lack a `contains` member function. 
+template <typename Container, typename Value> +constexpr auto ContainsImpl(const Container& container, + const Value& value, + priority_tag<1>) + -> decltype(container.find(value) != Container::npos) { + return container.find(value) != Container::npos; +} + +// Generic fallback option, using a linear search over `container` to find +// `value`. Has the lowest priority. This will not compile for associative +// containers, as this likely is a performance bug. +template <typename Container, typename Value> +constexpr bool ContainsImpl(const Container& container, + const Value& value, + priority_tag<0>) { + static_assert( + !HasKeyType<Container>::value, + "Error: About to perform linear search on an associative container. " + "Either use a more generic comparator (e.g. std::less<>) or, if a linear " + "search is desired, provide an explicit projection parameter."); + return ranges::find(container, value) != ranges::end(container); +} + +} // namespace internal + +// A general purpose utility to check whether `container` contains `value`. This +// will probe whether a `contains` or `find` member function on `container` +// exists, and fall back to a generic linear search over `container`. +template <typename Container, typename Value> +constexpr bool Contains(const Container& container, const Value& value) { + return internal::ContainsImpl(container, value, internal::priority_tag<2>()); +} + +// Overload that allows to provide an additional projection invocable. This +// projection will be applied to every element in `container` before comparing +// it with `value`. This will always perform a linear search. +template <typename Container, typename Value, typename Proj> +constexpr bool Contains(const Container& container, + const Value& value, + Proj proj) { + return ranges::find(container, value, std::move(proj)) != + ranges::end(container); +} + +} // namespace base + +#endif // BASE_CONTAINERS_CONTAINS_H_
diff --git a/base/containers/contiguous_iterator.h b/base/containers/contiguous_iterator.h index 48b2755..ca8e7b3 100644 --- a/base/containers/contiguous_iterator.h +++ b/base/containers/contiguous_iterator.h
@@ -95,8 +95,7 @@ // - https://wg21.link/n4284 template <typename T> struct IsContiguousIterator - : internal::IsContiguousIteratorImpl< - std::remove_cv_t<std::remove_reference_t<T>>> {}; + : internal::IsContiguousIteratorImpl<remove_cvref_t<T>> {}; } // namespace base
diff --git a/base/containers/span.h b/base/containers/span.h index 53b6965..1a54de1 100644 --- a/base/containers/span.h +++ b/base/containers/span.h
@@ -47,7 +47,7 @@ struct ExtentImpl<gurl_base::span<T, N>> : size_constant<N> {}; template <typename T> -using Extent = ExtentImpl<std::remove_cv_t<std::remove_reference_t<T>>>; +using Extent = ExtentImpl<remove_cvref_t<T>>; template <typename T> struct IsSpanImpl : std::false_type {}; @@ -263,7 +263,27 @@ template <typename It, typename = internal::EnableIfCompatibleContiguousIterator<It, T>> constexpr span(It first, size_t count) noexcept - : ExtentStorage(count), data_(gurl_base::to_address(first)) { + : ExtentStorage(count), + // The use of to_address() here is to handle the case where the iterator + // `first` is pointing to the container's `end()`. In that case we can + // not use the address returned from the iterator, or dereference it + // through the iterator's `operator*`, but we can store it. We must assume + // in this case that `count` is 0, since the iterator does not point to + // valid data. Future hardening of iterators may disallow pulling the + // address from `end()`, as demonstrated by asserts() in libstdc++: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93960. + // + // The span API dictates that the `data()` is accessible when size is 0, + // since the pointer may be valid, so we cannot prevent storing and + // giving out an invalid pointer here without breaking API compatibility + // and our unit tests. Thus protecting against this can likely only be + // successful from inside iterators themselves, where the context about + // the pointer is known. + // + // We can not protect here generally against an invalid iterator/count + // being passed in, since we have no context to determine if the + // iterator or count are valid. + data_(gurl_base::to_address(first)) { GURL_CHECK(Extent == dynamic_extent || Extent == count); }
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h new file mode 100644 index 0000000..6378a78 --- /dev/null +++ b/base/cxx17_backports.h
@@ -0,0 +1,94 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_CXX17_BACKPORTS_H_ +#define BASE_CXX17_BACKPORTS_H_ + +#include <array> +#include <initializer_list> +#include <memory> +#include <string> + +namespace gurl_base { + +// C++14 implementation of C++17's std::size(): +// http://en.cppreference.com/w/cpp/iterator/size +template <typename Container> +constexpr auto size(const Container& c) -> decltype(c.size()) { + return c.size(); +} + +template <typename T, size_t N> +constexpr size_t size(const T (&array)[N]) noexcept { + return N; +} + +// C++14 implementation of C++17's std::empty(): +// http://en.cppreference.com/w/cpp/iterator/empty +template <typename Container> +constexpr auto empty(const Container& c) -> decltype(c.empty()) { + return c.empty(); +} + +template <typename T, size_t N> +constexpr bool empty(const T (&array)[N]) noexcept { + return false; +} + +template <typename T> +constexpr bool empty(std::initializer_list<T> il) noexcept { + return il.size() == 0; +} + +// C++14 implementation of C++17's std::data(): +// http://en.cppreference.com/w/cpp/iterator/data +template <typename Container> +constexpr auto data(Container& c) -> decltype(c.data()) { + return c.data(); +} + +// std::basic_string::data() had no mutable overload prior to C++17 [1]. +// Hence this overload is provided. +// Note: str[0] is safe even for empty strings, as they are guaranteed to be +// null-terminated [2]. 
+// +// [1] http://en.cppreference.com/w/cpp/string/basic_string/data +// [2] http://en.cppreference.com/w/cpp/string/basic_string/operator_at +template <typename CharT, typename Traits, typename Allocator> +CharT* data(std::basic_string<CharT, Traits, Allocator>& str) { + return std::addressof(str[0]); +} + +template <typename Container> +constexpr auto data(const Container& c) -> decltype(c.data()) { + return c.data(); +} + +template <typename T, size_t N> +constexpr T* data(T (&array)[N]) noexcept { + return array; +} + +template <typename T> +constexpr const T* data(std::initializer_list<T> il) noexcept { + return il.begin(); +} + +// std::array::data() was not constexpr prior to C++17 [1]. +// Hence these overloads are provided. +// +// [1] https://en.cppreference.com/w/cpp/container/array/data +template <typename T, size_t N> +constexpr T* data(std::array<T, N>& array) noexcept { + return !array.empty() ? &array[0] : nullptr; +} + +template <typename T, size_t N> +constexpr const T* data(const std::array<T, N>& array) noexcept { + return !array.empty() ? &array[0] : nullptr; +} + +} // namespace base + +#endif // BASE_CXX17_BACKPORTS_H_
diff --git a/base/optional.h b/base/optional.h deleted file mode 100644 index c946364..0000000 --- a/base/optional.h +++ /dev/null
@@ -1,953 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_OPTIONAL_H_ -#define BASE_OPTIONAL_H_ - -#include <functional> -#include <type_traits> -#include <utility> - -#include "polyfills/base/check.h" -#include "base/template_util.h" - -namespace gurl_base { - -// Specification: -// http://en.cppreference.com/w/cpp/utility/optional/nullopt_t -struct nullopt_t { - constexpr explicit nullopt_t(int) {} -}; - -// Specification: -// http://en.cppreference.com/w/cpp/utility/optional/nullopt -constexpr nullopt_t nullopt(0); - -// Forward declaration, which is refered by following helpers. -template <typename T> -class Optional; - -namespace internal { - -struct DummyUnionMember {}; - -template <typename T, bool = std::is_trivially_destructible<T>::value> -struct OptionalStorageBase { - // Provide non-defaulted default ctor to make sure it's not deleted by - // non-trivial T::T() in the union. - constexpr OptionalStorageBase() : dummy_() {} - - template <class... Args> - constexpr explicit OptionalStorageBase(in_place_t, Args&&... args) - : is_populated_(true), value_(std::forward<Args>(args)...) {} - - // When T is not trivially destructible we must call its - // destructor before deallocating its memory. - // Note that this hides the (implicitly declared) move constructor, which - // would be used for constexpr move constructor in OptionalStorage<T>. - // It is needed iff T is trivially move constructible. However, the current - // is_trivially_{copy,move}_constructible implementation requires - // is_trivially_destructible (which looks a bug, cf: - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51452 and - // http://cplusplus.github.io/LWG/lwg-active.html#2116), so it is not - // necessary for this case at the moment. Please see also the destructor - // comment in "is_trivially_destructible = true" specialization below. 
- ~OptionalStorageBase() { - if (is_populated_) - value_.~T(); - } - - template <class... Args> - void Init(Args&&... args) { - GURL_DCHECK(!is_populated_); - ::new (std::addressof(value_)) T(std::forward<Args>(args)...); - is_populated_ = true; - } - - bool is_populated_ = false; - union { - // |dummy_| exists so that the union will always be initialized, even when - // it doesn't contain a value. Union members must be initialized for the - // constructor to be 'constexpr'. Having a special trivial class for it is - // better than e.g. using char, because the latter will have to be - // zero-initialized, and the compiler can't optimize this write away, since - // it assumes this might be a programmer's invariant. This can also cause - // problems for conservative GC in Oilpan. Compiler is free to split shared - // and non-shared parts of the union in separate memory locations (or - // registers). If conservative GC is triggered at this moment, the stack - // scanning routine won't find the correct object pointed from - // Optional<HeapObject*>. This dummy valueless struct lets the compiler know - // that we don't care about the value of this union member. - DummyUnionMember dummy_; - T value_; - }; -}; - -template <typename T> -struct OptionalStorageBase<T, true /* trivially destructible */> { - // Provide non-defaulted default ctor to make sure it's not deleted by - // non-trivial T::T() in the union. - constexpr OptionalStorageBase() : dummy_() {} - - template <class... Args> - constexpr explicit OptionalStorageBase(in_place_t, Args&&... args) - : is_populated_(true), value_(std::forward<Args>(args)...) {} - - // When T is trivially destructible (i.e. its destructor does nothing) there - // is no need to call it. Implicitly defined destructor is trivial, because - // both members (bool and union containing only variants which are trivially - // destructible) are trivially destructible. 
- // Explicitly-defaulted destructor is also trivial, but do not use it here, - // because it hides the implicit move constructor. It is needed to implement - // constexpr move constructor in OptionalStorage iff T is trivially move - // constructible. Note that, if T is trivially move constructible, the move - // constructor of OptionalStorageBase<T> is also implicitly defined and it is - // trivially move constructor. If T is not trivially move constructible, - // "not declaring move constructor without destructor declaration" here means - // "delete move constructor", which works because any move constructor of - // OptionalStorage will not refer to it in that case. - - template <class... Args> - void Init(Args&&... args) { - GURL_DCHECK(!is_populated_); - ::new (std::addressof(value_)) T(std::forward<Args>(args)...); - is_populated_ = true; - } - - bool is_populated_ = false; - union { - // |dummy_| exists so that the union will always be initialized, even when - // it doesn't contain a value. Union members must be initialized for the - // constructor to be 'constexpr'. Having a special trivial class for it is - // better than e.g. using char, because the latter will have to be - // zero-initialized, and the compiler can't optimize this write away, since - // it assumes this might be a programmer's invariant. This can also cause - // problems for conservative GC in Oilpan. Compiler is free to split shared - // and non-shared parts of the union in separate memory locations (or - // registers). If conservative GC is triggered at this moment, the stack - // scanning routine won't find the correct object pointed from - // Optional<HeapObject*>. This dummy valueless struct lets the compiler know - // that we don't care about the value of this union member. - DummyUnionMember dummy_; - T value_; - }; -}; - -// Implement conditional constexpr copy and move constructors. 
These are -// constexpr if is_trivially_{copy,move}_constructible<T>::value is true -// respectively. If each is true, the corresponding constructor is defined as -// "= default;", which generates a constexpr constructor (In this case, -// the condition of constexpr-ness is satisfied because the base class also has -// compiler generated constexpr {copy,move} constructors). Note that -// placement-new is prohibited in constexpr. -template <typename T, - bool = is_trivially_copy_constructible<T>::value, - bool = std::is_trivially_move_constructible<T>::value> -struct OptionalStorage : OptionalStorageBase<T> { - // This is no trivially {copy,move} constructible case. Other cases are - // defined below as specializations. - - // Accessing the members of template base class requires explicit - // declaration. - using OptionalStorageBase<T>::is_populated_; - using OptionalStorageBase<T>::value_; - using OptionalStorageBase<T>::Init; - - // Inherit constructors (specifically, the in_place constructor). - using OptionalStorageBase<T>::OptionalStorageBase; - - // User defined constructor deletes the default constructor. - // Define it explicitly. 
- OptionalStorage() = default; - - OptionalStorage(const OptionalStorage& other) { - if (other.is_populated_) - Init(other.value_); - } - - OptionalStorage(OptionalStorage&& other) noexcept( - std::is_nothrow_move_constructible<T>::value) { - if (other.is_populated_) - Init(std::move(other.value_)); - } -}; - -template <typename T> -struct OptionalStorage<T, - true /* trivially copy constructible */, - false /* trivially move constructible */> - : OptionalStorageBase<T> { - using OptionalStorageBase<T>::is_populated_; - using OptionalStorageBase<T>::value_; - using OptionalStorageBase<T>::Init; - using OptionalStorageBase<T>::OptionalStorageBase; - - OptionalStorage() = default; - OptionalStorage(const OptionalStorage& other) = default; - - OptionalStorage(OptionalStorage&& other) noexcept( - std::is_nothrow_move_constructible<T>::value) { - if (other.is_populated_) - Init(std::move(other.value_)); - } -}; - -template <typename T> -struct OptionalStorage<T, - false /* trivially copy constructible */, - true /* trivially move constructible */> - : OptionalStorageBase<T> { - using OptionalStorageBase<T>::is_populated_; - using OptionalStorageBase<T>::value_; - using OptionalStorageBase<T>::Init; - using OptionalStorageBase<T>::OptionalStorageBase; - - OptionalStorage() = default; - OptionalStorage(OptionalStorage&& other) = default; - - OptionalStorage(const OptionalStorage& other) { - if (other.is_populated_) - Init(other.value_); - } -}; - -template <typename T> -struct OptionalStorage<T, - true /* trivially copy constructible */, - true /* trivially move constructible */> - : OptionalStorageBase<T> { - // If both trivially {copy,move} constructible are true, it is not necessary - // to use user-defined constructors. So, just inheriting constructors - // from the base class works. - using OptionalStorageBase<T>::OptionalStorageBase; -}; - -// Base class to support conditionally usable copy-/move- constructors -// and assign operators. 
-template <typename T> -class OptionalBase { - // This class provides implementation rather than public API, so everything - // should be hidden. Often we use composition, but we cannot in this case - // because of C++ language restriction. - protected: - constexpr OptionalBase() = default; - constexpr OptionalBase(const OptionalBase& other) = default; - constexpr OptionalBase(OptionalBase&& other) = default; - - template <class... Args> - constexpr explicit OptionalBase(in_place_t, Args&&... args) - : storage_(in_place, std::forward<Args>(args)...) {} - - // Implementation of converting constructors. - template <typename U> - explicit OptionalBase(const OptionalBase<U>& other) { - if (other.storage_.is_populated_) - storage_.Init(other.storage_.value_); - } - - template <typename U> - explicit OptionalBase(OptionalBase<U>&& other) { - if (other.storage_.is_populated_) - storage_.Init(std::move(other.storage_.value_)); - } - - ~OptionalBase() = default; - - OptionalBase& operator=(const OptionalBase& other) { - CopyAssign(other); - return *this; - } - - OptionalBase& operator=(OptionalBase&& other) noexcept( - std::is_nothrow_move_assignable<T>::value&& - std::is_nothrow_move_constructible<T>::value) { - MoveAssign(std::move(other)); - return *this; - } - - template <typename U> - void CopyAssign(const OptionalBase<U>& other) { - if (other.storage_.is_populated_) - InitOrAssign(other.storage_.value_); - else - FreeIfNeeded(); - } - - template <typename U> - void MoveAssign(OptionalBase<U>&& other) { - if (other.storage_.is_populated_) - InitOrAssign(std::move(other.storage_.value_)); - else - FreeIfNeeded(); - } - - template <typename U> - void InitOrAssign(U&& value) { - if (storage_.is_populated_) - storage_.value_ = std::forward<U>(value); - else - storage_.Init(std::forward<U>(value)); - } - - void FreeIfNeeded() { - if (!storage_.is_populated_) - return; - storage_.value_.~T(); - storage_.is_populated_ = false; - } - - // For implementing conversion, allow 
access to other typed OptionalBase - // class. - template <typename U> - friend class OptionalBase; - - OptionalStorage<T> storage_; -}; - -// The following {Copy,Move}{Constructible,Assignable} structs are helpers to -// implement constructor/assign-operator overloading. Specifically, if T is -// is not movable but copyable, Optional<T>'s move constructor should not -// participate in overload resolution. This inheritance trick implements that. -template <bool is_copy_constructible> -struct CopyConstructible {}; - -template <> -struct CopyConstructible<false> { - constexpr CopyConstructible() = default; - constexpr CopyConstructible(const CopyConstructible&) = delete; - constexpr CopyConstructible(CopyConstructible&&) = default; - CopyConstructible& operator=(const CopyConstructible&) = default; - CopyConstructible& operator=(CopyConstructible&&) = default; -}; - -template <bool is_move_constructible> -struct MoveConstructible {}; - -template <> -struct MoveConstructible<false> { - constexpr MoveConstructible() = default; - constexpr MoveConstructible(const MoveConstructible&) = default; - constexpr MoveConstructible(MoveConstructible&&) = delete; - MoveConstructible& operator=(const MoveConstructible&) = default; - MoveConstructible& operator=(MoveConstructible&&) = default; -}; - -template <bool is_copy_assignable> -struct CopyAssignable {}; - -template <> -struct CopyAssignable<false> { - constexpr CopyAssignable() = default; - constexpr CopyAssignable(const CopyAssignable&) = default; - constexpr CopyAssignable(CopyAssignable&&) = default; - CopyAssignable& operator=(const CopyAssignable&) = delete; - CopyAssignable& operator=(CopyAssignable&&) = default; -}; - -template <bool is_move_assignable> -struct MoveAssignable {}; - -template <> -struct MoveAssignable<false> { - constexpr MoveAssignable() = default; - constexpr MoveAssignable(const MoveAssignable&) = default; - constexpr MoveAssignable(MoveAssignable&&) = default; - MoveAssignable& operator=(const 
MoveAssignable&) = default; - MoveAssignable& operator=(MoveAssignable&&) = delete; -}; - -// Helper to conditionally enable converting constructors and assign operators. -template <typename T, typename U> -using IsConvertibleFromOptional = - disjunction<std::is_constructible<T, Optional<U>&>, - std::is_constructible<T, const Optional<U>&>, - std::is_constructible<T, Optional<U>&&>, - std::is_constructible<T, const Optional<U>&&>, - std::is_convertible<Optional<U>&, T>, - std::is_convertible<const Optional<U>&, T>, - std::is_convertible<Optional<U>&&, T>, - std::is_convertible<const Optional<U>&&, T>>; - -template <typename T, typename U> -using IsAssignableFromOptional = - disjunction<IsConvertibleFromOptional<T, U>, - std::is_assignable<T&, Optional<U>&>, - std::is_assignable<T&, const Optional<U>&>, - std::is_assignable<T&, Optional<U>&&>, - std::is_assignable<T&, const Optional<U>&&>>; - -// Forward compatibility for C++17. -// Introduce one more deeper nested namespace to avoid leaking using std::swap. -namespace swappable_impl { -using std::swap; - -struct IsSwappableImpl { - // Tests if swap can be called. Check<T&>(0) returns true_type iff swap - // is available for T. Otherwise, Check's overload resolution falls back - // to Check(...) declared below thanks to SFINAE, so returns false_type. - template <typename T> - static auto Check(int) - -> decltype(swap(std::declval<T>(), std::declval<T>()), std::true_type()); - - template <typename T> - static std::false_type Check(...); -}; -} // namespace swappable_impl - -template <typename T> -struct IsSwappable : decltype(swappable_impl::IsSwappableImpl::Check<T&>(0)) {}; - -// Forward compatibility for C++20. -template <typename T> -using RemoveCvRefT = std::remove_cv_t<std::remove_reference_t<T>>; - -} // namespace internal - -// On Windows, by default, empty-base class optimization does not work, -// which means even if the base class is empty struct, it still consumes one -// byte for its body. 
__declspec(empty_bases) enables the optimization. -// cf) -// https://blogs.msdn.microsoft.com/vcblog/2016/03/30/optimizing-the-layout-of-empty-base-classes-in-vs2015-update-2-3/ -#ifdef OS_WIN -#define OPTIONAL_DECLSPEC_EMPTY_BASES __declspec(empty_bases) -#else -#define OPTIONAL_DECLSPEC_EMPTY_BASES -#endif - -// gurl_base::Optional is a Chromium version of the C++17 optional class: -// std::optional documentation: -// http://en.cppreference.com/w/cpp/utility/optional -// Chromium documentation: -// https://chromium.googlesource.com/chromium/src/+/master/docs/optional.md -// -// These are the differences between the specification and the implementation: -// - Constructors do not use 'constexpr' as it is a C++14 extension. -// - 'constexpr' might be missing in some places for reasons specified locally. -// - No exceptions are thrown, because they are banned from Chromium. -// Marked noexcept for only move constructor and move assign operators. -// - All the non-members are in the 'base' namespace instead of 'std'. -// -// Note that T cannot have a constructor T(Optional<T>) etc. Optional<T> checks -// T's constructor (specifically via IsConvertibleFromOptional), and in the -// check whether T can be constructible from Optional<T>, which is recursive -// so it does not work. As of Feb 2018, std::optional C++17 implementation in -// both clang and gcc has same limitation. MSVC SFINAE looks to have different -// behavior, but anyway it reports an error, too. 
-template <typename T> -class OPTIONAL_DECLSPEC_EMPTY_BASES Optional - : public internal::OptionalBase<T>, - public internal::CopyConstructible<std::is_copy_constructible<T>::value>, - public internal::MoveConstructible<std::is_move_constructible<T>::value>, - public internal::CopyAssignable<std::is_copy_constructible<T>::value && - std::is_copy_assignable<T>::value>, - public internal::MoveAssignable<std::is_move_constructible<T>::value && - std::is_move_assignable<T>::value> { - private: - // Disable some versions of T that are ill-formed. - // See: https://timsong-cpp.github.io/cppwp/n4659/optional#syn-1 - static_assert( - !std::is_same<internal::RemoveCvRefT<T>, in_place_t>::value, - "instantiation of gurl_base::Optional with in_place_t is ill-formed"); - static_assert(!std::is_same<internal::RemoveCvRefT<T>, nullopt_t>::value, - "instantiation of gurl_base::Optional with nullopt_t is ill-formed"); - static_assert( - !std::is_reference<T>::value, - "instantiation of gurl_base::Optional with a reference type is ill-formed"); - // See: https://timsong-cpp.github.io/cppwp/n4659/optional#optional-3 - static_assert(std::is_destructible<T>::value, - "instantiation of gurl_base::Optional with a non-destructible type " - "is ill-formed"); - // Arrays are explicitly disallowed because for arrays of known bound - // is_destructible is of undefined value. - // See: https://en.cppreference.com/w/cpp/types/is_destructible - static_assert( - !std::is_array<T>::value, - "instantiation of gurl_base::Optional with an array type is ill-formed"); - - public: -#undef OPTIONAL_DECLSPEC_EMPTY_BASES - using value_type = T; - - // Defer default/copy/move constructor implementation to OptionalBase. 
- constexpr Optional() = default; - constexpr Optional(const Optional& other) = default; - constexpr Optional(Optional&& other) noexcept( - std::is_nothrow_move_constructible<T>::value) = default; - - constexpr Optional(nullopt_t) {} // NOLINT(runtime/explicit) - - // Converting copy constructor. "explicit" only if - // std::is_convertible<const U&, T>::value is false. It is implemented by - // declaring two almost same constructors, but that condition in enable_if_t - // is different, so that either one is chosen, thanks to SFINAE. - template < - typename U, - std::enable_if_t<std::is_constructible<T, const U&>::value && - !internal::IsConvertibleFromOptional<T, U>::value && - std::is_convertible<const U&, T>::value, - bool> = false> - Optional(const Optional<U>& other) : internal::OptionalBase<T>(other) {} - - template < - typename U, - std::enable_if_t<std::is_constructible<T, const U&>::value && - !internal::IsConvertibleFromOptional<T, U>::value && - !std::is_convertible<const U&, T>::value, - bool> = false> - explicit Optional(const Optional<U>& other) - : internal::OptionalBase<T>(other) {} - - // Converting move constructor. Similar to converting copy constructor, - // declaring two (explicit and non-explicit) constructors. - template < - typename U, - std::enable_if_t<std::is_constructible<T, U&&>::value && - !internal::IsConvertibleFromOptional<T, U>::value && - std::is_convertible<U&&, T>::value, - bool> = false> - Optional(Optional<U>&& other) : internal::OptionalBase<T>(std::move(other)) {} - - template < - typename U, - std::enable_if_t<std::is_constructible<T, U&&>::value && - !internal::IsConvertibleFromOptional<T, U>::value && - !std::is_convertible<U&&, T>::value, - bool> = false> - explicit Optional(Optional<U>&& other) - : internal::OptionalBase<T>(std::move(other)) {} - - template <class... Args> - constexpr explicit Optional(in_place_t, Args&&... args) - : internal::OptionalBase<T>(in_place, std::forward<Args>(args)...) 
{} - - template < - class U, - class... Args, - class = std::enable_if_t<std::is_constructible<value_type, - std::initializer_list<U>&, - Args...>::value>> - constexpr explicit Optional(in_place_t, - std::initializer_list<U> il, - Args&&... args) - : internal::OptionalBase<T>(in_place, il, std::forward<Args>(args)...) {} - - // Forward value constructor. Similar to converting constructors, - // conditionally explicit. - template < - typename U = value_type, - std::enable_if_t< - std::is_constructible<T, U&&>::value && - !std::is_same<internal::RemoveCvRefT<U>, in_place_t>::value && - !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value && - std::is_convertible<U&&, T>::value, - bool> = false> - constexpr Optional(U&& value) - : internal::OptionalBase<T>(in_place, std::forward<U>(value)) {} - - template < - typename U = value_type, - std::enable_if_t< - std::is_constructible<T, U&&>::value && - !std::is_same<internal::RemoveCvRefT<U>, in_place_t>::value && - !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value && - !std::is_convertible<U&&, T>::value, - bool> = false> - constexpr explicit Optional(U&& value) - : internal::OptionalBase<T>(in_place, std::forward<U>(value)) {} - - ~Optional() = default; - - // Defer copy-/move- assign operator implementation to OptionalBase. - Optional& operator=(const Optional& other) = default; - Optional& operator=(Optional&& other) noexcept( - std::is_nothrow_move_assignable<T>::value&& - std::is_nothrow_move_constructible<T>::value) = default; - - Optional& operator=(nullopt_t) { - FreeIfNeeded(); - return *this; - } - - // Perfect-forwarded assignment. 
- template <typename U> - std::enable_if_t< - !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value && - std::is_constructible<T, U>::value && - std::is_assignable<T&, U>::value && - (!std::is_scalar<T>::value || - !std::is_same<std::decay_t<U>, T>::value), - Optional&> - operator=(U&& value) { - InitOrAssign(std::forward<U>(value)); - return *this; - } - - // Copy assign the state of other. - template <typename U> - std::enable_if_t<!internal::IsAssignableFromOptional<T, U>::value && - std::is_constructible<T, const U&>::value && - std::is_assignable<T&, const U&>::value, - Optional&> - operator=(const Optional<U>& other) { - CopyAssign(other); - return *this; - } - - // Move assign the state of other. - template <typename U> - std::enable_if_t<!internal::IsAssignableFromOptional<T, U>::value && - std::is_constructible<T, U>::value && - std::is_assignable<T&, U>::value, - Optional&> - operator=(Optional<U>&& other) { - MoveAssign(std::move(other)); - return *this; - } - - constexpr const T* operator->() const { - GURL_CHECK(storage_.is_populated_); - return std::addressof(storage_.value_); - } - - constexpr T* operator->() { - GURL_CHECK(storage_.is_populated_); - return std::addressof(storage_.value_); - } - - constexpr const T& operator*() const & { - GURL_CHECK(storage_.is_populated_); - return storage_.value_; - } - - constexpr T& operator*() & { - GURL_CHECK(storage_.is_populated_); - return storage_.value_; - } - - constexpr const T&& operator*() const && { - GURL_CHECK(storage_.is_populated_); - return std::move(storage_.value_); - } - - constexpr T&& operator*() && { - GURL_CHECK(storage_.is_populated_); - return std::move(storage_.value_); - } - - constexpr explicit operator bool() const { return storage_.is_populated_; } - - constexpr bool has_value() const { return storage_.is_populated_; } - - constexpr T& value() & { - GURL_CHECK(storage_.is_populated_); - return storage_.value_; - } - - constexpr const T& value() const & { - 
GURL_CHECK(storage_.is_populated_); - return storage_.value_; - } - - constexpr T&& value() && { - GURL_CHECK(storage_.is_populated_); - return std::move(storage_.value_); - } - - constexpr const T&& value() const && { - GURL_CHECK(storage_.is_populated_); - return std::move(storage_.value_); - } - - template <class U> - constexpr T value_or(U&& default_value) const& { - // TODO(mlamouri): add the following assert when possible: - // static_assert(std::is_copy_constructible<T>::value, - // "T must be copy constructible"); - static_assert(std::is_convertible<U, T>::value, - "U must be convertible to T"); - return storage_.is_populated_ - ? storage_.value_ - : static_cast<T>(std::forward<U>(default_value)); - } - - template <class U> - constexpr T value_or(U&& default_value) && { - // TODO(mlamouri): add the following assert when possible: - // static_assert(std::is_move_constructible<T>::value, - // "T must be move constructible"); - static_assert(std::is_convertible<U, T>::value, - "U must be convertible to T"); - return storage_.is_populated_ - ? std::move(storage_.value_) - : static_cast<T>(std::forward<U>(default_value)); - } - - void swap(Optional& other) { - if (!storage_.is_populated_ && !other.storage_.is_populated_) - return; - - if (storage_.is_populated_ != other.storage_.is_populated_) { - if (storage_.is_populated_) { - other.storage_.Init(std::move(storage_.value_)); - FreeIfNeeded(); - } else { - storage_.Init(std::move(other.storage_.value_)); - other.FreeIfNeeded(); - } - return; - } - - GURL_DCHECK(storage_.is_populated_ && other.storage_.is_populated_); - using std::swap; - swap(**this, *other); - } - - void reset() { FreeIfNeeded(); } - - template <class... Args> - T& emplace(Args&&... args) { - FreeIfNeeded(); - storage_.Init(std::forward<Args>(args)...); - return storage_.value_; - } - - template <class U, class... 
Args> - std::enable_if_t< - std::is_constructible<T, std::initializer_list<U>&, Args&&...>::value, - T&> - emplace(std::initializer_list<U> il, Args&&... args) { - FreeIfNeeded(); - storage_.Init(il, std::forward<Args>(args)...); - return storage_.value_; - } - - private: - // Accessing template base class's protected member needs explicit - // declaration to do so. - using internal::OptionalBase<T>::CopyAssign; - using internal::OptionalBase<T>::FreeIfNeeded; - using internal::OptionalBase<T>::InitOrAssign; - using internal::OptionalBase<T>::MoveAssign; - using internal::OptionalBase<T>::storage_; -}; - -// Here after defines comparation operators. The definition follows -// http://en.cppreference.com/w/cpp/utility/optional/operator_cmp -// while bool() casting is replaced by has_value() to meet the chromium -// style guide. -template <class T, class U> -constexpr bool operator==(const Optional<T>& lhs, const Optional<U>& rhs) { - if (lhs.has_value() != rhs.has_value()) - return false; - if (!lhs.has_value()) - return true; - return *lhs == *rhs; -} - -template <class T, class U> -constexpr bool operator!=(const Optional<T>& lhs, const Optional<U>& rhs) { - if (lhs.has_value() != rhs.has_value()) - return true; - if (!lhs.has_value()) - return false; - return *lhs != *rhs; -} - -template <class T, class U> -constexpr bool operator<(const Optional<T>& lhs, const Optional<U>& rhs) { - if (!rhs.has_value()) - return false; - if (!lhs.has_value()) - return true; - return *lhs < *rhs; -} - -template <class T, class U> -constexpr bool operator<=(const Optional<T>& lhs, const Optional<U>& rhs) { - if (!lhs.has_value()) - return true; - if (!rhs.has_value()) - return false; - return *lhs <= *rhs; -} - -template <class T, class U> -constexpr bool operator>(const Optional<T>& lhs, const Optional<U>& rhs) { - if (!lhs.has_value()) - return false; - if (!rhs.has_value()) - return true; - return *lhs > *rhs; -} - -template <class T, class U> -constexpr bool operator>=(const 
Optional<T>& lhs, const Optional<U>& rhs) { - if (!rhs.has_value()) - return true; - if (!lhs.has_value()) - return false; - return *lhs >= *rhs; -} - -template <class T> -constexpr bool operator==(const Optional<T>& opt, nullopt_t) { - return !opt; -} - -template <class T> -constexpr bool operator==(nullopt_t, const Optional<T>& opt) { - return !opt; -} - -template <class T> -constexpr bool operator!=(const Optional<T>& opt, nullopt_t) { - return opt.has_value(); -} - -template <class T> -constexpr bool operator!=(nullopt_t, const Optional<T>& opt) { - return opt.has_value(); -} - -template <class T> -constexpr bool operator<(const Optional<T>& opt, nullopt_t) { - return false; -} - -template <class T> -constexpr bool operator<(nullopt_t, const Optional<T>& opt) { - return opt.has_value(); -} - -template <class T> -constexpr bool operator<=(const Optional<T>& opt, nullopt_t) { - return !opt; -} - -template <class T> -constexpr bool operator<=(nullopt_t, const Optional<T>& opt) { - return true; -} - -template <class T> -constexpr bool operator>(const Optional<T>& opt, nullopt_t) { - return opt.has_value(); -} - -template <class T> -constexpr bool operator>(nullopt_t, const Optional<T>& opt) { - return false; -} - -template <class T> -constexpr bool operator>=(const Optional<T>& opt, nullopt_t) { - return true; -} - -template <class T> -constexpr bool operator>=(nullopt_t, const Optional<T>& opt) { - return !opt; -} - -template <class T, class U> -constexpr bool operator==(const Optional<T>& opt, const U& value) { - return opt.has_value() ? *opt == value : false; -} - -template <class T, class U> -constexpr bool operator==(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value == *opt : false; -} - -template <class T, class U> -constexpr bool operator!=(const Optional<T>& opt, const U& value) { - return opt.has_value() ? 
*opt != value : true; -} - -template <class T, class U> -constexpr bool operator!=(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value != *opt : true; -} - -template <class T, class U> -constexpr bool operator<(const Optional<T>& opt, const U& value) { - return opt.has_value() ? *opt < value : true; -} - -template <class T, class U> -constexpr bool operator<(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value < *opt : false; -} - -template <class T, class U> -constexpr bool operator<=(const Optional<T>& opt, const U& value) { - return opt.has_value() ? *opt <= value : true; -} - -template <class T, class U> -constexpr bool operator<=(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value <= *opt : false; -} - -template <class T, class U> -constexpr bool operator>(const Optional<T>& opt, const U& value) { - return opt.has_value() ? *opt > value : false; -} - -template <class T, class U> -constexpr bool operator>(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value > *opt : true; -} - -template <class T, class U> -constexpr bool operator>=(const Optional<T>& opt, const U& value) { - return opt.has_value() ? *opt >= value : false; -} - -template <class T, class U> -constexpr bool operator>=(const U& value, const Optional<T>& opt) { - return opt.has_value() ? value >= *opt : true; -} - -template <class T> -constexpr Optional<std::decay_t<T>> make_optional(T&& value) { - return Optional<std::decay_t<T>>(std::forward<T>(value)); -} - -template <class T, class... Args> -constexpr Optional<T> make_optional(Args&&... args) { - return Optional<T>(in_place, std::forward<Args>(args)...); -} - -template <class T, class U, class... Args> -constexpr Optional<T> make_optional(std::initializer_list<U> il, - Args&&... args) { - return Optional<T>(in_place, il, std::forward<Args>(args)...); -} - -// Partial specialization for a function template is not allowed. 
Also, it is -// not allowed to add overload function to std namespace, while it is allowed -// to specialize the template in std. Thus, swap() (kind of) overloading is -// defined in base namespace, instead. -template <class T> -std::enable_if_t<std::is_move_constructible<T>::value && - internal::IsSwappable<T>::value> -swap(Optional<T>& lhs, Optional<T>& rhs) { - lhs.swap(rhs); -} - -} // namespace base - -namespace std { - -template <class T> -struct hash<gurl_base::Optional<T>> { - size_t operator()(const gurl_base::Optional<T>& opt) const { - return opt == gurl_base::nullopt ? 0 : std::hash<T>()(*opt); - } -}; - -} // namespace std - -#endif // BASE_OPTIONAL_H_
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h index e6432f8..b405d2f 100644 --- a/base/ranges/algorithm.h +++ b/base/ranges/algorithm.h
@@ -924,6 +924,17 @@ Pred pred = {}, Proj1 proj1 = {}, Proj2 proj2 = {}) { + if (gurl_base::is_constant_evaluated()) { + for (; first1 != last1 && first2 != last2; ++first1, ++first2) { + if (!gurl_base::invoke(pred, gurl_base::invoke(proj1, *first1), + gurl_base::invoke(proj2, *first2))) { + return false; + } + } + + return first1 == last1 && first2 == last2; + } + return std::equal(first1, last1, first2, last2, internal::ProjectedBinaryPredicate(pred, proj1, proj2)); }
diff --git a/base/stl_util.h b/base/stl_util.h index 29f200e..609b71a 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -10,13 +10,12 @@ #include <algorithm> #include <deque> #include <forward_list> -#include <functional> -#include <initializer_list> #include <iterator> #include <list> #include <map> #include <set> #include <string> +#include <tuple> #include <type_traits> #include <unordered_map> #include <unordered_set> @@ -24,10 +23,9 @@ #include <vector> #include "polyfills/base/check.h" -#include "base/containers/contains.h" -#include "base/optional.h" +#include "base/cxx17_backports.h" #include "base/ranges/algorithm.h" -#include "base/template_util.h" +#include "absl/types/optional.h" namespace gurl_base { @@ -54,93 +52,6 @@ } // namespace internal -// C++14 implementation of C++17's std::size(): -// http://en.cppreference.com/w/cpp/iterator/size -template <typename Container> -constexpr auto size(const Container& c) -> decltype(c.size()) { - return c.size(); -} - -template <typename T, size_t N> -constexpr size_t size(const T (&array)[N]) noexcept { - return N; -} - -// C++14 implementation of C++17's std::empty(): -// http://en.cppreference.com/w/cpp/iterator/empty -template <typename Container> -constexpr auto empty(const Container& c) -> decltype(c.empty()) { - return c.empty(); -} - -template <typename T, size_t N> -constexpr bool empty(const T (&array)[N]) noexcept { - return false; -} - -template <typename T> -constexpr bool empty(std::initializer_list<T> il) noexcept { - return il.size() == 0; -} - -// C++14 implementation of C++17's std::data(): -// http://en.cppreference.com/w/cpp/iterator/data -template <typename Container> -constexpr auto data(Container& c) -> decltype(c.data()) { - return c.data(); -} - -// std::basic_string::data() had no mutable overload prior to C++17 [1]. -// Hence this overload is provided. -// Note: str[0] is safe even for empty strings, as they are guaranteed to be -// null-terminated [2]. 
-// -// [1] http://en.cppreference.com/w/cpp/string/basic_string/data -// [2] http://en.cppreference.com/w/cpp/string/basic_string/operator_at -template <typename CharT, typename Traits, typename Allocator> -CharT* data(std::basic_string<CharT, Traits, Allocator>& str) { - return std::addressof(str[0]); -} - -template <typename Container> -constexpr auto data(const Container& c) -> decltype(c.data()) { - return c.data(); -} - -template <typename T, size_t N> -constexpr T* data(T (&array)[N]) noexcept { - return array; -} - -template <typename T> -constexpr const T* data(std::initializer_list<T> il) noexcept { - return il.begin(); -} - -// std::array::data() was not constexpr prior to C++17 [1]. -// Hence these overloads are provided. -// -// [1] https://en.cppreference.com/w/cpp/container/array/data -template <typename T, size_t N> -constexpr T* data(std::array<T, N>& array) noexcept { - return !array.empty() ? &array[0] : nullptr; -} - -template <typename T, size_t N> -constexpr const T* data(const std::array<T, N>& array) noexcept { - return !array.empty() ? &array[0] : nullptr; -} - -// C++14 implementation of C++17's std::as_const(): -// https://en.cppreference.com/w/cpp/utility/as_const -template <typename T> -constexpr std::add_const_t<T>& as_const(T& t) noexcept { - return t; -} - -template <typename T> -void as_const(const T&& t) = delete; - // Simplified C++14 implementation of C++20's std::to_address. // Note: This does not consider specializations of pointer_traits<>::to_address, // since that member function may only be present in C++20 and later. @@ -158,6 +69,17 @@ return to_address(p.operator->()); } +// Implementation of C++23's std::to_underlying. +// +// Note: This has an additional `std::is_enum<EnumT>` requirement to be SFINAE +// friendly prior to C++20. 
+// +// Reference: https://en.cppreference.com/w/cpp/utility/to_underlying +template <typename EnumT, typename = std::enable_if_t<std::is_enum<EnumT>{}>> +constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) noexcept { + return static_cast<std::underlying_type_t<EnumT>>(e); +} + // Returns a const reference to the underlying container of a container adapter. // Works for std::priority_queue, std::queue, and std::stack. template <class A> @@ -618,21 +540,21 @@ // Helper for returning the optional value's address, or nullptr. template <class T> -T* OptionalOrNullptr(gurl_base::Optional<T>& optional) { +T* OptionalOrNullptr(absl::optional<T>& optional) { return optional.has_value() ? &optional.value() : nullptr; } template <class T> -const T* OptionalOrNullptr(const gurl_base::Optional<T>& optional) { +const T* OptionalOrNullptr(const absl::optional<T>& optional) { return optional.has_value() ? &optional.value() : nullptr; } -// Helper for creating an Optional<T> from a potentially nullptr T*. +// Helper for creating an optional<T> from a potentially nullptr T*. template <class T> -gurl_base::Optional<T> OptionalFromPtr(const T* value) { +absl::optional<T> OptionalFromPtr(const T* value) { if (value) - return gurl_base::Optional<T>(*value); - return gurl_base::nullopt; + return absl::optional<T>(*value); + return absl::nullopt; } } // namespace base
diff --git a/base/strings/BUILD b/base/strings/BUILD index a1346bd..257f8f9 100644 --- a/base/strings/BUILD +++ b/base/strings/BUILD
@@ -15,7 +15,6 @@ ], hdrs = [ "char_traits.h", - "string16.h", "string_piece_forward.h", "string_piece.h", "string_util.h",
diff --git a/base/strings/abseil_string_conversions.cc b/base/strings/abseil_string_conversions.cc index e7c746e..5915b53 100644 --- a/base/strings/abseil_string_conversions.cc +++ b/base/strings/abseil_string_conversions.cc
@@ -9,7 +9,7 @@ #include "base/containers/span.h" #include "base/ranges/algorithm.h" #include "base/strings/string_piece.h" -#include "third_party/abseil-cpp/absl/strings/string_view.h" +#include "absl/strings/string_view.h" namespace gurl_base {
diff --git a/base/strings/abseil_string_conversions.h b/base/strings/abseil_string_conversions.h index c821d93..1cee63d 100644 --- a/base/strings/abseil_string_conversions.h +++ b/base/strings/abseil_string_conversions.h
@@ -10,7 +10,7 @@ #include "polyfills/base/base_export.h" #include "base/containers/span.h" #include "base/strings/string_piece.h" -#include "third_party/abseil-cpp/absl/strings/string_view.h" +#include "absl/strings/string_view.h" namespace gurl_base {
diff --git a/base/strings/abseil_string_conversions_unittest.cc b/base/strings/abseil_string_conversions_unittest.cc index e5f70f0..6c8816d 100644 --- a/base/strings/abseil_string_conversions_unittest.cc +++ b/base/strings/abseil_string_conversions_unittest.cc
@@ -10,7 +10,7 @@ #include "base/strings/string_piece.h" #include "base/strings/string_piece_forward.h" #include "testing/gtest/include/gtest/gtest.h" -#include "third_party/abseil-cpp/absl/strings/string_view.h" +#include "absl/strings/string_view.h" namespace gurl_base {
diff --git a/base/strings/char_traits.h b/base/strings/char_traits.h index 13f5833..fe01c53 100644 --- a/base/strings/char_traits.h +++ b/base/strings/char_traits.h
@@ -29,6 +29,10 @@ // Returns the length of |s|, assuming null termination (and not including the // terminating null). static constexpr size_t length(const T* s) noexcept; + + // Searches for character |c| within the first |n| characters of the sequence + // pointed to by |s|. + static constexpr const T* find(const T* s, size_t n, T c); }; template <typename T> @@ -56,6 +60,15 @@ return i; } +template <typename T> +constexpr const T* CharTraits<T>::find(const T* s, size_t n, T c) { + for (; n; --n, ++s) { + if (std::char_traits<T>::eq(*s, c)) + return s; + } + return nullptr; +} + // char and wchar_t specialization of CharTraits that can use clang's constexpr // instrinsics, where available. #if HAS_FEATURE(cxx_constexpr_string_builtins) @@ -70,6 +83,10 @@ static constexpr size_t length(const char* s) noexcept { return __builtin_strlen(s); } + + static constexpr const char* find(const char* s, size_t n, char c) { + return __builtin_char_memchr(s, c, n); + } }; template <> @@ -83,6 +100,10 @@ static constexpr size_t length(const wchar_t* s) noexcept { return __builtin_wcslen(s); } + + static constexpr const wchar_t* find(const wchar_t* s, size_t n, wchar_t c) { + return __builtin_wmemchr(s, c, n); + } }; #endif
diff --git a/base/strings/char_traits_unittest.cc b/base/strings/char_traits_unittest.cc index d0fdc07..d735d4a 100644 --- a/base/strings/char_traits_unittest.cc +++ b/base/strings/char_traits_unittest.cc
@@ -3,7 +3,6 @@ // found in the LICENSE file. #include "base/strings/char_traits.h" -#include "base/strings/string16.h" #include "testing/gtest/include/gtest/gtest.h" namespace gurl_base {
diff --git a/base/strings/escape.cc b/base/strings/escape.cc index cf46fca..ff6f6d8 100644 --- a/base/strings/escape.cc +++ b/base/strings/escape.cc
@@ -4,6 +4,7 @@ #include "base/strings/escape.h" +#include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/third_party/icu/icu_utf.h" @@ -262,7 +263,7 @@ adjustments->clear(); // Do not unescape anything, return the |escaped_text| text. if (rules == UnescapeRule::NONE) - return escaped_text.as_string(); + return std::string(escaped_text); // The output of the unescaping is always smaller than the input, so we can // reserve the input size to make sure we have enough buffer and don't have @@ -335,11 +336,11 @@ return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, nullptr); } -string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( +std::u16string UnescapeAndDecodeUTF8URLComponentWithAdjustments( StringPiece text, UnescapeRule::Type rules, OffsetAdjuster::Adjustments* adjustments) { - string16 result; + std::u16string result; OffsetAdjuster::Adjustments unescape_adjustments; std::string unescaped_url( UnescapeURLWithAdjustmentsImpl(text, rules, &unescape_adjustments));
diff --git a/base/strings/escape.h b/base/strings/escape.h index 0bb6aea..96ce110 100644 --- a/base/strings/escape.h +++ b/base/strings/escape.h
@@ -11,7 +11,6 @@ #include <string> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/utf_offset_string_conversions.h" @@ -78,10 +77,10 @@ // Unescapes the given substring as a URL, and then tries to interpret the // result as being encoded as UTF-8. If the result is convertible into UTF-8, it // will be returned as converted. If it is not, the original escaped string will -// be converted into a string16 and returned. |adjustments| provides +// be converted into a std::u16string and returned. |adjustments| provides // information on how the original string was adjusted to get the string // returned. -BASE_EXPORT string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments( +BASE_EXPORT std::u16string UnescapeAndDecodeUTF8URLComponentWithAdjustments( StringPiece text, UnescapeRule::Type rules, OffsetAdjuster::Adjustments* adjustments);
diff --git a/base/strings/escape_unittest.cc b/base/strings/escape_unittest.cc index e6c0b1a..b8a5fd6 100644 --- a/base/strings/escape_unittest.cc +++ b/base/strings/escape_unittest.cc
@@ -8,7 +8,6 @@ #include "base/strings/escape.h" #include "base/strings/string_util.h" -#include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" @@ -250,7 +249,7 @@ // The adjustments argument is covered by the next test. // // TODO: Need to test unescape_spaces and unescape_percent. - string16 decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments( + std::u16string decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments( unescape_case.input, UnescapeRule::NORMAL, nullptr); EXPECT_EQ(WideToUTF16(unescape_case.decoded), decoded); }
diff --git a/base/strings/latin1_string_conversions.cc b/base/strings/latin1_string_conversions.cc index 5569015..e4b4020 100644 --- a/base/strings/latin1_string_conversions.cc +++ b/base/strings/latin1_string_conversions.cc
@@ -6,14 +6,14 @@ namespace gurl_base { -string16 Latin1OrUTF16ToUTF16(size_t length, - const Latin1Char* latin1, - const char16* utf16) { +std::u16string Latin1OrUTF16ToUTF16(size_t length, + const Latin1Char* latin1, + const char16_t* utf16) { if (!length) - return string16(); + return std::u16string(); if (latin1) - return string16(latin1, latin1 + length); - return string16(utf16, utf16 + length); + return std::u16string(latin1, latin1 + length); + return std::u16string(utf16, utf16 + length); } } // namespace base
diff --git a/base/strings/latin1_string_conversions.h b/base/strings/latin1_string_conversions.h index 3d60980..7b67073 100644 --- a/base/strings/latin1_string_conversions.h +++ b/base/strings/latin1_string_conversions.h
@@ -10,7 +10,6 @@ #include <string> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" namespace gurl_base { @@ -20,14 +19,15 @@ typedef unsigned char Latin1Char; // This somewhat odd function is designed to help us convert from Blink Strings -// to string16. A Blink string is either backed by an array of Latin-1 +// to std::u16string. A Blink string is either backed by an array of Latin-1 // characters or an array of UTF-16 characters. This function is called by -// WebString::operator string16() to convert one or the other character array -// to string16. This function is defined here rather than in WebString.h to -// avoid binary bloat in all the callers of the conversion operator. -BASE_EXPORT string16 Latin1OrUTF16ToUTF16(size_t length, - const Latin1Char* latin1, - const char16* utf16); +// WebString::operator std::u16string() to convert one or the other character +// array to std::u16string. This function is defined here rather than in +// WebString.h to avoid binary bloat in all the callers of the conversion +// operator. +BASE_EXPORT std::u16string Latin1OrUTF16ToUTF16(size_t length, + const Latin1Char* latin1, + const char16_t* utf16); } // namespace base
diff --git a/base/strings/nullable_string16.cc b/base/strings/nullable_string16.cc deleted file mode 100644 index 618800d..0000000 --- a/base/strings/nullable_string16.cc +++ /dev/null
@@ -1,33 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/strings/nullable_string16.h" - -#include <ostream> -#include <utility> - -namespace gurl_base { -NullableString16::NullableString16() = default; -NullableString16::NullableString16(const NullableString16& other) = default; -NullableString16::NullableString16(NullableString16&& other) = default; - -NullableString16::NullableString16(const string16& string, bool is_null) { - if (!is_null) - string_.emplace(string); -} - -NullableString16::NullableString16(Optional<string16> optional_string16) - : string_(std::move(optional_string16)) {} - -NullableString16::~NullableString16() = default; -NullableString16& NullableString16::operator=(const NullableString16& other) = - default; -NullableString16& NullableString16::operator=(NullableString16&& other) = - default; - -std::ostream& operator<<(std::ostream& out, const NullableString16& value) { - return value.is_null() ? out << "(null)" : out << value.string(); -} - -} // namespace base
diff --git a/base/strings/nullable_string16.h b/base/strings/nullable_string16.h deleted file mode 100644 index f2ca7bd..0000000 --- a/base/strings/nullable_string16.h +++ /dev/null
@@ -1,55 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_STRINGS_NULLABLE_STRING16_H_ -#define BASE_STRINGS_NULLABLE_STRING16_H_ - -#include <iosfwd> - -#include "polyfills/base/base_export.h" -#include "base/optional.h" -#include "base/strings/string16.h" -#include "base/strings/string_util.h" - -namespace gurl_base { - -// This class is a simple wrapper for string16 which also contains a null -// state. This should be used only where the difference between null and -// empty is meaningful. -class BASE_EXPORT NullableString16 { - public: - NullableString16(); - NullableString16(const NullableString16& other); - NullableString16(NullableString16&& other); - NullableString16(const string16& string, bool is_null); - explicit NullableString16(Optional<string16> optional_string16); - ~NullableString16(); - - NullableString16& operator=(const NullableString16& other); - NullableString16& operator=(NullableString16&& other); - - const string16& string() const { - return string_ ? *string_ : EmptyString16(); - } - bool is_null() const { return !string_; } - const Optional<string16>& as_optional_string16() const { return string_; } - - private: - Optional<string16> string_; -}; - -inline bool operator==(const NullableString16& a, const NullableString16& b) { - return a.as_optional_string16() == b.as_optional_string16(); -} - -inline bool operator!=(const NullableString16& a, const NullableString16& b) { - return !(a == b); -} - -BASE_EXPORT std::ostream& operator<<(std::ostream& out, - const NullableString16& value); - -} // namespace base - -#endif // BASE_STRINGS_NULLABLE_STRING16_H_
diff --git a/base/strings/nullable_string16_unittest.cc b/base/strings/nullable_string16_unittest.cc deleted file mode 100644 index e3d063f..0000000 --- a/base/strings/nullable_string16_unittest.cc +++ /dev/null
@@ -1,35 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/strings/nullable_string16.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace gurl_base { - -TEST(NullableString16Test, DefaultConstructor) { - NullableString16 s; - EXPECT_TRUE(s.is_null()); - EXPECT_EQ(string16(), s.string()); -} - -TEST(NullableString16Test, Equals) { - NullableString16 a(ASCIIToUTF16("hello"), false); - NullableString16 b(ASCIIToUTF16("hello"), false); - EXPECT_EQ(a, b); -} - -TEST(NullableString16Test, NotEquals) { - NullableString16 a(ASCIIToUTF16("hello"), false); - NullableString16 b(ASCIIToUTF16("world"), false); - EXPECT_NE(a, b); -} - -TEST(NullableString16Test, NotEqualsNull) { - NullableString16 a(ASCIIToUTF16("hello"), false); - NullableString16 b; - EXPECT_NE(a, b); -} - -} // namespace base
diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc index 65ec075..d7c9a47 100644 --- a/base/strings/pattern.cc +++ b/base/strings/pattern.cc
@@ -131,7 +131,7 @@ }; struct NextCharUTF16 { - base_icu::UChar32 operator()(const char16** p, const char16* end) { + base_icu::UChar32 operator()(const char16_t** p, const char16_t* end) { base_icu::UChar32 c; int offset = 0; CBU16_NEXT(*p, offset, end - *p, c);
diff --git a/base/strings/pattern_unittest.cc b/base/strings/pattern_unittest.cc index 540f784..20383e8 100644 --- a/base/strings/pattern_unittest.cc +++ b/base/strings/pattern_unittest.cc
@@ -37,10 +37,8 @@ EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); // Test UTF16 character matching. - EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), - UTF8ToUTF16("*.com"))); - EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), - UTF8ToUTF16("He??o\\*1*"))); + EXPECT_TRUE(MatchPattern(u"www.google.com", u"*.com")); + EXPECT_TRUE(MatchPattern(u"Hello*1234", u"He??o\\*1*")); // Some test cases that might cause naive implementations to exhibit // exponential run time or fail.
diff --git a/base/strings/strcat.cc b/base/strings/strcat.cc index 6b007c7..c6b8faf 100644 --- a/base/strings/strcat.cc +++ b/base/strings/strcat.cc
@@ -14,7 +14,7 @@ return internal::StrCatT(pieces); } -string16 StrCat(span<const StringPiece16> pieces) { +std::u16string StrCat(span<const StringPiece16> pieces) { return internal::StrCatT(pieces); } @@ -22,24 +22,24 @@ return internal::StrCatT(pieces); } -string16 StrCat(span<const string16> pieces) { +std::u16string StrCat(span<const std::u16string> pieces) { return internal::StrCatT(pieces); } void StrAppend(std::string* dest, span<const StringPiece> pieces) { - internal::StrAppendT(dest, pieces); + internal::StrAppendT(*dest, pieces); } -void StrAppend(string16* dest, span<const StringPiece16> pieces) { - internal::StrAppendT(dest, pieces); +void StrAppend(std::u16string* dest, span<const StringPiece16> pieces) { + internal::StrAppendT(*dest, pieces); } void StrAppend(std::string* dest, span<const std::string> pieces) { - internal::StrAppendT(dest, pieces); + internal::StrAppendT(*dest, pieces); } -void StrAppend(string16* dest, span<const string16> pieces) { - internal::StrAppendT(dest, pieces); +void StrAppend(std::u16string* dest, span<const std::u16string> pieces) { + internal::StrAppendT(*dest, pieces); } } // namespace base
diff --git a/base/strings/strcat.h b/base/strings/strcat.h index 2d85304..fe35447 100644 --- a/base/strings/strcat.h +++ b/base/strings/strcat.h
@@ -61,18 +61,19 @@ BASE_EXPORT std::string StrCat(span<const StringPiece> pieces) WARN_UNUSED_RESULT; -BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces) +BASE_EXPORT std::u16string StrCat(span<const StringPiece16> pieces) WARN_UNUSED_RESULT; BASE_EXPORT std::string StrCat(span<const std::string> pieces) WARN_UNUSED_RESULT; -BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string StrCat(span<const std::u16string> pieces) + WARN_UNUSED_RESULT; // Initializer list forwards to the array version. inline std::string StrCat(std::initializer_list<StringPiece> pieces) { return StrCat(make_span(pieces)); } -inline string16 StrCat(std::initializer_list<StringPiece16> pieces) { +inline std::u16string StrCat(std::initializer_list<StringPiece16> pieces) { return StrCat(make_span(pieces)); } @@ -85,9 +86,11 @@ // because it avoids a temporary string allocation and copy. BASE_EXPORT void StrAppend(std::string* dest, span<const StringPiece> pieces); -BASE_EXPORT void StrAppend(string16* dest, span<const StringPiece16> pieces); +BASE_EXPORT void StrAppend(std::u16string* dest, + span<const StringPiece16> pieces); BASE_EXPORT void StrAppend(std::string* dest, span<const std::string> pieces); -BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces); +BASE_EXPORT void StrAppend(std::u16string* dest, + span<const std::u16string> pieces); // Initializer list forwards to the array version. inline void StrAppend(std::string* dest, @@ -95,7 +98,7 @@ StrAppend(dest, make_span(pieces)); } -inline void StrAppend(string16* dest, +inline void StrAppend(std::u16string* dest, std::initializer_list<StringPiece16> pieces) { StrAppend(dest, make_span(pieces)); }
diff --git a/base/strings/strcat_internal.h b/base/strings/strcat_internal.h index 24387d6..8011946 100644 --- a/base/strings/strcat_internal.h +++ b/base/strings/strcat_internal.h
@@ -8,48 +8,64 @@ #include <string> #include "base/containers/span.h" +#include "base/template_util.h" namespace gurl_base { namespace internal { -// Reserves an additional amount of capacity in the given string, growing by at -// least 2x if necessary. Used by StrAppendT(). -// -// The "at least 2x" growing rule duplicates the exponential growth of -// std::string. The problem is that most implementations of reserve() will grow -// exactly to the requested amount instead of exponentially growing like would -// happen when appending normally. If we didn't do this, an append after the -// call to StrAppend() would definitely cause a reallocation, and loops with -// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want -// StrAppend() to have the same semantics as std::string::append(). -template <typename String> -void ReserveAdditionalIfNeeded(String* str, - typename String::size_type additional) { - const size_t required = str->size() + additional; - // Check whether we need to reserve additional capacity at all. - if (required <= str->capacity()) - return; - - str->reserve(std::max(required, str->capacity() * 2)); +// Optimized version of `std::basic_string::resize()` that skips zero +// initialization of appended characters. Reading from the newly allocated +// characters results in undefined behavior if they are not explicitly +// initialized afterwards. 
Currently proposed for standardization as +// std::basic_string::resize_and_overwrite: https://wg21.link/P1072R6 +template <typename CharT> +auto Resize(std::basic_string<CharT>& str, size_t total_size, priority_tag<1>) + -> decltype(str.__resize_default_init(total_size)) { + str.__resize_default_init(total_size); } -template <typename DestString, typename InputString> -void StrAppendT(DestString* dest, span<const InputString> pieces) { - size_t additional_size = 0; - for (const auto& cur : pieces) - additional_size += cur.size(); - ReserveAdditionalIfNeeded(dest, additional_size); +// Fallback to regular std::basic_string::resize() if invoking +// __resize_default_init is ill-formed. +template <typename CharT> +void Resize(std::basic_string<CharT>& str, size_t total_size, priority_tag<0>) { + str.resize(total_size); +} +// Appends `pieces` to `dest`. Instead of simply calling `dest.append()` +// `pieces.size()` times, this method first resizes `dest` to be of the desired +// size, and then appends each piece via `std::char_traits::copy`. This achieves +// two goals: +// 1) Allocating the desired size all at once avoids other allocations that +// could happen if intermediate allocations did not reserve enough capacity. +// 2) Invoking std::char_traits::copy instead of std::basic_string::append +// avoids having to write the terminating '\0' character n times. +template <typename CharT, typename StringT> +void StrAppendT(std::basic_string<CharT>& dest, span<const StringT> pieces) { + const size_t initial_size = dest.size(); + size_t total_size = initial_size; for (const auto& cur : pieces) - dest->append(cur.data(), cur.size()); + total_size += cur.size(); + + // Note: As opposed to `reserve()` calling `resize()` with an argument smaller + // than the current `capacity()` does not result in the string releasing spare + // capacity. 
Furthermore, common std::string implementations apply a geometric + // growth strategy if the current capacity is not sufficient for the newly + // added characters. Since this codepath is also triggered by `resize()`, we + // don't have to manage the std::string's capacity ourselves here to avoid + // performance hits in case `StrAppend()` gets called in a loop. + Resize(dest, total_size, priority_tag<1>()); + CharT* dest_char = &dest[initial_size]; + for (const auto& cur : pieces) { + std::char_traits<CharT>::copy(dest_char, cur.data(), cur.size()); + dest_char += cur.size(); + } } template <typename StringT> auto StrCatT(span<const StringT> pieces) { - std::basic_string<typename StringT::value_type, typename StringT::traits_type> - result; - StrAppendT(&result, pieces); + std::basic_string<typename StringT::value_type> result; + StrAppendT(result, pieces); return result; }
diff --git a/base/strings/strcat_unittest.cc b/base/strings/strcat_unittest.cc index 9374c39..d6a68d3 100644 --- a/base/strings/strcat_unittest.cc +++ b/base/strings/strcat_unittest.cc
@@ -18,14 +18,14 @@ } TEST(StrCat, 16Bit) { - string16 arg1 = ASCIIToUTF16("1"); - string16 arg2 = ASCIIToUTF16("22"); - string16 arg3 = ASCIIToUTF16("333"); + std::u16string arg1 = u"1"; + std::u16string arg2 = u"22"; + std::u16string arg3 = u"333"; - EXPECT_EQ(ASCIIToUTF16(""), StrCat({string16()})); - EXPECT_EQ(ASCIIToUTF16("1"), StrCat({arg1})); - EXPECT_EQ(ASCIIToUTF16("122"), StrCat({arg1, arg2})); - EXPECT_EQ(ASCIIToUTF16("122333"), StrCat({arg1, arg2, arg3})); + EXPECT_EQ(u"", StrCat({std::u16string()})); + EXPECT_EQ(u"1", StrCat({arg1})); + EXPECT_EQ(u"122", StrCat({arg1, arg2})); + EXPECT_EQ(u"122333", StrCat({arg1, arg2, arg3})); } TEST(StrAppend, 8Bit) { @@ -45,23 +45,23 @@ } TEST(StrAppend, 16Bit) { - string16 arg1 = ASCIIToUTF16("1"); - string16 arg2 = ASCIIToUTF16("22"); - string16 arg3 = ASCIIToUTF16("333"); + std::u16string arg1 = u"1"; + std::u16string arg2 = u"22"; + std::u16string arg3 = u"333"; - string16 result; + std::u16string result; - result = ASCIIToUTF16("foo"); - StrAppend(&result, {string16()}); - EXPECT_EQ(ASCIIToUTF16("foo"), result); + result = u"foo"; + StrAppend(&result, {std::u16string()}); + EXPECT_EQ(u"foo", result); - result = ASCIIToUTF16("foo"); + result = u"foo"; StrAppend(&result, {arg1}); - EXPECT_EQ(ASCIIToUTF16("foo1"), result); + EXPECT_EQ(u"foo1", result); - result = ASCIIToUTF16("foo"); + result = u"foo"; StrAppend(&result, {arg1, arg2, arg3}); - EXPECT_EQ(ASCIIToUTF16("foo122333"), result); + EXPECT_EQ(u"foo122333", result); } TEST(StrAppendT, ReserveAdditionalIfNeeded) {
diff --git a/base/strings/strcat_win.cc b/base/strings/strcat_win.cc index 60b22bc..affc99e 100644 --- a/base/strings/strcat_win.cc +++ b/base/strings/strcat_win.cc
@@ -12,8 +12,6 @@ namespace gurl_base { -#if defined(BASE_STRING16_IS_STD_U16STRING) - std::wstring StrCat(span<const WStringPiece> pieces) { return internal::StrCatT(pieces); } @@ -23,13 +21,11 @@ } void StrAppend(std::wstring* dest, span<const WStringPiece> pieces) { - internal::StrAppendT(dest, pieces); + internal::StrAppendT(*dest, pieces); } void StrAppend(std::wstring* dest, span<const std::wstring> pieces) { - internal::StrAppendT(dest, pieces); + internal::StrAppendT(*dest, pieces); } -#endif - } // namespace base
diff --git a/base/strings/strcat_win.h b/base/strings/strcat_win.h index 68f8a58..70926bc 100644 --- a/base/strings/strcat_win.h +++ b/base/strings/strcat_win.h
@@ -16,12 +16,7 @@ namespace gurl_base { // The following section contains overloads of the cross-platform APIs for -// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring -// and gurl_base::string16 are distinct types, as otherwise this would result in an -// ODR violation. -// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is -// std::u16string. -#if defined(BASE_STRING16_IS_STD_U16STRING) +// std::wstring and gurl_base::WStringPiece. BASE_EXPORT void StrAppend(std::wstring* dest, span<const WStringPiece> pieces); BASE_EXPORT void StrAppend(std::wstring* dest, span<const std::wstring> pieces); @@ -38,7 +33,6 @@ inline std::wstring StrCat(std::initializer_list<WStringPiece> pieces) { return StrCat(make_span(pieces)); } -#endif // defined(BASE_STRING16_IS_STD_U16STRING) } // namespace base
diff --git a/base/strings/string16.h b/base/strings/string16.h deleted file mode 100644 index dc3ddc7..0000000 --- a/base/strings/string16.h +++ /dev/null
@@ -1,49 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_STRINGS_STRING16_H_ -#define BASE_STRINGS_STRING16_H_ - -// WHAT: -// Type aliases for string and character types supporting UTF-16 data. Prior to -// C++11 there was no standard library solution for this, which is why wstring -// was used where possible (i.e. where wchar_t holds UTF-16 encoded data). -// -// In C++11 we gained std::u16string, which is a cross-platform solution for -// UTF-16 strings. This is now the string16 type where ever wchar_t does not -// hold UTF16 data (i.e. commonly non-Windows platforms). Eventually this should -// be used everywhere, at which point this type alias and this file should be -// removed. https://crbug.com/911896 tracks the migration effort. - -#include <string> - -#include "build/build_config.h" - -#if defined(WCHAR_T_IS_UTF16) - -// Define a macro for wrapping construction of char16 arrays and string16s from -// a literal string. This indirection allows for an easier migration of -// gurl_base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one -// character change to the macro will be necessary. -// TODO(https://crbug.com/911896): Remove this macro once gurl_base::char16 is -// char16_t on all platforms. -#define STRING16_LITERAL(x) L##x - -namespace gurl_base { -using char16 = wchar_t; -using string16 = std::wstring; -} // namespace base - -#else - -#define STRING16_LITERAL(x) u##x - -namespace gurl_base { -using char16 = char16_t; -using string16 = std::u16string; -} // namespace base - -#endif // WCHAR_T_IS_UTF16 - -#endif // BASE_STRINGS_STRING16_H_
diff --git a/base/strings/string16_unittest.cc b/base/strings/string16_unittest.cc deleted file mode 100644 index 9cdb075..0000000 --- a/base/strings/string16_unittest.cc +++ /dev/null
@@ -1,23 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/strings/string16.h" - -#include "testing/gtest/include/gtest/gtest.h" - -namespace gurl_base { - -// Ensure that STRING16_LITERAL can be used to instantiate constants of type -// char16 and char16[], respectively. -TEST(String16Test, String16Literal) { - static constexpr char16 kHelloChars[] = { - STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'), - STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'), - }; - - static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello"); - EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0); -} - -} // namespace base
diff --git a/base/strings/string_number_conversions.cc b/base/strings/string_number_conversions.cc index d1886b1..b0eeafd 100644 --- a/base/strings/string_number_conversions.cc +++ b/base/strings/string_number_conversions.cc
@@ -9,7 +9,6 @@ #include "base/containers/span.h" #include "polyfills/base/logging.h" -#include "base/strings/string16.h" #include "base/strings/string_number_conversions_internal.h" #include "base/strings/string_piece.h" @@ -19,56 +18,56 @@ return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(int value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(int value) { + return internal::IntToStringT<std::u16string>(value); } std::string NumberToString(unsigned value) { return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(unsigned value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(unsigned value) { + return internal::IntToStringT<std::u16string>(value); } std::string NumberToString(long value) { return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(long value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(long value) { + return internal::IntToStringT<std::u16string>(value); } std::string NumberToString(unsigned long value) { return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(unsigned long value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(unsigned long value) { + return internal::IntToStringT<std::u16string>(value); } std::string NumberToString(long long value) { return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(long long value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(long long value) { + return internal::IntToStringT<std::u16string>(value); } std::string NumberToString(unsigned long long value) { return internal::IntToStringT<std::string>(value); } -string16 NumberToString16(unsigned long long value) { - return internal::IntToStringT<string16>(value); +std::u16string NumberToString16(unsigned long long value) { + return 
internal::IntToStringT<std::u16string>(value); } std::string NumberToString(double value) { return internal::DoubleToStringT<std::string>(value); } -string16 NumberToString16(double value) { - return internal::DoubleToStringT<string16>(value); +std::u16string NumberToString16(double value) { + return internal::DoubleToStringT<std::u16string>(value); } bool StringToInt(StringPiece input, int* output) {
diff --git a/base/strings/string_number_conversions.h b/base/strings/string_number_conversions.h index 7bb51b6..d2f8af3 100644 --- a/base/strings/string_number_conversions.h +++ b/base/strings/string_number_conversions.h
@@ -13,7 +13,6 @@ #include "polyfills/base/base_export.h" #include "base/containers/span.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "build/build_config.h" @@ -39,19 +38,19 @@ // Ignores locale! see warning above. BASE_EXPORT std::string NumberToString(int value); -BASE_EXPORT string16 NumberToString16(int value); +BASE_EXPORT std::u16string NumberToString16(int value); BASE_EXPORT std::string NumberToString(unsigned int value); -BASE_EXPORT string16 NumberToString16(unsigned int value); +BASE_EXPORT std::u16string NumberToString16(unsigned int value); BASE_EXPORT std::string NumberToString(long value); -BASE_EXPORT string16 NumberToString16(long value); +BASE_EXPORT std::u16string NumberToString16(long value); BASE_EXPORT std::string NumberToString(unsigned long value); -BASE_EXPORT string16 NumberToString16(unsigned long value); +BASE_EXPORT std::u16string NumberToString16(unsigned long value); BASE_EXPORT std::string NumberToString(long long value); -BASE_EXPORT string16 NumberToString16(long long value); +BASE_EXPORT std::u16string NumberToString16(long long value); BASE_EXPORT std::string NumberToString(unsigned long long value); -BASE_EXPORT string16 NumberToString16(unsigned long long value); +BASE_EXPORT std::u16string NumberToString16(unsigned long long value); BASE_EXPORT std::string NumberToString(double value); -BASE_EXPORT string16 NumberToString16(double value); +BASE_EXPORT std::u16string NumberToString16(double value); // String -> number conversions ------------------------------------------------
diff --git a/base/strings/string_number_conversions_fuzzer.cc b/base/strings/string_number_conversions_fuzzer.cc index 012887a..e6e5c6f 100644 --- a/base/strings/string_number_conversions_fuzzer.cc +++ b/base/strings/string_number_conversions_fuzzer.cc
@@ -44,7 +44,7 @@ const size_t size, bool (*string_to_num)(gurl_base::StringPiece16, NumberType*)) { - return CheckRoundtripsT<NumberType, gurl_base::StringPiece16, gurl_base::string16>( + return CheckRoundtripsT<NumberType, gurl_base::StringPiece16, std::u16string>( data, size, &gurl_base::NumberToString16, string_to_num); } @@ -81,7 +81,7 @@ // Test for StringPiece16 if size is even. if (size % 2 == 0) { gurl_base::StringPiece16 string_piece_input16( - reinterpret_cast<const gurl_base::char16*>(data), size / 2); + reinterpret_cast<const char16_t*>(data), size / 2); gurl_base::StringToInt(string_piece_input16, &out_int); gurl_base::StringToUint(string_piece_input16, &out_uint);
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h index 4f917f8..8223b59 100644 --- a/base/strings/string_number_conversions_internal.h +++ b/base/strings/string_number_conversions_internal.h
@@ -18,6 +18,7 @@ #include "base/numerics/safe_math.h" #include "base/strings/string_util.h" #include "base/third_party/double_conversion/double-conversion/double-conversion.h" +#include "absl/types/optional.h" namespace gurl_base { @@ -58,7 +59,7 @@ // Utility to convert a character to a digit in a given base template <int BASE, typename CHAR> -Optional<uint8_t> CharToDigit(CHAR c) { +absl::optional<uint8_t> CharToDigit(CHAR c) { static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]"); if (c >= '0' && c < '0' + std::min(BASE, 10)) return c - '0'; @@ -69,7 +70,7 @@ if (c >= 'A' && c < 'A' + BASE - 10) return c - 'A' + 10; - return gurl_base::nullopt; + return absl::nullopt; } // There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it @@ -88,9 +89,9 @@ }; template <> -class WhitespaceHelper<char16> { +class WhitespaceHelper<char16_t> { public: - static bool Invoke(char16 c) { return 0 != iswspace(c); } + static bool Invoke(char16_t c) { return 0 != iswspace(c); } }; template <typename CHAR> @@ -133,7 +134,7 @@ } for (Iter current = begin; current != end; ++current) { - Optional<uint8_t> new_digit = CharToDigit<kBase>(*current); + absl::optional<uint8_t> new_digit = CharToDigit<kBase>(*current); if (!new_digit) { return {value, false}; @@ -179,8 +180,8 @@ }; }; -template <typename Number, int kBase, typename Str> -auto StringToNumber(BasicStringPiece<Str> input) { +template <typename Number, int kBase, typename CharT> +auto StringToNumber(BasicStringPiece<CharT> input) { using Parser = StringToNumberParser<Number, kBase>; using Result = typename Parser::Result; @@ -212,16 +213,16 @@ return result; } -template <typename STR, typename VALUE> -bool StringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { - auto result = StringToNumber<VALUE, 10>(input); +template <typename T, typename VALUE, typename CharT = typename T::value_type> +bool StringToIntImpl(T input, VALUE& output) { + auto result = StringToNumber<VALUE, 10, 
CharT>(input); output = result.value; return result.valid; } -template <typename STR, typename VALUE> -bool HexStringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { - auto result = StringToNumber<VALUE, 16>(input); +template <typename T, typename VALUE, typename CharT = typename T::value_type> +bool HexStringToIntImpl(T input, VALUE& output) { + auto result = StringToNumber<VALUE, 16, CharT>(input); output = result.value; return result.valid; } @@ -285,9 +286,9 @@ return false; for (uintptr_t i = 0; i < count / 2; ++i) { // most significant 4 bits - Optional<uint8_t> msb = CharToDigit<16>(input[i * 2]); + absl::optional<uint8_t> msb = CharToDigit<16>(input[i * 2]); // least significant 4 bits - Optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]); + absl::optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]); if (!msb || !lsb) { return false; }
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc index 62a31f6..f836316 100644 --- a/base/strings/string_number_conversions_unittest.cc +++ b/base/strings/string_number_conversions_unittest.cc
@@ -145,7 +145,7 @@ EXPECT_EQ(i.success, StringToInt(i.input, &output)); EXPECT_EQ(i.output, output); - string16 utf16_input = UTF8ToUTF16(i.input); + std::u16string utf16_input = UTF8ToUTF16(i.input); output = i.output ^ 1; // Ensure StringToInt wrote something. EXPECT_EQ(i.success, StringToInt(utf16_input, &output)); EXPECT_EQ(i.output, output); @@ -160,14 +160,14 @@ EXPECT_FALSE(StringToInt(input_string, &output)); EXPECT_EQ(6, output); - string16 utf16_input = UTF8ToUTF16(input_string); + std::u16string utf16_input = UTF8ToUTF16(input_string); output = 0; EXPECT_FALSE(StringToInt(utf16_input, &output)); EXPECT_EQ(6, output); output = 0; - const char16 negative_wide_input[] = {0xFF4D, '4', '2', 0}; - EXPECT_FALSE(StringToInt(string16(negative_wide_input), &output)); + const char16_t negative_wide_input[] = {0xFF4D, '4', '2', 0}; + EXPECT_FALSE(StringToInt(std::u16string(negative_wide_input), &output)); EXPECT_EQ(0, output); } @@ -211,7 +211,7 @@ EXPECT_EQ(i.success, StringToUint(i.input, &output)); EXPECT_EQ(i.output, output); - string16 utf16_input = UTF8ToUTF16(i.input); + std::u16string utf16_input = UTF8ToUTF16(i.input); output = i.output ^ 1; // Ensure StringToUint wrote something. 
EXPECT_EQ(i.success, StringToUint(utf16_input, &output)); EXPECT_EQ(i.output, output); @@ -226,14 +226,14 @@ EXPECT_FALSE(StringToUint(input_string, &output)); EXPECT_EQ(6U, output); - string16 utf16_input = UTF8ToUTF16(input_string); + std::u16string utf16_input = UTF8ToUTF16(input_string); output = 0; EXPECT_FALSE(StringToUint(utf16_input, &output)); EXPECT_EQ(6U, output); output = 0; - const char16 negative_wide_input[] = {0xFF4D, '4', '2', 0}; - EXPECT_FALSE(StringToUint(string16(negative_wide_input), &output)); + const char16_t negative_wide_input[] = {0xFF4D, '4', '2', 0}; + EXPECT_FALSE(StringToUint(std::u16string(negative_wide_input), &output)); EXPECT_EQ(0U, output); } @@ -281,7 +281,7 @@ EXPECT_EQ(i.success, StringToInt64(i.input, &output)); EXPECT_EQ(i.output, output); - string16 utf16_input = UTF8ToUTF16(i.input); + std::u16string utf16_input = UTF8ToUTF16(i.input); output = 0; EXPECT_EQ(i.success, StringToInt64(utf16_input, &output)); EXPECT_EQ(i.output, output); @@ -296,7 +296,7 @@ EXPECT_FALSE(StringToInt64(input_string, &output)); EXPECT_EQ(6, output); - string16 utf16_input = UTF8ToUTF16(input_string); + std::u16string utf16_input = UTF8ToUTF16(input_string); output = 0; EXPECT_FALSE(StringToInt64(utf16_input, &output)); EXPECT_EQ(6, output); @@ -348,7 +348,7 @@ EXPECT_EQ(i.success, StringToUint64(i.input, &output)); EXPECT_EQ(i.output, output); - string16 utf16_input = UTF8ToUTF16(i.input); + std::u16string utf16_input = UTF8ToUTF16(i.input); output = 0; EXPECT_EQ(i.success, StringToUint64(utf16_input, &output)); EXPECT_EQ(i.output, output); @@ -363,7 +363,7 @@ EXPECT_FALSE(StringToUint64(input_string, &output)); EXPECT_EQ(6U, output); - string16 utf16_input = UTF8ToUTF16(input_string); + std::u16string utf16_input = UTF8ToUTF16(input_string); output = 0; EXPECT_FALSE(StringToUint64(utf16_input, &output)); EXPECT_EQ(6U, output); @@ -417,7 +417,7 @@ EXPECT_EQ(i.success, StringToSizeT(i.input, &output)); EXPECT_EQ(i.output, output); - string16 
utf16_input = UTF8ToUTF16(i.input); + std::u16string utf16_input = UTF8ToUTF16(i.input); output = 0; EXPECT_EQ(i.success, StringToSizeT(utf16_input, &output)); EXPECT_EQ(i.output, output); @@ -432,7 +432,7 @@ EXPECT_FALSE(StringToSizeT(input_string, &output)); EXPECT_EQ(6U, output); - string16 utf16_input = UTF8ToUTF16(input_string); + std::u16string utf16_input = UTF8ToUTF16(input_string); output = 0; EXPECT_FALSE(StringToSizeT(utf16_input, &output)); EXPECT_EQ(6U, output);
diff --git a/base/strings/string_number_conversions_win.cc b/base/strings/string_number_conversions_win.cc index abae437..381f99d 100644 --- a/base/strings/string_number_conversions_win.cc +++ b/base/strings/string_number_conversions_win.cc
@@ -39,7 +39,6 @@ return internal::DoubleToStringT<std::wstring>(value); } -#if defined(BASE_STRING16_IS_STD_U16STRING) namespace internal { template <> @@ -74,6 +73,5 @@ return internal::StringToDoubleImpl( input, reinterpret_cast<const uint16_t*>(input.data()), *output); } -#endif // defined(BASE_STRING16_IS_STD_U16STRING) } // namespace base
diff --git a/base/strings/string_number_conversions_win.h b/base/strings/string_number_conversions_win.h index f8b645f..d63d2d7 100644 --- a/base/strings/string_number_conversions_win.h +++ b/base/strings/string_number_conversions_win.h
@@ -21,19 +21,13 @@ BASE_EXPORT std::wstring NumberToWString(double value); // The following section contains overloads of the cross-platform APIs for -// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring -// and gurl_base::string16 are distinct types, as otherwise this would result in an -// ODR violation. -// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is -// std::u16string. -#if defined(BASE_STRING16_IS_STD_U16STRING) +// std::wstring and gurl_base::WStringPiece. BASE_EXPORT bool StringToInt(WStringPiece input, int* output); BASE_EXPORT bool StringToUint(WStringPiece input, unsigned* output); BASE_EXPORT bool StringToInt64(WStringPiece input, int64_t* output); BASE_EXPORT bool StringToUint64(WStringPiece input, uint64_t* output); BASE_EXPORT bool StringToSizeT(WStringPiece input, size_t* output); BASE_EXPORT bool StringToDouble(WStringPiece input, double* output); -#endif // defined(BASE_STRING16_IS_STD_U16STRING) } // namespace base
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc index fe9be5f..f9b9422 100644 --- a/base/strings/string_piece.cc +++ b/base/strings/string_piece.cc
@@ -1,15 +1,14 @@ // Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// Copied from strings/stringpiece.cc with modifications #include "base/strings/string_piece.h" -#include <limits.h> -#include <string.h> - #include <algorithm> +#include <climits> +#include <limits> #include <ostream> +#include <string> #include "base/strings/utf_string_conversions.h" #include "build/build_config.h" @@ -37,8 +36,9 @@ // MSVC doesn't like complex extern templates and DLLs. #if !defined(COMPILER_MSVC) -template class BasicStringPiece<std::string>; -template class BasicStringPiece<string16>; +template class BasicStringPiece<char>; +template class BasicStringPiece<char16_t>; +template class BasicStringPiece<wchar_t>; #endif std::ostream& operator<<(std::ostream& o, StringPiece piece) { @@ -50,24 +50,22 @@ return o << UTF16ToUTF8(piece); } -#if !defined(WCHAR_T_IS_UTF16) std::ostream& operator<<(std::ostream& o, WStringPiece piece) { return o << WideToUTF8(piece); } -#endif namespace internal { -template <typename STR> -size_t findT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) { +template <typename T, typename CharT = typename T::value_type> +size_t findT(T self, T s, size_t pos) { if (pos > self.size()) - return BasicStringPiece<STR>::npos; + return BasicStringPiece<CharT>::npos; - typename BasicStringPiece<STR>::const_iterator result = + typename BasicStringPiece<CharT>::const_iterator result = std::search(self.begin() + pos, self.end(), s.begin(), s.end()); const size_t xpos = static_cast<size_t>(result - self.begin()); - return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos; + return xpos + s.size() <= self.size() ? 
xpos : BasicStringPiece<CharT>::npos; } size_t find(StringPiece self, StringPiece s, size_t pos) { @@ -78,41 +76,20 @@ return findT(self, s, pos); } -template <typename STR> -size_t findT(BasicStringPiece<STR> self, - typename STR::value_type c, - size_t pos) { - if (pos >= self.size()) - return BasicStringPiece<STR>::npos; - - typename BasicStringPiece<STR>::const_iterator result = - std::find(self.begin() + pos, self.end(), c); - return result != self.end() ? - static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; -} - -size_t find(StringPiece self, char c, size_t pos) { - return findT(self, c, pos); -} - -size_t find(StringPiece16 self, char16 c, size_t pos) { - return findT(self, c, pos); -} - -template <typename STR> -size_t rfindT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) { +template <typename T, typename CharT = typename T::value_type> +size_t rfindT(T self, T s, size_t pos) { if (self.size() < s.size()) - return BasicStringPiece<STR>::npos; + return BasicStringPiece<CharT>::npos; if (s.empty()) return std::min(self.size(), pos); - typename BasicStringPiece<STR>::const_iterator last = + typename BasicStringPiece<CharT>::const_iterator last = self.begin() + std::min(self.size() - s.size(), pos) + s.size(); - typename BasicStringPiece<STR>::const_iterator result = + typename BasicStringPiece<CharT>::const_iterator result = std::find_end(self.begin(), last, s.begin(), s.end()); - return result != last ? - static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; + return result != last ? 
static_cast<size_t>(result - self.begin()) + : BasicStringPiece<CharT>::npos; } size_t rfind(StringPiece self, StringPiece s, size_t pos) { @@ -123,31 +100,6 @@ return rfindT(self, s, pos); } -template <typename STR> -size_t rfindT(BasicStringPiece<STR> self, - typename STR::value_type c, - size_t pos) { - if (self.size() == 0) - return BasicStringPiece<STR>::npos; - - for (size_t i = std::min(pos, self.size() - 1); ; - --i) { - if (self.data()[i] == c) - return i; - if (i == 0) - break; - } - return BasicStringPiece<STR>::npos; -} - -size_t rfind(StringPiece self, char c, size_t pos) { - return rfindT(self, c, pos); -} - -size_t rfind(StringPiece16 self, char16 c, size_t pos) { - return rfindT(self, c, pos); -} - // 8-bit version using lookup table. size_t find_first_of(StringPiece self, StringPiece s, size_t pos) { if (self.size() == 0 || s.size() == 0) @@ -155,7 +107,7 @@ // Avoid the cost of BuildLookupTable() for a single-character search. if (s.size() == 1) - return find(self, s.data()[0], pos); + return self.find(s.data()[0], pos); bool lookup[UCHAR_MAX + 1] = { false }; BuildLookupTable(s, lookup); @@ -167,29 +119,34 @@ return StringPiece::npos; } -// 16-bit brute force version. -size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) { +// Generic brute force version. +template <typename T, typename CharT = typename T::value_type> +size_t find_first_ofT(T self, T s, size_t pos) { // Use the faster std::find() if searching for a single character. - StringPiece16::const_iterator found = + typename BasicStringPiece<CharT>::const_iterator found = s.size() == 1 ? 
std::find(self.begin() + pos, self.end(), s[0]) : std::find_first_of(self.begin() + pos, self.end(), s.begin(), s.end()); if (found == self.end()) - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; return found - self.begin(); } +size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) { + return find_first_ofT(self, s, pos); +} + // 8-bit version using lookup table. size_t find_first_not_of(StringPiece self, StringPiece s, size_t pos) { - if (self.size() == 0) + if (pos >= self.size()) return StringPiece::npos; if (s.size() == 0) - return 0; + return pos; // Avoid the cost of BuildLookupTable() for a single-character search. if (s.size() == 1) - return find_first_not_of(self, s.data()[0], pos); + return self.find_first_not_of(s.data()[0], pos); bool lookup[UCHAR_MAX + 1] = { false }; BuildLookupTable(s, lookup); @@ -201,12 +158,11 @@ return StringPiece::npos; } -// 16-bit brute-force version. -BASE_EXPORT size_t find_first_not_of(StringPiece16 self, - StringPiece16 s, - size_t pos) { +// Generic brute-force version. 
+template <typename T, typename CharT = typename T::value_type> +size_t find_first_not_ofT(T self, T s, size_t pos) { if (self.size() == 0) - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; for (size_t self_i = pos; self_i < self.size(); ++self_i) { bool found = false; @@ -219,30 +175,11 @@ if (!found) return self_i; } - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; } -template <typename STR> -size_t find_first_not_ofT(BasicStringPiece<STR> self, - typename STR::value_type c, - size_t pos) { - if (self.size() == 0) - return BasicStringPiece<STR>::npos; - - for (; pos < self.size(); ++pos) { - if (self.data()[pos] != c) { - return pos; - } - } - return BasicStringPiece<STR>::npos; -} - -size_t find_first_not_of(StringPiece self, char c, size_t pos) { - return find_first_not_ofT(self, c, pos); -} - -size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos) { - return find_first_not_ofT(self, c, pos); +size_t find_first_not_of(StringPiece16 self, StringPiece16 s, size_t pos) { + return find_first_not_ofT(self, s, pos); } // 8-bit version using lookup table. @@ -252,7 +189,7 @@ // Avoid the cost of BuildLookupTable() for a single-character search. if (s.size() == 1) - return rfind(self, s.data()[0], pos); + return self.rfind(s.data()[0], pos); bool lookup[UCHAR_MAX + 1] = { false }; BuildLookupTable(s, lookup); @@ -265,10 +202,11 @@ return StringPiece::npos; } -// 16-bit brute-force version. -size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) { +// Generic brute-force version. 
+template <typename T, typename CharT = typename T::value_type> +size_t find_last_ofT(T self, T s, size_t pos) { if (self.size() == 0) - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) { @@ -279,7 +217,11 @@ if (self_i == 0) break; } - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; +} + +size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) { + return find_last_ofT(self, s, pos); } // 8-bit version using lookup table. @@ -293,7 +235,7 @@ // Avoid the cost of BuildLookupTable() for a single-character search. if (s.size() == 1) - return find_last_not_of(self, s.data()[0], pos); + return self.find_last_not_of(s.data()[0], pos); bool lookup[UCHAR_MAX + 1] = { false }; BuildLookupTable(s, lookup); @@ -306,8 +248,9 @@ return StringPiece::npos; } -// 16-bit brute-force version. -size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) { +// Generic brute-force version. 
+template <typename T, typename CharT = typename T::value_type> +size_t find_last_not_ofT(T self, T s, size_t pos) { if (self.size() == 0) return StringPiece::npos; @@ -324,32 +267,35 @@ if (self_i == 0) break; } - return StringPiece16::npos; + return BasicStringPiece<CharT>::npos; } -template <typename STR> -size_t find_last_not_ofT(BasicStringPiece<STR> self, - typename STR::value_type c, - size_t pos) { - if (self.size() == 0) - return BasicStringPiece<STR>::npos; - - for (size_t i = std::min(pos, self.size() - 1); ; --i) { - if (self.data()[i] != c) - return i; - if (i == 0) - break; - } - return BasicStringPiece<STR>::npos; +size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) { + return find_last_not_ofT(self, s, pos); } -size_t find_last_not_of(StringPiece self, char c, size_t pos) { - return find_last_not_ofT(self, c, pos); +size_t find(WStringPiece self, WStringPiece s, size_t pos) { + return findT(self, s, pos); } -size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos) { - return find_last_not_ofT(self, c, pos); +size_t rfind(WStringPiece self, WStringPiece s, size_t pos) { + return rfindT(self, s, pos); } +size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos) { + return find_first_ofT(self, s, pos); +} + +size_t find_first_not_of(WStringPiece self, WStringPiece s, size_t pos) { + return find_first_not_ofT(self, s, pos); +} + +size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos) { + return find_last_ofT(self, s, pos); +} + +size_t find_last_not_of(WStringPiece self, WStringPiece s, size_t pos) { + return find_last_not_ofT(self, s, pos); +} } // namespace internal } // namespace base
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h index dea85e3..03f5103 100644 --- a/base/strings/string_piece.h +++ b/base/strings/string_piece.h
@@ -1,7 +1,6 @@ // Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// Copied from strings/stringpiece.h with modifications // // A string-like object that points to a sized piece of memory. // @@ -25,6 +24,7 @@ #include <stddef.h> #include <iosfwd> +#include <limits> #include <ostream> #include <string> #include <type_traits> @@ -32,7 +32,6 @@ #include "polyfills/base/base_export.h" #include "polyfills/base/check_op.h" #include "base/strings/char_traits.h" -#include "base/strings/string16.h" #include "base/strings/string_piece_forward.h" #include "build/build_config.h" @@ -51,13 +50,9 @@ BASE_EXPORT size_t find(StringPiece self, StringPiece s, size_t pos); BASE_EXPORT size_t find(StringPiece16 self, StringPiece16 s, size_t pos); -BASE_EXPORT size_t find(StringPiece self, char c, size_t pos); -BASE_EXPORT size_t find(StringPiece16 self, char16 c, size_t pos); BASE_EXPORT size_t rfind(StringPiece self, StringPiece s, size_t pos); BASE_EXPORT size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos); -BASE_EXPORT size_t rfind(StringPiece self, char c, size_t pos); -BASE_EXPORT size_t rfind(StringPiece16 self, char16 c, size_t pos); BASE_EXPORT size_t find_first_of(StringPiece self, StringPiece s, size_t pos); BASE_EXPORT size_t find_first_of(StringPiece16 self, @@ -70,15 +65,11 @@ BASE_EXPORT size_t find_first_not_of(StringPiece16 self, StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_first_not_of(StringPiece self, char c, size_t pos); -BASE_EXPORT size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos); BASE_EXPORT size_t find_last_of(StringPiece self, StringPiece s, size_t pos); BASE_EXPORT size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_last_of(StringPiece self, char c, size_t pos); -BASE_EXPORT size_t find_last_of(StringPiece16 self, char16 c, size_t pos); BASE_EXPORT size_t 
find_last_not_of(StringPiece self, StringPiece s, @@ -86,466 +77,539 @@ BASE_EXPORT size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos); -BASE_EXPORT size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos); -BASE_EXPORT size_t find_last_not_of(StringPiece self, char c, size_t pos); + +BASE_EXPORT size_t find(WStringPiece self, WStringPiece s, size_t pos); +BASE_EXPORT size_t rfind(WStringPiece self, WStringPiece s, size_t pos); +BASE_EXPORT size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos); +BASE_EXPORT size_t find_first_not_of(WStringPiece self, + WStringPiece s, + size_t pos); +BASE_EXPORT size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos); +BASE_EXPORT size_t find_last_not_of(WStringPiece self, + WStringPiece s, + size_t pos); } // namespace internal // BasicStringPiece ------------------------------------------------------------ -// Defines the types, methods, operators, and data members common to both -// StringPiece and StringPiece16. -// -// This is templatized by string class type rather than character type, so -// BasicStringPiece<std::string> or BasicStringPiece<gurl_base::string16>. -template <typename STRING_TYPE> class BasicStringPiece { +// Mirrors the C++17 version of std::basic_string_view<> as closely as possible, +// except where noted below. +template <typename CharT, typename Traits> +class BasicStringPiece { public: - // Standard STL container boilerplate. 
- typedef size_t size_type; - typedef typename STRING_TYPE::traits_type traits_type; - typedef typename STRING_TYPE::value_type value_type; - typedef const value_type* pointer; - typedef const value_type& reference; - typedef const value_type& const_reference; - typedef ptrdiff_t difference_type; - typedef const value_type* const_iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + using traits_type = Traits; + using value_type = CharT; + using pointer = CharT*; + using const_pointer = const CharT*; + using reference = CharT&; + using const_reference = const CharT&; + using const_iterator = const CharT*; + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + using reverse_iterator = const_reverse_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; - static const size_type npos; - - public: - // We provide non-explicit singleton constructors so users can pass - // in a "const char*" or a "string" wherever a "StringPiece" is - // expected (likewise for char16, string16, StringPiece16). - constexpr BasicStringPiece() : ptr_(nullptr), length_(0) {} - // TODO(crbug.com/1049498): Construction from nullptr is not allowed for - // std::basic_string_view, so remove the special handling for it. - // Note: This doesn't just use STRING_TYPE::traits_type::length(), since that + constexpr BasicStringPiece() noexcept : ptr_(nullptr), length_(0) {} + constexpr BasicStringPiece(const BasicStringPiece& other) noexcept = default; + constexpr BasicStringPiece& operator=(const BasicStringPiece& view) noexcept = + default; + constexpr BasicStringPiece(const CharT* s, size_type count) + : ptr_(s), length_(count) {} + // Note: This doesn't just use traits_type::length(), since that // isn't constexpr until C++17. - constexpr BasicStringPiece(const value_type* str) - : ptr_(str), length_(!str ? 
0 : CharTraits<value_type>::length(str)) {} + constexpr BasicStringPiece(const CharT* s) + : ptr_(s), length_(s ? CharTraits<CharT>::length(s) : 0) { + // Intentional STL deviation: Null-check instead of UB. + GURL_CHECK(s); + } // Explicitly disallow construction from nullptr. Note that this does not // catch construction from runtime strings that might be null. // Note: The following is just a more elaborate way of spelling // `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is // not supported by the PNaCl toolchain. - // TODO(crbug.com/1049498): Remove once we GURL_CHECK(str) in the constructor - // above. template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>> BasicStringPiece(T) { static_assert(sizeof(T) == 0, // Always false. "StringPiece does not support construction from nullptr, use " "the default constructor instead."); } - BasicStringPiece(const STRING_TYPE& str) + + // These are necessary because std::basic_string provides construction from + // (an object convertible to) a std::basic_string_view, as well as an explicit + // cast operator to a std::basic_string_view, but (obviously) not from/to a + // BasicStringPiece. + BasicStringPiece(const std::basic_string<CharT>& str) : ptr_(str.data()), length_(str.size()) {} - constexpr BasicStringPiece(const value_type* offset, size_type len) - : ptr_(offset), length_(len) {} - - // data() may return a pointer to a buffer with embedded NULs, and the - // returned buffer may or may not be null terminated. Therefore it is - // typically a mistake to pass data() to a routine that expects a NUL - // terminated string. 
- constexpr const value_type* data() const { return ptr_; } - constexpr size_type size() const noexcept { return length_; } - constexpr size_type length() const noexcept { return length_; } - constexpr bool empty() const noexcept { return length_ == 0; } - - constexpr value_type operator[](size_type i) const { - GURL_CHECK(i < length_); - return ptr_[i]; - } - - constexpr value_type front() const { - GURL_CHECK_NE(0UL, length_); - return ptr_[0]; - } - - constexpr value_type back() const { - GURL_CHECK_NE(0UL, length_); - return ptr_[length_ - 1]; - } - - constexpr void remove_prefix(size_type n) { - GURL_CHECK(n <= length_); - ptr_ += n; - length_ -= n; - } - - constexpr void remove_suffix(size_type n) { - GURL_CHECK(n <= length_); - length_ -= n; - } - - // This is the style of conversion preferred by std::string_view in C++17. - explicit operator STRING_TYPE() const { - return empty() ? STRING_TYPE() : STRING_TYPE(data(), size()); - } - - // Deprecated, use operator STRING_TYPE() instead. - // TODO(crbug.com/1049498): Remove for all STRING_TYPEs. 
- template <typename StrT = STRING_TYPE, - typename = std::enable_if_t<std::is_same<StrT, std::string>::value>> - STRING_TYPE as_string() const { - return STRING_TYPE(*this); + explicit operator std::basic_string<CharT>() const { + return std::basic_string<CharT>(data(), size()); } constexpr const_iterator begin() const noexcept { return ptr_; } + constexpr const_iterator cbegin() const noexcept { return ptr_; } constexpr const_iterator end() const noexcept { return ptr_ + length_; } + constexpr const_iterator cend() const noexcept { return ptr_ + length_; } constexpr const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(ptr_ + length_); } + constexpr const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(ptr_ + length_); + } constexpr const_reverse_iterator rend() const noexcept { return const_reverse_iterator(ptr_); } + constexpr const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(ptr_); + } - size_type max_size() const { return length_; } - size_type capacity() const { return length_; } + constexpr const_reference operator[](size_type pos) const { + // Intentional STL deviation: Bounds-check instead of UB. + return at(pos); + } + constexpr const_reference at(size_type pos) const { + GURL_CHECK_LT(pos, size()); + return data()[pos]; + } - // String operations, see https://wg21.link/string.view.ops. 
- constexpr size_type copy(value_type* s, - size_type n, + constexpr const_reference front() const { return operator[](0); } + + constexpr const_reference back() const { return operator[](size() - 1); } + + constexpr const_pointer data() const noexcept { return ptr_; } + + constexpr size_type size() const noexcept { return length_; } + constexpr size_type length() const noexcept { return length_; } + + constexpr size_type max_size() const { + return std::numeric_limits<size_type>::max() / sizeof(CharT); + } + + constexpr bool empty() const noexcept WARN_UNUSED_RESULT { + return size() == 0; + } + + constexpr void remove_prefix(size_type n) { + // Intentional STL deviation: Bounds-check instead of UB. + GURL_CHECK_LE(n, size()); + ptr_ += n; + length_ -= n; + } + + constexpr void remove_suffix(size_type n) { + // Intentional STL deviation: Bounds-check instead of UB. + GURL_CHECK_LE(n, size()); + length_ -= n; + } + + constexpr void swap(BasicStringPiece& v) noexcept { + // Note: Cannot use std::swap() since it is not constexpr until C++20. 
+ const const_pointer ptr = ptr_; + ptr_ = v.ptr_; + v.ptr_ = ptr; + const size_type length = length_; + length_ = v.length_; + v.length_ = length; + } + + constexpr size_type copy(CharT* dest, + size_type count, size_type pos = 0) const { GURL_CHECK_LE(pos, size()); - size_type rlen = std::min(n, size() - pos); - traits_type::copy(s, data() + pos, rlen); - return rlen; + const size_type rcount = std::min(count, size() - pos); + traits_type::copy(dest, data() + pos, rcount); + return rcount; } constexpr BasicStringPiece substr(size_type pos = 0, - size_type n = npos) const { + size_type count = npos) const { GURL_CHECK_LE(pos, size()); - return {data() + pos, std::min(n, size() - pos)}; + const size_type rcount = std::min(count, size() - pos); + return {data() + pos, rcount}; } - constexpr int compare(BasicStringPiece str) const noexcept { - size_type rlen = std::min(size(), str.size()); - int result = CharTraits<value_type>::compare(data(), str.data(), rlen); - if (result == 0) - result = size() == str.size() ? 0 : (size() < str.size() ? -1 : 1); - return result; + constexpr int compare(BasicStringPiece v) const noexcept { + const size_type rlen = std::min(size(), v.size()); + const int result = CharTraits<CharT>::compare(data(), v.data(), rlen); + if (result != 0) + return result; + if (size() == v.size()) + return 0; + return size() < v.size() ? 
-1 : 1; } - - constexpr int compare(size_type pos, - size_type n, - BasicStringPiece str) const { - return substr(pos, n).compare(str); - } - constexpr int compare(size_type pos1, - size_type n1, - BasicStringPiece str, - size_type pos2, - size_type n2) const { - return substr(pos1, n1).compare(str.substr(pos2, n2)); + size_type count1, + BasicStringPiece v) const { + return substr(pos1, count1).compare(v); } - - constexpr int compare(const value_type* s) const { + constexpr int compare(size_type pos1, + size_type count1, + BasicStringPiece v, + size_type pos2, + size_type count2) const { + return substr(pos1, count1).compare(v.substr(pos2, count2)); + } + constexpr int compare(const CharT* s) const { return compare(BasicStringPiece(s)); } - - constexpr int compare(size_type pos, size_type n, const value_type* s) const { - return substr(pos, n).compare(BasicStringPiece(s)); + constexpr int compare(size_type pos1, + size_type count1, + const CharT* s) const { + return substr(pos1, count1).compare(BasicStringPiece(s)); + } + constexpr int compare(size_type pos1, + size_type count1, + const CharT* s, + size_type count2) const { + return substr(pos1, count1).compare(BasicStringPiece(s, count2)); } - constexpr int compare(size_type pos, - size_type n1, - const value_type* s, - size_type n2) const { - return substr(pos, n1).compare(BasicStringPiece(s, n2)); - } - - // Searching, see https://wg21.link/string.view.find. - - // find: Search for a character or substring at a given offset. 
- constexpr size_type find(BasicStringPiece s, + constexpr size_type find(BasicStringPiece v, size_type pos = 0) const noexcept { - return internal::find(*this, s, pos); - } + if (is_constant_evaluated()) { + if (v.size() > size()) + return npos; + for (size_type p = pos; p <= size() - v.size(); ++p) { + if (!compare(p, v.size(), v)) + return p; + } + return npos; + } - constexpr size_type find(value_type c, size_type pos = 0) const noexcept { - return internal::find(*this, c, pos); + return internal::find(*this, v, pos); } + constexpr size_type find(CharT ch, size_type pos = 0) const noexcept { + if (pos >= size()) + return npos; - constexpr size_type find(const value_type* s, + const const_pointer result = + gurl_base::CharTraits<CharT>::find(data() + pos, size() - pos, ch); + return result ? static_cast<size_type>(result - data()) : npos; + } + constexpr size_type find(const CharT* s, size_type pos, - size_type n) const { - return find(BasicStringPiece(s, n), pos); + size_type count) const { + return find(BasicStringPiece(s, count), pos); } - - constexpr size_type find(const value_type* s, size_type pos = 0) const { + constexpr size_type find(const CharT* s, size_type pos = 0) const { return find(BasicStringPiece(s), pos); } - // rfind: Reverse find. 
- constexpr size_type rfind(BasicStringPiece s, + constexpr size_type rfind(BasicStringPiece v, size_type pos = npos) const noexcept { - return internal::rfind(*this, s, pos); - } + if (is_constant_evaluated()) { + if (v.size() > size()) + return npos; + for (size_type p = std::min(size() - v.size(), pos);; --p) { + if (!compare(p, v.size(), v)) + return p; + if (!p) + break; + } + return npos; + } - constexpr size_type rfind(value_type c, size_type pos = npos) const noexcept { - return internal::rfind(*this, c, pos); + return internal::rfind(*this, v, pos); } + constexpr size_type rfind(CharT c, size_type pos = npos) const noexcept { + if (empty()) + return npos; - constexpr size_type rfind(const value_type* s, + for (size_t i = std::min(pos, size() - 1);; --i) { + if (data()[i] == c) + return i; + + if (i == 0) + break; + } + return npos; + } + constexpr size_type rfind(const CharT* s, size_type pos, - size_type n) const { - return rfind(BasicStringPiece(s, n), pos); + size_type count) const { + return rfind(BasicStringPiece(s, count), pos); } - - constexpr size_type rfind(const value_type* s, size_type pos = npos) const { + constexpr size_type rfind(const CharT* s, size_type pos = npos) const { return rfind(BasicStringPiece(s), pos); } - // find_first_of: Find the first occurrence of one of a set of characters. 
- constexpr size_type find_first_of(BasicStringPiece s, + constexpr size_type find_first_of(BasicStringPiece v, size_type pos = 0) const noexcept { - return internal::find_first_of(*this, s, pos); - } + if (is_constant_evaluated()) { + if (empty() || v.empty()) + return npos; + for (size_type p = pos; p < size(); ++p) { + if (v.find(data()[p]) != npos) + return p; + } + return npos; + } - constexpr size_type find_first_of(value_type c, - size_type pos = 0) const noexcept { + return internal::find_first_of(*this, v, pos); + } + constexpr size_type find_first_of(CharT c, size_type pos = 0) const noexcept { return find(c, pos); } - - constexpr size_type find_first_of(const value_type* s, + constexpr size_type find_first_of(const CharT* s, size_type pos, - size_type n) const { - return find_first_of(BasicStringPiece(s, n), pos); + size_type count) const { + return find_first_of(BasicStringPiece(s, count), pos); } - - constexpr size_type find_first_of(const value_type* s, - size_type pos = 0) const { + constexpr size_type find_first_of(const CharT* s, size_type pos = 0) const { return find_first_of(BasicStringPiece(s), pos); } - // find_last_of: Find the last occurrence of one of a set of characters. 
- constexpr size_type find_last_of(BasicStringPiece s, + constexpr size_type find_last_of(BasicStringPiece v, size_type pos = npos) const noexcept { - return internal::find_last_of(*this, s, pos); - } + if (is_constant_evaluated()) { + if (empty() || v.empty()) + return npos; + for (size_type p = std::min(pos, size() - 1);; --p) { + if (v.find(data()[p]) != npos) + return p; + if (!p) + break; + } + return npos; + } - constexpr size_type find_last_of(value_type c, + return internal::find_last_of(*this, v, pos); + } + constexpr size_type find_last_of(CharT c, size_type pos = npos) const noexcept { return rfind(c, pos); } - - constexpr size_type find_last_of(const value_type* s, + constexpr size_type find_last_of(const CharT* s, size_type pos, - size_type n) const { - return find_last_of(BasicStringPiece(s, n), pos); + size_type count) const { + return find_last_of(BasicStringPiece(s, count), pos); } - - constexpr size_type find_last_of(const value_type* s, - size_type pos = npos) const { + constexpr size_type find_last_of(const CharT* s, size_type pos = npos) const { return find_last_of(BasicStringPiece(s), pos); } - // find_first_not_of: Find the first occurrence not of a set of characters. 
- constexpr size_type find_first_not_of(BasicStringPiece s, + constexpr size_type find_first_not_of(BasicStringPiece v, size_type pos = 0) const noexcept { - return internal::find_first_not_of(*this, s, pos); - } + if (is_constant_evaluated()) { + if (empty()) + return npos; + for (size_type p = pos; p < size(); ++p) { + if (v.find(data()[p]) == npos) + return p; + } + return npos; + } - constexpr size_type find_first_not_of(value_type c, + return internal::find_first_not_of(*this, v, pos); + } + constexpr size_type find_first_not_of(CharT c, size_type pos = 0) const noexcept { - return internal::find_first_not_of(*this, c, pos); - } + if (empty()) + return npos; - constexpr size_type find_first_not_of(const value_type* s, + for (; pos < size(); ++pos) { + if (data()[pos] != c) + return pos; + } + return npos; + } + constexpr size_type find_first_not_of(const CharT* s, size_type pos, - size_type n) const { - return find_first_not_of(BasicStringPiece(s, n), pos); + size_type count) const { + return find_first_not_of(BasicStringPiece(s, count), pos); } - - constexpr size_type find_first_not_of(const value_type* s, + constexpr size_type find_first_not_of(const CharT* s, size_type pos = 0) const { return find_first_not_of(BasicStringPiece(s), pos); } - // find_last_not_of: Find the last occurrence not of a set of characters. 
- constexpr size_type find_last_not_of(BasicStringPiece s, + constexpr size_type find_last_not_of(BasicStringPiece v, size_type pos = npos) const noexcept { - return internal::find_last_not_of(*this, s, pos); - } + if (is_constant_evaluated()) { + if (empty()) + return npos; + for (size_type p = std::min(pos, size() - 1);; --p) { + if (v.find(data()[p]) == npos) + return p; + if (!p) + break; + } + return npos; + } - constexpr size_type find_last_not_of(value_type c, + return internal::find_last_not_of(*this, v, pos); + } + constexpr size_type find_last_not_of(CharT c, size_type pos = npos) const noexcept { - return internal::find_last_not_of(*this, c, pos); - } + if (empty()) + return npos; - constexpr size_type find_last_not_of(const value_type* s, + for (size_t i = std::min(pos, size() - 1);; --i) { + if (data()[i] != c) + return i; + if (i == 0) + break; + } + return npos; + } + constexpr size_type find_last_not_of(const CharT* s, size_type pos, - size_type n) const { - return find_last_not_of(BasicStringPiece(s, n), pos); + size_type count) const { + return find_last_not_of(BasicStringPiece(s, count), pos); } - - constexpr size_type find_last_not_of(const value_type* s, + constexpr size_type find_last_not_of(const CharT* s, size_type pos = npos) const { return find_last_not_of(BasicStringPiece(s), pos); } + static constexpr size_type npos = size_type(-1); + protected: - const value_type* ptr_; + const_pointer ptr_; size_type length_; }; -template <typename STRING_TYPE> -const typename BasicStringPiece<STRING_TYPE>::size_type -BasicStringPiece<STRING_TYPE>::npos = - typename BasicStringPiece<STRING_TYPE>::size_type(-1); +// static +template <typename CharT, typename Traits> +const typename BasicStringPiece<CharT, Traits>::size_type + BasicStringPiece<CharT, Traits>::npos; // MSVC doesn't like complex extern templates and DLLs. 
#if !defined(COMPILER_MSVC) -extern template class BASE_EXPORT BasicStringPiece<std::string>; -extern template class BASE_EXPORT BasicStringPiece<string16>; +extern template class BASE_EXPORT BasicStringPiece<char>; +extern template class BASE_EXPORT BasicStringPiece<char16_t>; #endif -// Comparison operators -------------------------------------------------------- -// operator == -template <typename StringT> -constexpr bool operator==(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator==(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; } - -// Here and below we make use of std::common_type_t to emulate an identity type -// transformation. This creates a non-deduced context, so that we can compare -// StringPieces with types that implicitly convert to StringPieces. See -// https://wg21.link/n3766 for details. +// Here and below we make use of std::common_type_t to emulate +// std::type_identity (part of C++20). This creates a non-deduced context, so +// that we can compare StringPieces with types that implicitly convert to +// StringPieces. See https://wg21.link/n3766 for details. // Furthermore, we require dummy template parameters for these overloads to work // around a name mangling issue on Windows. 
-template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator==( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { + return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; +} +template <typename CharT, typename Traits, int = 2> +constexpr bool operator==( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; } -template <typename StringT, int = 2> -constexpr bool operator==(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { - return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; -} - -// operator != -template <typename StringT> -constexpr bool operator!=(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator!=(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(lhs == rhs); } - -template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator!=( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { + return !(lhs == rhs); +} +template <typename CharT, typename Traits, int = 2> +constexpr bool operator!=( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(lhs == rhs); } -template <typename StringT, int = 2> -constexpr bool operator!=(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { - return !(lhs == rhs); -} - -// operator < -template <typename StringT> 
-constexpr bool operator<(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator<(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return lhs.compare(rhs) < 0; } - -template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator<( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { return lhs.compare(rhs) < 0; } -template <typename StringT, int = 2> -constexpr bool operator<(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits, int = 2> +constexpr bool operator<( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return lhs.compare(rhs) < 0; } -// operator > -template <typename StringT> -constexpr bool operator>(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator>(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return rhs < lhs; } - -template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator>( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { + return rhs < lhs; +} +template <typename CharT, typename Traits, int = 2> +constexpr bool operator>( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return rhs < lhs; } -template <typename StringT, int = 2> -constexpr bool 
operator>(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { - return rhs < lhs; -} - -// operator <= -template <typename StringT> -constexpr bool operator<=(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator<=(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(rhs < lhs); } - -template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator<=( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { + return !(rhs < lhs); +} +template <typename CharT, typename Traits, int = 2> +constexpr bool operator<=( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(rhs < lhs); } -template <typename StringT, int = 2> -constexpr bool operator<=(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { - return !(rhs < lhs); -} - -// operator >= -template <typename StringT> -constexpr bool operator>=(BasicStringPiece<StringT> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits> +constexpr bool operator>=(BasicStringPiece<CharT, Traits> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(lhs < rhs); } - -template <typename StringT, int = 1> +template <typename CharT, typename Traits, int = 1> constexpr bool operator>=( - BasicStringPiece<StringT> lhs, - std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + BasicStringPiece<CharT, Traits> lhs, + std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept { return !(lhs < rhs); } - -template <typename StringT, int = 2> -constexpr bool 
operator>=(std::common_type_t<BasicStringPiece<StringT>> lhs, - BasicStringPiece<StringT> rhs) noexcept { +template <typename CharT, typename Traits, int = 2> +constexpr bool operator>=( + std::common_type_t<BasicStringPiece<CharT, Traits>> lhs, + BasicStringPiece<CharT, Traits> rhs) noexcept { return !(lhs < rhs); } BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece piece); +// Not in the STL: convenience functions to output non-UTF-8 strings to an +// 8-bit-width stream. BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece16 piece); - -#if !defined(WCHAR_T_IS_UTF16) BASE_EXPORT std::ostream& operator<<(std::ostream& o, WStringPiece piece); -#endif -// Hashing --------------------------------------------------------------------- +// Intentionally omitted (since Chromium does not use character literals): +// operator""sv. -// We provide appropriate hash functions so StringPiece and StringPiece16 can -// be used as keys in hash sets and maps. - -// This hash function is copied from base/strings/string16.h. We don't use the -// ones already defined for string and string16 directly because it would -// require the string constructors to be called, which we don't want. - +// Stand-ins for the STL's std::hash<> specializations. template <typename StringPieceType> struct StringPieceHashImpl { + // This is a custom hash function. We don't use the ones already defined for + // string and std::u16string directly because it would require the string + // constructors to be called, which we don't want. std::size_t operator()(StringPieceType sp) const { std::size_t result = 0; for (auto c : sp) @@ -553,7 +617,6 @@ return result; } }; - using StringPieceHash = StringPieceHashImpl<StringPiece>; using StringPiece16Hash = StringPieceHashImpl<StringPiece16>; using WStringPieceHash = StringPieceHashImpl<WStringPiece>;
diff --git a/base/strings/string_piece_forward.h b/base/strings/string_piece_forward.h index aa79117..ce7e489 100644 --- a/base/strings/string_piece_forward.h +++ b/base/strings/string_piece_forward.h
@@ -1,23 +1,21 @@ // Copyright 2017 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. - -// Forward declaration of StringPiece types from base/strings/string_piece.h +// +// Forward declaration of StringPiece types from base/strings/string_piece.h. #ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_ #define BASE_STRINGS_STRING_PIECE_FORWARD_H_ #include <string> -#include "base/strings/string16.h" - namespace gurl_base { -template <typename STRING_TYPE> +template <typename CharT, typename Traits = std::char_traits<CharT>> class BasicStringPiece; -typedef BasicStringPiece<std::string> StringPiece; -typedef BasicStringPiece<string16> StringPiece16; -typedef BasicStringPiece<std::wstring> WStringPiece; +using StringPiece = BasicStringPiece<char>; +using StringPiece16 = BasicStringPiece<char16_t>; +using WStringPiece = BasicStringPiece<wchar_t>; } // namespace base
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc index 76de8f7..c6b1f29 100644 --- a/base/strings/string_piece_unittest.cc +++ b/base/strings/string_piece_unittest.cc
@@ -6,44 +6,41 @@ #include <string> -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" namespace gurl_base { -template <typename T> +template <typename CharT> class CommonStringPieceTest : public ::testing::Test { public: - static const T as_string(const char* input) { - return T(input); - } - static const T& as_string(const T& input) { + static std::string as_string(const char* input) { return input; } + static const std::string& as_string(const std::string& input) { return input; } }; template <> -class CommonStringPieceTest<string16> : public ::testing::Test { +class CommonStringPieceTest<char16_t> : public ::testing::Test { public: - static const string16 as_string(const char* input) { - return ASCIIToUTF16(input); + static std::u16string as_string(const char* input) { + return UTF8ToUTF16(input); } - static const string16 as_string(const std::string& input) { - return ASCIIToUTF16(input); + static std::u16string as_string(const std::string& input) { + return UTF8ToUTF16(input); } }; -typedef ::testing::Types<std::string, string16> SupportedStringTypes; +typedef ::testing::Types<char, char16_t> SupportedCharTypes; -TYPED_TEST_SUITE(CommonStringPieceTest, SupportedStringTypes); +TYPED_TEST_SUITE(CommonStringPieceTest, SupportedCharTypes); TYPED_TEST(CommonStringPieceTest, CheckComparisonOperators) { #define CMP_Y(op, x, y) \ { \ - TypeParam lhs(TestFixture::as_string(x)); \ - TypeParam rhs(TestFixture::as_string(y)); \ + std::basic_string<TypeParam> lhs(TestFixture::as_string(x)); \ + std::basic_string<TypeParam> rhs(TestFixture::as_string(y)); \ ASSERT_TRUE((BasicStringPiece<TypeParam>((lhs.c_str())) \ op BasicStringPiece<TypeParam>((rhs.c_str())))); \ ASSERT_TRUE(BasicStringPiece<TypeParam>(lhs) op rhs); \ @@ -55,8 +52,8 @@ #define CMP_N(op, x, y) \ { \ - TypeParam lhs(TestFixture::as_string(x)); \ - TypeParam rhs(TestFixture::as_string(y)); 
\ + std::basic_string<TypeParam> lhs(TestFixture::as_string(x)); \ + std::basic_string<TypeParam> rhs(TestFixture::as_string(y)); \ ASSERT_FALSE((BasicStringPiece<TypeParam>((lhs.c_str())) \ op BasicStringPiece<TypeParam>((rhs.c_str())))); \ ASSERT_FALSE(BasicStringPiece<TypeParam>(lhs) op rhs); \ @@ -141,39 +138,39 @@ } TYPED_TEST(CommonStringPieceTest, CheckSTL) { - TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); - TypeParam abc(TestFixture::as_string("abc")); - TypeParam xyz(TestFixture::as_string("xyz")); - TypeParam foobar(TestFixture::as_string("foobar")); + std::basic_string<TypeParam> alphabet( + TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); + std::basic_string<TypeParam> abc(TestFixture::as_string("abc")); + std::basic_string<TypeParam> xyz(TestFixture::as_string("xyz")); + std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar")); BasicStringPiece<TypeParam> a(alphabet); BasicStringPiece<TypeParam> b(abc); BasicStringPiece<TypeParam> c(xyz); BasicStringPiece<TypeParam> d(foobar); BasicStringPiece<TypeParam> e; - TypeParam temp(TestFixture::as_string("123")); - temp += static_cast<typename TypeParam::value_type>(0); + std::basic_string<TypeParam> temp(TestFixture::as_string("123")); + temp += static_cast<TypeParam>(0); temp += TestFixture::as_string("456"); BasicStringPiece<TypeParam> f(temp); - ASSERT_EQ(a[6], static_cast<typename TypeParam::value_type>('g')); - ASSERT_EQ(b[0], static_cast<typename TypeParam::value_type>('a')); - ASSERT_EQ(c[2], static_cast<typename TypeParam::value_type>('z')); - ASSERT_EQ(f[3], static_cast<typename TypeParam::value_type>('\0')); - ASSERT_EQ(f[5], static_cast<typename TypeParam::value_type>('5')); + ASSERT_EQ(a[6], static_cast<TypeParam>('g')); + ASSERT_EQ(b[0], static_cast<TypeParam>('a')); + ASSERT_EQ(c[2], static_cast<TypeParam>('z')); + ASSERT_EQ(f[3], static_cast<TypeParam>('\0')); + ASSERT_EQ(f[5], static_cast<TypeParam>('5')); - ASSERT_EQ(*d.data(), 
static_cast<typename TypeParam::value_type>('f')); - ASSERT_EQ(d.data()[5], static_cast<typename TypeParam::value_type>('r')); + ASSERT_EQ(*d.data(), static_cast<TypeParam>('f')); + ASSERT_EQ(d.data()[5], static_cast<TypeParam>('r')); ASSERT_EQ(e.data(), nullptr); - ASSERT_EQ(*a.begin(), static_cast<typename TypeParam::value_type>('a')); - ASSERT_EQ(*(b.begin() + 2), static_cast<typename TypeParam::value_type>('c')); - ASSERT_EQ(*(c.end() - 1), static_cast<typename TypeParam::value_type>('z')); + ASSERT_EQ(*a.begin(), static_cast<TypeParam>('a')); + ASSERT_EQ(*(b.begin() + 2), static_cast<TypeParam>('c')); + ASSERT_EQ(*(c.end() - 1), static_cast<TypeParam>('z')); - ASSERT_EQ(*a.rbegin(), static_cast<typename TypeParam::value_type>('z')); - ASSERT_EQ(*(b.rbegin() + 2), - static_cast<typename TypeParam::value_type>('a')); - ASSERT_EQ(*(c.rend() - 1), static_cast<typename TypeParam::value_type>('x')); + ASSERT_EQ(*a.rbegin(), static_cast<TypeParam>('z')); + ASSERT_EQ(*(b.rbegin() + 2), static_cast<TypeParam>('a')); + ASSERT_EQ(*(c.rend() - 1), static_cast<TypeParam>('x')); ASSERT_EQ(a.rbegin() + 26, a.rend()); ASSERT_EQ(a.size(), 26U); @@ -196,17 +193,17 @@ ASSERT_EQ(d.data(), nullptr); ASSERT_EQ(d.begin(), d.end()); - ASSERT_GE(a.max_size(), a.capacity()); - ASSERT_GE(a.capacity(), a.size()); + ASSERT_GE(a.max_size(), a.size()); } TYPED_TEST(CommonStringPieceTest, CheckFind) { typedef BasicStringPiece<TypeParam> Piece; - TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); - TypeParam abc(TestFixture::as_string("abc")); - TypeParam xyz(TestFixture::as_string("xyz")); - TypeParam foobar(TestFixture::as_string("foobar")); + std::basic_string<TypeParam> alphabet( + TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); + std::basic_string<TypeParam> abc(TestFixture::as_string("abc")); + std::basic_string<TypeParam> xyz(TestFixture::as_string("xyz")); + std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar")); BasicStringPiece<TypeParam> 
a(alphabet); BasicStringPiece<TypeParam> b(abc); @@ -215,12 +212,12 @@ d = Piece(); Piece e; - TypeParam temp(TestFixture::as_string("123")); + std::basic_string<TypeParam> temp(TestFixture::as_string("123")); temp.push_back('\0'); temp += TestFixture::as_string("456"); Piece f(temp); - typename TypeParam::value_type buf[4] = { '%', '%', '%', '%' }; + TypeParam buf[4] = {'%', '%', '%', '%'}; ASSERT_EQ(a.copy(buf, 4), 4U); ASSERT_EQ(buf[0], a[0]); ASSERT_EQ(buf[1], a[1]); @@ -237,7 +234,7 @@ ASSERT_EQ(buf[2], c[2]); ASSERT_EQ(buf[3], a[3]); - ASSERT_EQ(Piece::npos, TypeParam::npos); + ASSERT_EQ(Piece::npos, std::basic_string<TypeParam>::npos); ASSERT_EQ(a.find(b), 0U); ASSERT_EQ(a.find(b, 1), Piece::npos); @@ -250,7 +247,8 @@ ASSERT_EQ(a.find(e), 0U); ASSERT_EQ(a.find(d, 12), 12U); ASSERT_EQ(a.find(e, 17), 17U); - TypeParam not_found(TestFixture::as_string("xx not found bb")); + std::basic_string<TypeParam> not_found( + TestFixture::as_string("xx not found bb")); Piece g(not_found); ASSERT_EQ(a.find(g), Piece::npos); // empty string nonsense @@ -259,7 +257,8 @@ ASSERT_EQ(d.find(b, 4), Piece::npos); ASSERT_EQ(e.find(b, 7), Piece::npos); - size_t empty_search_pos = TypeParam().find(TypeParam()); + size_t empty_search_pos = + std::basic_string<TypeParam>().find(std::basic_string<TypeParam>()); ASSERT_EQ(d.find(d), empty_search_pos); ASSERT_EQ(d.find(e), empty_search_pos); ASSERT_EQ(e.find(d), empty_search_pos); @@ -269,7 +268,7 @@ ASSERT_EQ(e.find(d, 4), std::string().find(std::string(), 4)); ASSERT_EQ(e.find(e, 4), std::string().find(std::string(), 4)); - constexpr typename TypeParam::value_type kNul = '\0'; + constexpr TypeParam kNul = '\0'; ASSERT_EQ(a.find('a'), 0U); ASSERT_EQ(a.find('c'), 2U); ASSERT_EQ(a.find('z'), 25U); @@ -296,8 +295,6 @@ ASSERT_EQ(a.find(c.data(), 9, 0), 9U); ASSERT_EQ(a.find(c.data(), Piece::npos, 0), Piece::npos); ASSERT_EQ(b.find(c.data(), Piece::npos, 0), Piece::npos); - ASSERT_EQ(a.find(d.data(), 12, 0), 12U); - ASSERT_EQ(a.find(e.data(), 
17, 0), 17U); // empty string nonsense ASSERT_EQ(d.find(b.data(), 4, 0), Piece::npos); ASSERT_EQ(e.find(b.data(), 7, 0), Piece::npos); @@ -306,21 +303,10 @@ ASSERT_EQ(a.find(c.data(), 9), 23U); ASSERT_EQ(a.find(c.data(), Piece::npos), Piece::npos); ASSERT_EQ(b.find(c.data(), Piece::npos), Piece::npos); - ASSERT_EQ(a.find(d.data(), 12), 12U); - ASSERT_EQ(a.find(e.data(), 17), 17U); // empty string nonsense ASSERT_EQ(d.find(b.data(), 4), Piece::npos); ASSERT_EQ(e.find(b.data(), 7), Piece::npos); - ASSERT_EQ(d.find(d.data(), 4, 0), - std::string().find(std::string().data(), 4, 0)); - ASSERT_EQ(d.find(e.data(), 4, 1), - std::string().find(std::string().data(), 4, 1)); - ASSERT_EQ(e.find(d.data(), 4, 2), - std::string().find(std::string().data(), 4, 2)); - ASSERT_EQ(e.find(e.data(), 4, 3), - std::string().find(std::string().data(), 4, 3)); - ASSERT_EQ(a.rfind(b), 0U); ASSERT_EQ(a.rfind(b, 1), 0U); ASSERT_EQ(a.rfind(c), 23U); @@ -329,10 +315,14 @@ ASSERT_EQ(a.rfind(c, 0U), Piece::npos); ASSERT_EQ(b.rfind(c), Piece::npos); ASSERT_EQ(b.rfind(c, 0U), Piece::npos); - ASSERT_EQ(a.rfind(d), static_cast<size_t>(a.rfind(TypeParam()))); - ASSERT_EQ(a.rfind(e), a.rfind(TypeParam())); - ASSERT_EQ(a.rfind(d), static_cast<size_t>(TypeParam(a).rfind(TypeParam()))); - ASSERT_EQ(a.rfind(e), TypeParam(a).rfind(TypeParam())); + ASSERT_EQ(a.rfind(d), + static_cast<size_t>(a.rfind(std::basic_string<TypeParam>()))); + ASSERT_EQ(a.rfind(e), a.rfind(std::basic_string<TypeParam>())); + ASSERT_EQ(a.rfind(d), + static_cast<size_t>(std::basic_string<TypeParam>(a).rfind( + std::basic_string<TypeParam>()))); + ASSERT_EQ(a.rfind(e), std::basic_string<TypeParam>(a).rfind( + std::basic_string<TypeParam>())); ASSERT_EQ(a.rfind(d, 12), 12U); ASSERT_EQ(a.rfind(e, 17), 17U); ASSERT_EQ(a.rfind(g), Piece::npos); @@ -370,19 +360,12 @@ ASSERT_EQ(a.rfind(c.data(), 1U, 0), 1U); ASSERT_EQ(a.rfind(c.data(), 0U, 0), 0U); ASSERT_EQ(b.rfind(c.data(), 0U, 0), 0U); - ASSERT_EQ(a.rfind(d.data(), 12, 0), 12U); - 
ASSERT_EQ(a.rfind(e.data(), 17, 0), 17U); ASSERT_EQ(d.rfind(b.data(), 4, 0), 0U); ASSERT_EQ(e.rfind(b.data(), 7, 0), 0U); - // empty string nonsense - ASSERT_EQ(d.rfind(d.data(), 4), std::string().rfind(std::string())); - ASSERT_EQ(e.rfind(d.data(), 7), std::string().rfind(std::string())); - ASSERT_EQ(d.rfind(e.data(), 4), std::string().rfind(std::string())); - ASSERT_EQ(e.rfind(e.data(), 7), std::string().rfind(std::string())); - - TypeParam one_two_three_four(TestFixture::as_string("one,two:three;four")); - TypeParam comma_colon(TestFixture::as_string(",:")); + std::basic_string<TypeParam> one_two_three_four( + TestFixture::as_string("one,two:three;four")); + std::basic_string<TypeParam> comma_colon(TestFixture::as_string(",:")); ASSERT_EQ(3U, Piece(one_two_three_four).find_first_of(comma_colon)); ASSERT_EQ(a.find_first_of(b), 0U); ASSERT_EQ(a.find_first_of(b, 0), 0U); @@ -416,6 +399,10 @@ ASSERT_EQ(a.find_first_not_of(f), 0U); ASSERT_EQ(a.find_first_not_of(d), 0U); ASSERT_EQ(a.find_first_not_of(e), 0U); + ASSERT_EQ(a.find_first_not_of(d, 1), 1U); + ASSERT_EQ(a.find_first_not_of(e, 1), 1U); + ASSERT_EQ(a.find_first_not_of(d, a.size()), Piece::npos); + ASSERT_EQ(a.find_first_not_of(e, a.size()), Piece::npos); // empty string nonsense ASSERT_EQ(d.find_first_not_of(a), Piece::npos); ASSERT_EQ(e.find_first_not_of(a), Piece::npos); @@ -424,7 +411,7 @@ ASSERT_EQ(d.find_first_not_of(e), Piece::npos); ASSERT_EQ(e.find_first_not_of(e), Piece::npos); - TypeParam equals(TestFixture::as_string("====")); + std::basic_string<TypeParam> equals(TestFixture::as_string("====")); Piece h(equals); ASSERT_EQ(h.find_first_not_of('='), Piece::npos); ASSERT_EQ(h.find_first_not_of('=', 3), Piece::npos); @@ -440,7 +427,7 @@ ASSERT_EQ(e.find_first_not_of(kNul), Piece::npos); // Piece g("xx not found bb"); - TypeParam fifty_six(TestFixture::as_string("56")); + std::basic_string<TypeParam> fifty_six(TestFixture::as_string("56")); Piece i(fifty_six); ASSERT_EQ(h.find_last_of(a), Piece::npos); 
ASSERT_EQ(g.find_last_of(a), g.size()-1); @@ -525,14 +512,14 @@ } TYPED_TEST(CommonStringPieceTest, CheckCustom) { - TypeParam foobar(TestFixture::as_string("foobar")); + std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar")); BasicStringPiece<TypeParam> a(foobar); - TypeParam s1(TestFixture::as_string("123")); - s1 += static_cast<typename TypeParam::value_type>('\0'); + std::basic_string<TypeParam> s1(TestFixture::as_string("123")); + s1 += static_cast<TypeParam>('\0'); s1 += TestFixture::as_string("456"); BasicStringPiece<TypeParam> b(s1); BasicStringPiece<TypeParam> e; - TypeParam s2; + std::basic_string<TypeParam> s2; // remove_prefix BasicStringPiece<TypeParam> c(a); @@ -565,9 +552,10 @@ ASSERT_NE(c, a); // operator STRING_TYPE() - TypeParam s5(TypeParam(a).c_str(), 7); // Note, has an embedded NULL + std::basic_string<TypeParam> s5(std::basic_string<TypeParam>(a).c_str(), + 7); // Note, has an embedded NULL ASSERT_EQ(c, s5); - TypeParam s6(e); + std::basic_string<TypeParam> s6(e); ASSERT_TRUE(s6.empty()); } @@ -594,15 +582,18 @@ ASSERT_EQ(s.data(), nullptr); ASSERT_EQ(s.size(), 0U); - TypeParam str(s); + std::basic_string<TypeParam> str(s); ASSERT_EQ(str.length(), 0U); - ASSERT_EQ(str, TypeParam()); + ASSERT_EQ(str, std::basic_string<TypeParam>()); } TYPED_TEST(CommonStringPieceTest, CheckComparisons2) { - TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); - TypeParam alphabet_z(TestFixture::as_string("abcdefghijklmnopqrstuvwxyzz")); - TypeParam alphabet_y(TestFixture::as_string("abcdefghijklmnopqrstuvwxyy")); + std::basic_string<TypeParam> alphabet( + TestFixture::as_string("abcdefghijklmnopqrstuvwxyz")); + std::basic_string<TypeParam> alphabet_z( + TestFixture::as_string("abcdefghijklmnopqrstuvwxyzz")); + std::basic_string<TypeParam> alphabet_y( + TestFixture::as_string("abcdefghijklmnopqrstuvwxyy")); BasicStringPiece<TypeParam> abc(alphabet); // check comparison operations on strings longer than 4 bytes. 
@@ -624,38 +615,37 @@ } TYPED_TEST(CommonStringPieceTest, HeterogenousStringPieceEquals) { - TypeParam hello(TestFixture::as_string("hello")); + std::basic_string<TypeParam> hello(TestFixture::as_string("hello")); ASSERT_EQ(BasicStringPiece<TypeParam>(hello), hello); ASSERT_EQ(hello.c_str(), BasicStringPiece<TypeParam>(hello)); } -// string16-specific stuff +// std::u16string-specific stuff TEST(StringPiece16Test, CheckSTL) { // Check some non-ascii characters. - string16 fifth(ASCIIToUTF16("123")); + std::u16string fifth(u"123"); fifth.push_back(0x0000); fifth.push_back(0xd8c5); fifth.push_back(0xdffe); StringPiece16 f(fifth); ASSERT_EQ(f[3], '\0'); - ASSERT_EQ(f[5], static_cast<char16>(0xdffe)); + ASSERT_EQ(f[5], 0xdffe); ASSERT_EQ(f.size(), 6U); } - - TEST(StringPiece16Test, CheckConversion) { - // Make sure that we can convert from UTF8 to UTF16 and back. We use a two - // byte character (G clef) to test this. - ASSERT_EQ(UTF16ToUTF8(UTF8ToUTF16("\xf0\x9d\x84\x9e")), "\xf0\x9d\x84\x9e"); + // Make sure that we can convert from UTF8 to UTF16 and back. We use a + // character (G clef) outside the BMP to test this. 
+ const char* kTest = "\U0001D11E"; + ASSERT_EQ(UTF16ToUTF8(UTF8ToUTF16(kTest)), kTest); } TYPED_TEST(CommonStringPieceTest, CheckConstructors) { - TypeParam str(TestFixture::as_string("hello world")); - TypeParam empty; + std::basic_string<TypeParam> str(TestFixture::as_string("hello world")); + std::basic_string<TypeParam> empty; ASSERT_EQ(str, BasicStringPiece<TypeParam>(str)); ASSERT_EQ(str, BasicStringPiece<TypeParam>(str.c_str())); @@ -856,4 +846,46 @@ static_assert(piece.substr(0, 99) == piece, ""); } +TEST(StringPieceTest, Find) { + constexpr StringPiece foobar("foobar", 6); + constexpr StringPiece foo = foobar.substr(0, 3); + constexpr StringPiece bar = foobar.substr(3); + + // find + static_assert(foobar.find(bar, 0) == 3, ""); + static_assert(foobar.find('o', 0) == 1, ""); + static_assert(foobar.find("ox", 0, 1) == 1, ""); + static_assert(foobar.find("ox", 0) == StringPiece::npos, ""); + + // rfind + static_assert(foobar.rfind(bar, 5) == 3, ""); + static_assert(foobar.rfind('o', 5) == 2, ""); + static_assert(foobar.rfind("ox", 5, 1) == 2, ""); + static_assert(foobar.rfind("ox", 5) == StringPiece::npos, ""); + + // find_first_of + static_assert(foobar.find_first_of(foo, 2) == 2, ""); + static_assert(foobar.find_first_of('o', 2) == 2, ""); + static_assert(foobar.find_first_of("ox", 2, 2) == 2, ""); + static_assert(foobar.find_first_of("ox", 2) == 2, ""); + + // find_last_of + static_assert(foobar.find_last_of(foo, 5) == 2, ""); + static_assert(foobar.find_last_of('o', 5) == 2, ""); + static_assert(foobar.find_last_of("ox", 5, 2) == 2, ""); + static_assert(foobar.find_last_of("ox", 5) == 2, ""); + + // find_first_not_of + static_assert(foobar.find_first_not_of(foo, 2) == 3, ""); + static_assert(foobar.find_first_not_of('o', 2) == 3, ""); + static_assert(foobar.find_first_not_of("ox", 2, 2) == 3, ""); + static_assert(foobar.find_first_not_of("ox", 2) == 3, ""); + + // find_last_not_of + static_assert(foobar.find_last_not_of(bar, 5) == 2, ""); + 
static_assert(foobar.find_last_not_of('a', 4) == 3, ""); + static_assert(foobar.find_last_not_of("ox", 2, 2) == 0, ""); + static_assert(foobar.find_last_not_of("ox", 2) == 0, ""); +} + } // namespace base
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc index 40dedb7..24b4a21 100644 --- a/base/strings/string_split.cc +++ b/base/strings/string_split.cc
@@ -55,12 +55,12 @@ result_type); } -std::vector<string16> SplitString(StringPiece16 input, - StringPiece16 separators, - WhitespaceHandling whitespace, - SplitResult result_type) { - return internal::SplitStringT<string16>(input, separators, whitespace, - result_type); +std::vector<std::u16string> SplitString(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringT<std::u16string>(input, separators, whitespace, + result_type); } std::vector<StringPiece> SplitStringPiece(StringPiece input, @@ -110,12 +110,13 @@ return success; } -std::vector<string16> SplitStringUsingSubstr(StringPiece16 input, - StringPiece16 delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - return internal::SplitStringUsingSubstrT<string16>(input, delimiter, - whitespace, result_type); +std::vector<std::u16string> SplitStringUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringUsingSubstrT<std::u16string>( + input, delimiter, whitespace, result_type); } std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
diff --git a/base/strings/string_split.h b/base/strings/string_split.h index 039a049..d7f56a6 100644 --- a/base/strings/string_split.h +++ b/base/strings/string_split.h
@@ -10,7 +10,6 @@ #include <vector> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "build/build_config.h" @@ -51,11 +50,11 @@ WhitespaceHandling whitespace, SplitResult result_type) WARN_UNUSED_RESULT; -BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input, - StringPiece16 separators, - WhitespaceHandling whitespace, - SplitResult result_type) - WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<std::u16string> SplitString( + StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; // Like SplitString above except it returns a vector of StringPieces which // reference the original buffer without copying. Although you have to be @@ -103,7 +102,7 @@ // Similar to SplitString, but use a substring delimiter instead of a list of // characters that are all possible delimiters. -BASE_EXPORT std::vector<string16> SplitStringUsingSubstr( +BASE_EXPORT std::vector<std::u16string> SplitStringUsingSubstr( StringPiece16 input, StringPiece16 delimiter, WhitespaceHandling whitespace,
diff --git a/base/strings/string_split_internal.h b/base/strings/string_split_internal.h index 9dc3763..4430381 100644 --- a/base/strings/string_split_internal.h +++ b/base/strings/string_split_internal.h
@@ -15,45 +15,46 @@ namespace internal { // Returns either the ASCII or UTF-16 whitespace. -template <typename Str> -BasicStringPiece<Str> WhitespaceForType(); +template <typename CharT> +BasicStringPiece<CharT> WhitespaceForType(); template <> -inline StringPiece16 WhitespaceForType<string16>() { +inline StringPiece16 WhitespaceForType<char16_t>() { return kWhitespaceUTF16; } template <> -inline StringPiece WhitespaceForType<std::string>() { +inline StringPiece WhitespaceForType<char>() { return kWhitespaceASCII; } // General string splitter template. Can take 8- or 16-bit input, can produce // the corresponding string or StringPiece output. -template <typename OutputStringType, typename Str> -static std::vector<OutputStringType> SplitStringT( - BasicStringPiece<Str> str, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { +template <typename OutputStringType, + typename T, + typename CharT = typename T::value_type> +static std::vector<OutputStringType> SplitStringT(T str, + T delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { std::vector<OutputStringType> result; if (str.empty()) return result; size_t start = 0; - while (start != Str::npos) { + while (start != std::basic_string<CharT>::npos) { size_t end = str.find_first_of(delimiter, start); - BasicStringPiece<Str> piece; - if (end == Str::npos) { + BasicStringPiece<CharT> piece; + if (end == std::basic_string<CharT>::npos) { piece = str.substr(start); - start = Str::npos; + start = std::basic_string<CharT>::npos; } else { piece = str.substr(start, end - start); start = end + 1; } if (whitespace == TRIM_WHITESPACE) - piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); + piece = TrimString(piece, WhitespaceForType<CharT>(), TRIM_ALL); if (result_type == SPLIT_WANT_ALL || !piece.empty()) result.emplace_back(piece); @@ -61,13 +62,15 @@ return result; } -template <typename OutputStringType, typename Str> +template <typename 
OutputStringType, + typename T, + typename CharT = typename T::value_type> std::vector<OutputStringType> SplitStringUsingSubstrT( - BasicStringPiece<Str> input, - BasicStringPiece<Str> delimiter, + T input, + T delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - using Piece = BasicStringPiece<Str>; + using Piece = BasicStringPiece<CharT>; using size_type = typename Piece::size_type; std::vector<OutputStringType> result; @@ -84,7 +87,7 @@ : input.substr(begin_index, end_index - begin_index); if (whitespace == TRIM_WHITESPACE) - term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); + term = TrimString(term, WhitespaceForType<CharT>(), TRIM_ALL); if (result_type == SPLIT_WANT_ALL || !term.empty()) result.emplace_back(term);
diff --git a/base/strings/string_split_unittest.cc b/base/strings/string_split_unittest.cc index a3e13fa..5bafec5 100644 --- a/base/strings/string_split_unittest.cc +++ b/base/strings/string_split_unittest.cc
@@ -6,7 +6,6 @@ #include <stddef.h> -#include "base/macros.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "testing/gmock/include/gmock/gmock.h"
diff --git a/base/strings/string_split_win.cc b/base/strings/string_split_win.cc index 297853c..1327769 100644 --- a/base/strings/string_split_win.cc +++ b/base/strings/string_split_win.cc
@@ -12,11 +12,10 @@ namespace gurl_base { -#if defined(BASE_STRING16_IS_STD_U16STRING) namespace internal { template <> -inline WStringPiece WhitespaceForType<std::wstring>() { +inline WStringPiece WhitespaceForType<wchar_t>() { return kWhitespaceWide; } @@ -54,6 +53,5 @@ return internal::SplitStringUsingSubstrT<WStringPiece>( input, delimiter, whitespace, result_type); } -#endif } // namespace base
diff --git a/base/strings/string_split_win.h b/base/strings/string_split_win.h index 080641c..850d2ca 100644 --- a/base/strings/string_split_win.h +++ b/base/strings/string_split_win.h
@@ -10,19 +10,13 @@ #include "polyfills/base/base_export.h" #include "base/compiler_specific.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/string_split.h" namespace gurl_base { // The following section contains overloads of the cross-platform APIs for -// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring -// and gurl_base::string16 are distinct types, as otherwise this would result in an -// ODR violation. -// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is -// std::u16string. -#if defined(BASE_STRING16_IS_STD_U16STRING) +// std::wstring and gurl_base::WStringPiece. BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, WStringPiece separators, WhitespaceHandling whitespace, @@ -46,7 +40,6 @@ WStringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) WARN_UNUSED_RESULT; -#endif } // namespace base
diff --git a/base/strings/string_tokenizer.h b/base/strings/string_tokenizer.h index 7ee0178..14db1e1 100644 --- a/base/strings/string_tokenizer.h +++ b/base/strings/string_tokenizer.h
@@ -9,13 +9,17 @@ #include <string> #include "base/strings/string_piece.h" +#include "base/strings/string_util.h" namespace gurl_base { // StringTokenizerT is a simple string tokenizer class. It works like an // iterator that with each step (see the Advance method) updates members that // refer to the next token in the input string. The user may optionally -// configure the tokenizer to return delimiters. +// configure the tokenizer to return delimiters. For the optional +// WhitespacePolicy parameter, kSkipOver will cause the tokenizer to skip +// over whitespace characters. The tokenizer never stops on a whitespace +// character. // // EXAMPLE 1: // @@ -80,6 +84,23 @@ // } // // +// EXAMPLE 4: +// +// std::string input = "this, \t is, \t a, \t test"; +// StringTokenizer t(input, ",", +// StringTokenizer::WhitespacePolicy::kSkipOver); +// while (t.GetNext()) { +// printf("%s\n", t.token().c_str()); +// } +// +// Output: +// +// this +// is +// a +// test +// +// template <class str, class const_iterator> class StringTokenizerT { public: @@ -96,23 +117,41 @@ RETURN_EMPTY_TOKENS = 1 << 1, }; + // Policy indicating what to do with whitespace characters. Whitespace is + // defined to be the characters indicated here: + // https://www.w3schools.com/jsref/jsref_regexp_whitespace.asp + enum class WhitespacePolicy { + // Whitespace should be treated the same as any other non-delimiter + // character. + kIncludeInTokens, + // Whitespace is skipped over and not included in the resulting token. + // Whitespace will also delimit other tokens, however it is never returned + // even if RETURN_DELIMS is set. If quote chars are set (See set_quote_chars + // below) Whitespace will be included in a token when processing quotes. + kSkipOver, + }; + // The string object must live longer than the tokenizer. In particular, this // should not be constructed with a temporary. The deleted rvalue constructor // blocks the most obvious instances of this (e.g. 
passing a string literal to // the constructor), but caution must still be exercised. - StringTokenizerT(const str& string, - const str& delims) { - Init(string.begin(), string.end(), delims); + StringTokenizerT( + const str& string, + const str& delims, + WhitespacePolicy whitespace_policy = WhitespacePolicy::kIncludeInTokens) { + Init(string.begin(), string.end(), delims, whitespace_policy); } // Don't allow temporary strings to be used with string tokenizer, since // Init() would otherwise save iterators to a temporary string. StringTokenizerT(str&&, const str& delims) = delete; - StringTokenizerT(const_iterator string_begin, - const_iterator string_end, - const str& delims) { - Init(string_begin, string_end, delims); + StringTokenizerT( + const_iterator string_begin, + const_iterator string_end, + const str& delims, + WhitespacePolicy whitespace_policy = WhitespacePolicy::kIncludeInTokens) { + Init(string_begin, string_end, delims, whitespace_policy); } // Set the options for this tokenizer. By default, this is 0. 
@@ -151,15 +190,15 @@ const_iterator token_begin() const { return token_begin_; } const_iterator token_end() const { return token_end_; } str token() const { return str(token_begin_, token_end_); } - BasicStringPiece<str> token_piece() const { - return BasicStringPiece<str>(&*token_begin_, - std::distance(token_begin_, token_end_)); + BasicStringPiece<char_type> token_piece() const { + return MakeBasicStringPiece<char_type>(token_begin_, token_end_); } private: void Init(const_iterator string_begin, const_iterator string_end, - const str& delims) { + const str& delims, + WhitespacePolicy whitespace_policy) { start_pos_ = string_begin; token_begin_ = string_begin; token_end_ = string_begin; @@ -167,6 +206,19 @@ delims_ = delims; options_ = 0; token_is_delim_ = true; + whitespace_policy_ = whitespace_policy; + } + + bool ShouldSkip(char_type c) const { + return whitespace_policy_ == WhitespacePolicy::kSkipOver && + IsAsciiWhitespace(c); + } + + // Skip over any contiguous whitespace characters according to the whitespace + // policy. + void SkipWhitespace() { + while (token_end_ != end_ && ShouldSkip(*token_end_)) + ++token_end_; } // Implementation of GetNext() for when we have no quote characters. We have @@ -181,12 +233,16 @@ return false; } ++token_end_; - if (delims_.find(*token_begin_) == str::npos) + if (delims_.find(*token_begin_) == str::npos && + !ShouldSkip(*token_begin_)) { break; - // else skip over delimiter. + } + // else skip over delimiter or skippable character. } - while (token_end_ != end_ && delims_.find(*token_end_) == str::npos) + while (token_end_ != end_ && delims_.find(*token_end_) == str::npos && + !ShouldSkip(*token_end_)) { ++token_end_; + } return true; } @@ -194,6 +250,7 @@ bool FullGetNext() { AdvanceState state; + SkipWhitespace(); for (;;) { if (token_is_delim_) { // Last token was a delimiter. Note: This is also the case at the start. 
@@ -250,13 +307,9 @@ return false; } - bool IsDelim(char_type c) const { - return delims_.find(c) != str::npos; - } + bool IsDelim(char_type c) const { return delims_.find(c) != str::npos; } - bool IsQuote(char_type c) const { - return quotes_.find(c) != str::npos; - } + bool IsQuote(char_type c) const { return quotes_.find(c) != str::npos; } struct AdvanceState { bool in_quote; @@ -265,7 +318,8 @@ AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {} }; - // Returns true if a delimiter was not hit. + // Returns true if a delimiter or, depending on policy, whitespace was not + // hit. bool AdvanceOne(AdvanceState* state, char_type c) { if (state->in_quote) { if (state->in_escape) { @@ -276,7 +330,7 @@ state->in_quote = false; } } else { - if (IsDelim(c)) + if (IsDelim(c) || ShouldSkip(c)) return false; state->in_quote = IsQuote(state->quote_char = c); } @@ -291,11 +345,13 @@ str quotes_; int options_; bool token_is_delim_; + WhitespacePolicy whitespace_policy_; }; typedef StringTokenizerT<std::string, std::string::const_iterator> StringTokenizer; -typedef StringTokenizerT<string16, string16::const_iterator> String16Tokenizer; +typedef StringTokenizerT<std::u16string, std::u16string::const_iterator> + String16Tokenizer; typedef StringTokenizerT<std::string, const char*> CStringTokenizer; } // namespace base
diff --git a/base/strings/string_tokenizer_unittest.cc b/base/strings/string_tokenizer_unittest.cc index 1665d5d..9cca0c1 100644 --- a/base/strings/string_tokenizer_unittest.cc +++ b/base/strings/string_tokenizer_unittest.cc
@@ -382,6 +382,36 @@ EXPECT_FALSE(t.GetNext()); } +TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) { + string input = "\t\t\t foo=a,\r\n b,\r\n\t\t\t bar\t "; + StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver); + + EXPECT_TRUE(t.GetNext()); + EXPECT_EQ("foo=a", t.token()); + + EXPECT_TRUE(t.GetNext()); + EXPECT_EQ("b", t.token()); + + EXPECT_TRUE(t.GetNext()); + EXPECT_EQ("bar", t.token()); + + EXPECT_FALSE(t.GetNext()); +} + +TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) { + string input = "\t\t\t foo='a, b',\t\t\t bar\t "; + StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver); + t.set_quote_chars("'"); + + EXPECT_TRUE(t.GetNext()); + EXPECT_EQ("foo='a, b'", t.token()); + + EXPECT_TRUE(t.GetNext()); + EXPECT_EQ("bar", t.token()); + + EXPECT_FALSE(t.GetNext()); +} + } // namespace } // namespace base
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index c2e440f..f76d2f7 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc
@@ -71,7 +71,7 @@ return internal::ToLowerASCIIImpl(str); } -string16 ToLowerASCII(StringPiece16 str) { +std::u16string ToLowerASCII(StringPiece16 str) { return internal::ToLowerASCIIImpl(str); } @@ -79,7 +79,7 @@ return internal::ToUpperASCIIImpl(str); } -string16 ToUpperASCII(StringPiece16 str) { +std::u16string ToUpperASCII(StringPiece16 str) { return internal::ToUpperASCIIImpl(str); } @@ -106,15 +106,15 @@ return *s; } -const string16& EmptyString16() { - static const gurl_base::NoDestructor<string16> s16; +const std::u16string& EmptyString16() { + static const gurl_base::NoDestructor<std::u16string> s16; return *s16; } bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, - string16* output) { + std::u16string* output) { return internal::ReplaceCharsT(input, replace_chars, replace_with, output); } @@ -127,7 +127,7 @@ bool RemoveChars(StringPiece16 input, StringPiece16 remove_chars, - string16* output) { + std::u16string* output) { return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output); } @@ -139,7 +139,7 @@ bool TrimString(StringPiece16 input, StringPiece16 trim_chars, - string16* output) { + std::u16string* output) { return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; } @@ -202,7 +202,7 @@ TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, - string16* output) { + std::u16string* output) { return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); } @@ -225,8 +225,8 @@ positions); } -string16 CollapseWhitespace(StringPiece16 text, - bool trim_sequences_with_line_breaks) { +std::u16string CollapseWhitespace(StringPiece16 text, + bool trim_sequences_with_line_breaks) { return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); } @@ -331,7 +331,7 @@ " PB" }; -string16 FormatBytesUnlocalized(int64_t bytes) { +std::u16string FormatBytesUnlocalized(int64_t bytes) { double unit_amount = 
static_cast<double>(bytes); size_t dimension = 0; const int kKilo = 1024; @@ -353,12 +353,12 @@ return ASCIIToUTF16(buf); } -void ReplaceFirstSubstringAfterOffset(string16* str, +void ReplaceFirstSubstringAfterOffset(std::u16string* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<string16>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_FIRST); } @@ -367,16 +367,16 @@ StringPiece find_this, StringPiece replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_FIRST); } -void ReplaceSubstringsAfterOffset(string16* str, +void ReplaceSubstringsAfterOffset(std::u16string* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<string16>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_ALL); } @@ -385,7 +385,7 @@ StringPiece find_this, StringPiece replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_ALL); } @@ -393,7 +393,7 @@ return internal::WriteIntoT(str, length_with_null); } -char16* WriteInto(string16* str, size_t length_with_null) { +char16_t* WriteInto(std::u16string* str, size_t length_with_null) { return internal::WriteIntoT(str, length_with_null); } @@ -401,7 +401,8 @@ return internal::JoinStringT(parts, separator); } -string16 JoinString(span<const string16> parts, StringPiece16 separator) { +std::u16string JoinString(span<const 
std::u16string> parts, + StringPiece16 separator) { return internal::JoinStringT(parts, separator); } @@ -409,7 +410,8 @@ return internal::JoinStringT(parts, separator); } -string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) { +std::u16string JoinString(span<const StringPiece16> parts, + StringPiece16 separator) { return internal::JoinStringT(parts, separator); } @@ -418,14 +420,15 @@ return internal::JoinStringT(parts, separator); } -string16 JoinString(std::initializer_list<StringPiece16> parts, - StringPiece16 separator) { +std::u16string JoinString(std::initializer_list<StringPiece16> parts, + StringPiece16 separator) { return internal::JoinStringT(parts, separator); } -string16 ReplaceStringPlaceholders(StringPiece16 format_string, - const std::vector<string16>& subst, - std::vector<size_t>* offsets) { +std::u16string ReplaceStringPlaceholders( + StringPiece16 format_string, + const std::vector<std::u16string>& subst, + std::vector<size_t>* offsets) { return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } @@ -435,11 +438,12 @@ return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } -string16 ReplaceStringPlaceholders(const string16& format_string, - const string16& a, - size_t* offset) { +std::u16string ReplaceStringPlaceholders(const std::u16string& format_string, + const std::u16string& a, + size_t* offset) { std::vector<size_t> offsets; - string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets); + std::u16string result = + ReplaceStringPlaceholders(format_string, {a}, &offsets); GURL_DCHECK_EQ(1U, offsets.size()); if (offset)
diff --git a/base/strings/string_util.h b/base/strings/string_util.h index a1e5c59..ccbf745 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h
@@ -20,8 +20,6 @@ #include "polyfills/base/base_export.h" #include "base/compiler_specific.h" #include "base/containers/span.h" -#include "base/stl_util.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" // For implicit conversions. #include "build/build_config.h" @@ -86,27 +84,27 @@ // Simplified implementation of C++20's std::basic_string_view(It, End). // Reference: https://wg21.link/string.view.cons -template <typename StringT, typename Iter> -constexpr BasicStringPiece<StringT> MakeBasicStringPiece(Iter begin, Iter end) { +template <typename CharT, typename Iter> +constexpr BasicStringPiece<CharT> MakeBasicStringPiece(Iter begin, Iter end) { GURL_DCHECK_GE(end - begin, 0); - return {gurl_base::to_address(begin), end - begin}; + return {gurl_base::to_address(begin), static_cast<size_t>(end - begin)}; } // Explicit instantiations of MakeBasicStringPiece for the BasicStringPiece // aliases defined in base/strings/string_piece_forward.h template <typename Iter> constexpr StringPiece MakeStringPiece(Iter begin, Iter end) { - return MakeBasicStringPiece<std::string>(begin, end); + return MakeBasicStringPiece<char>(begin, end); } template <typename Iter> constexpr StringPiece16 MakeStringPiece16(Iter begin, Iter end) { - return MakeBasicStringPiece<string16>(begin, end); + return MakeBasicStringPiece<char16_t>(begin, end); } template <typename Iter> constexpr WStringPiece MakeWStringPiece(Iter begin, Iter end) { - return MakeBasicStringPiece<std::wstring>(begin, end); + return MakeBasicStringPiece<wchar_t>(begin, end); } // ASCII-specific tolower. The standard library's tolower is locale sensitive, @@ -127,11 +125,11 @@ // Converts the given string to it's ASCII-lowercase equivalent. BASE_EXPORT std::string ToLowerASCII(StringPiece str); -BASE_EXPORT string16 ToLowerASCII(StringPiece16 str); +BASE_EXPORT std::u16string ToLowerASCII(StringPiece16 str); // Converts the given string to it's ASCII-uppercase equivalent. 
BASE_EXPORT std::string ToUpperASCII(StringPiece str); -BASE_EXPORT string16 ToUpperASCII(StringPiece16 str); +BASE_EXPORT std::u16string ToUpperASCII(StringPiece16 str); // Functor for case-insensitive ASCII comparisons for STL algorithms like // std::search. @@ -178,16 +176,17 @@ // These should not be used as initializers, function arguments, or return // values for functions which return by value or outparam. BASE_EXPORT const std::string& EmptyString(); -BASE_EXPORT const string16& EmptyString16(); +BASE_EXPORT const std::u16string& EmptyString16(); // Contains the set of characters representing whitespace in the corresponding // encoding. Null-terminated. The ASCII versions are the whitespaces as defined // by HTML5, and don't include control characters. BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. -BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. -BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF. +BASE_EXPORT extern const char16_t kWhitespaceUTF16[]; // Includes Unicode. +BASE_EXPORT extern const char16_t + kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF. BASE_EXPORT extern const char kWhitespaceASCII[]; -BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. +BASE_EXPORT extern const char16_t kWhitespaceASCIIAs16[]; // No unicode. // Null-terminated string representing the UTF-8 byte order mark. BASE_EXPORT extern const char kUtf8ByteOrderMark[]; @@ -197,7 +196,7 @@ // NOTE: Safe to use the same variable for both |input| and |output|. 
BASE_EXPORT bool RemoveChars(StringPiece16 input, StringPiece16 remove_chars, - string16* output); + std::u16string* output); BASE_EXPORT bool RemoveChars(StringPiece input, StringPiece remove_chars, std::string* output); @@ -210,7 +209,7 @@ BASE_EXPORT bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, - string16* output); + std::u16string* output); BASE_EXPORT bool ReplaceChars(StringPiece input, StringPiece replace_chars, StringPiece replace_with, @@ -231,7 +230,7 @@ // the normal usage to trim in-place). BASE_EXPORT bool TrimString(StringPiece16 input, StringPiece16 trim_chars, - string16* output); + std::u16string* output); BASE_EXPORT bool TrimString(StringPiece input, StringPiece trim_chars, std::string* output); @@ -260,7 +259,7 @@ // NOTE: Safe to use the same variable for both input and output. BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, - string16* output); + std::u16string* output); BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input, TrimPositions positions); BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input, @@ -277,8 +276,9 @@ // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace // sequences containing a CR or LF are trimmed. // (3) All other whitespace sequences are converted to single spaces. -BASE_EXPORT string16 CollapseWhitespace(StringPiece16 text, - bool trim_sequences_with_line_breaks); +BASE_EXPORT std::u16string CollapseWhitespace( + StringPiece16 text, + bool trim_sequences_with_line_breaks); BASE_EXPORT std::string CollapseWhitespaceASCII( StringPiece text, bool trim_sequences_with_line_breaks); @@ -403,15 +403,14 @@ // appropriate for use in any UI; use of FormatBytes and friends in ui/base is // highly recommended instead. TODO(avi): Figure out how to get callers to use // FormatBytes instead; remove this. 
-BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes); +BASE_EXPORT std::u16string FormatBytesUnlocalized(int64_t bytes); // Starting at |start_offset| (usually 0), replace the first instance of // |find_this| with |replace_with|. -BASE_EXPORT void ReplaceFirstSubstringAfterOffset( - gurl_base::string16* str, - size_t start_offset, - StringPiece16 find_this, - StringPiece16 replace_with); +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::u16string* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); BASE_EXPORT void ReplaceFirstSubstringAfterOffset( std::string* str, size_t start_offset, @@ -424,11 +423,10 @@ // This does entire substrings; use std::replace in <algorithm> for single // characters, for example: // std::replace(str.begin(), str.end(), 'a', 'b'); -BASE_EXPORT void ReplaceSubstringsAfterOffset( - string16* str, - size_t start_offset, - StringPiece16 find_this, - StringPiece16 replace_with); +BASE_EXPORT void ReplaceSubstringsAfterOffset(std::u16string* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); BASE_EXPORT void ReplaceSubstringsAfterOffset( std::string* str, size_t start_offset, @@ -452,7 +450,7 @@ // than str.c_str() will get back a string of whatever size |str| had on entry // to this function (probably 0). BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); -BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); +BASE_EXPORT char16_t* WriteInto(std::u16string* str, size_t length_with_null); // Joins a list of strings into a single string, inserting |separator| (which // may be empty) in between all elements. @@ -469,28 +467,29 @@ // Use StrCat (in base/strings/strcat.h) if you don't need a separator. 
BASE_EXPORT std::string JoinString(span<const std::string> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(span<const string16> parts, - StringPiece16 separator); +BASE_EXPORT std::u16string JoinString(span<const std::u16string> parts, + StringPiece16 separator); BASE_EXPORT std::string JoinString(span<const StringPiece> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(span<const StringPiece16> parts, - StringPiece16 separator); +BASE_EXPORT std::u16string JoinString(span<const StringPiece16> parts, + StringPiece16 separator); // Explicit initializer_list overloads are required to break ambiguity when used // with a literal initializer list (otherwise the compiler would not be able to // decide between the string and StringPiece overloads). BASE_EXPORT std::string JoinString(std::initializer_list<StringPiece> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts, - StringPiece16 separator); +BASE_EXPORT std::u16string JoinString( + std::initializer_list<StringPiece16> parts, + StringPiece16 separator); // Replace $1-$2-$3..$9 in the format string with values from |subst|. // Additionally, any number of consecutive '$' characters is replaced by that // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be // NULL. This only allows you to use up to nine replacements. -BASE_EXPORT string16 -ReplaceStringPlaceholders(StringPiece16 format_string, - const std::vector<string16>& subst, - std::vector<size_t>* offsets); +BASE_EXPORT std::u16string ReplaceStringPlaceholders( + StringPiece16 format_string, + const std::vector<std::u16string>& subst, + std::vector<size_t>* offsets); BASE_EXPORT std::string ReplaceStringPlaceholders( StringPiece format_string, @@ -498,9 +497,10 @@ std::vector<size_t>* offsets); // Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. 
-BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, - const string16& a, - size_t* offset); +BASE_EXPORT std::u16string ReplaceStringPlaceholders( + const std::u16string& format_string, + const std::u16string& a, + size_t* offset); } // namespace base
diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc index e9e4d93..198cd53 100644 --- a/base/strings/string_util_constants.cc +++ b/base/strings/string_util_constants.cc
@@ -44,10 +44,10 @@ #define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0}; -const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0}; -const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0}; +const char16_t kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0}; +const char16_t kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0}; const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0}; -const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0}; +const char16_t kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0}; const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h index ccc1367..07f4930 100644 --- a/base/strings/string_util_internal.h +++ b/base/strings/string_util_internal.h
@@ -43,34 +43,33 @@ return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); } -template <typename StringType> -StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; +template <typename T, typename CharT = typename T::value_type> +std::basic_string<CharT> ToLowerASCIIImpl(T str) { + std::basic_string<CharT> ret; ret.reserve(str.size()); for (size_t i = 0; i < str.size(); i++) ret.push_back(ToLowerASCII(str[i])); return ret; } -template <typename StringType> -StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; +template <typename T, typename CharT = typename T::value_type> +std::basic_string<CharT> ToUpperASCIIImpl(T str) { + std::basic_string<CharT> ret; ret.reserve(str.size()); for (size_t i = 0; i < str.size(); i++) ret.push_back(ToUpperASCII(str[i])); return ret; } -template <class StringType> -int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, - BasicStringPiece<StringType> b) { +template <typename T, typename CharT = typename T::value_type> +int CompareCaseInsensitiveASCIIT(T a, T b) { // Find the first characters that aren't equal and compare them. If the end // of one of the strings is found before a nonequal character, the lengths // of the strings are compared. 
size_t i = 0; while (i < a.length() && i < b.length()) { - typename StringType::value_type lower_a = ToLowerASCII(a[i]); - typename StringType::value_type lower_b = ToLowerASCII(b[i]); + CharT lower_a = ToLowerASCII(a[i]); + CharT lower_b = ToLowerASCII(b[i]); if (lower_a < lower_b) return -1; if (lower_a > lower_b) @@ -88,11 +87,11 @@ return 1; } -template <typename Str> -TrimPositions TrimStringT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, +template <typename T, typename CharT = typename T::value_type> +TrimPositions TrimStringT(T input, + T trim_chars, TrimPositions positions, - Str* output) { + std::basic_string<CharT>* output) { // Find the edges of leading/trailing whitespace as desired. Need to use // a StringPiece version of input to be able to call find* on it with the // StringPiece version of trim_chars (normally the trim_chars will be a @@ -107,8 +106,8 @@ // When the string was all trimmed, report that we stripped off characters // from whichever position the caller was interested in. For empty input, we // stripped no characters, but we still need to clear |output|. - if (input.empty() || first_good_char == Str::npos || - last_good_char == Str::npos) { + if (input.empty() || first_good_char == std::basic_string<CharT>::npos || + last_good_char == std::basic_string<CharT>::npos) { bool input_was_empty = input.empty(); // in case output == &input output->clear(); return input_was_empty ? TRIM_NONE : positions; @@ -124,10 +123,8 @@ (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING)); } -template <typename Str> -BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, - TrimPositions positions) { +template <typename T, typename CharT = typename T::value_type> +T TrimStringPieceT(T input, T trim_chars, TrimPositions positions) { size_t begin = (positions & TRIM_LEADING) ? 
input.find_first_not_of(trim_chars) : 0; size_t end = (positions & TRIM_TRAILING) @@ -136,10 +133,11 @@ return input.substr(std::min(begin, input.size()), end - begin); } -template <typename STR> -STR CollapseWhitespaceT(BasicStringPiece<STR> text, - bool trim_sequences_with_line_breaks) { - STR result; +template <typename T, typename CharT = typename T::value_type> +std::basic_string<CharT> CollapseWhitespaceT( + T text, + bool trim_sequences_with_line_breaks) { + std::basic_string<CharT> result; result.resize(text.size()); // Set flags to pretend we're already in a trimmed whitespace sequence, so we @@ -257,31 +255,27 @@ // The hardcoded strings are typically very short so it doesn't matter, and the // string piece gives additional flexibility for the caller (doesn't have to be // null terminated) so we choose the StringPiece route. -template <typename Str> -inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, - StringPiece lowercase_ascii) { +template <typename T, typename CharT = typename T::value_type> +inline bool DoLowerCaseEqualsASCII(T str, StringPiece lowercase_ascii) { return std::equal( str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(), [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; }); } -template <typename Str> -bool StartsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { +template <typename T, typename CharT = typename T::value_type> +bool StartsWithT(T str, T search_for, CompareCase case_sensitivity) { if (search_for.size() > str.size()) return false; - BasicStringPiece<Str> source = str.substr(0, search_for.size()); + BasicStringPiece<CharT> source = str.substr(0, search_for.size()); switch (case_sensitivity) { case CompareCase::SENSITIVE: return source == search_for; case CompareCase::INSENSITIVE_ASCII: - return std::equal( - search_for.begin(), search_for.end(), source.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); + return 
std::equal(search_for.begin(), search_for.end(), source.begin(), + CaseInsensitiveCompareASCII<CharT>()); default: GURL_NOTREACHED(); @@ -289,14 +283,12 @@ } } -template <typename Str> -bool EndsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { +template <typename T, typename CharT = typename T::value_type> +bool EndsWithT(T str, T search_for, CompareCase case_sensitivity) { if (search_for.size() > str.size()) return false; - BasicStringPiece<Str> source = + BasicStringPiece<CharT> source = str.substr(str.size() - search_for.size(), search_for.size()); switch (case_sensitivity) { @@ -304,9 +296,8 @@ return source == search_for; case CompareCase::INSENSITIVE_ASCII: - return std::equal( - source.begin(), source.end(), search_for.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); + return std::equal(source.begin(), source.end(), search_for.begin(), + CaseInsensitiveCompareASCII<CharT>()); default: GURL_NOTREACHED(); @@ -315,28 +306,40 @@ } // A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. -template <class StringType> +template <class CharT> struct SubstringMatcher { - BasicStringPiece<StringType> find_this; + BasicStringPiece<CharT> find_this; - size_t Find(const StringType& input, size_t pos) { + size_t Find(const std::basic_string<CharT>& input, size_t pos) { return input.find(find_this.data(), pos, find_this.length()); } size_t MatchSize() { return find_this.length(); } }; -// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. -template <class StringType> -struct CharacterMatcher { - BasicStringPiece<StringType> find_any_of_these; +// Type deduction helper for SubstringMatcher. 
+template <typename T, typename CharT = typename T::value_type> +auto MakeSubstringMatcher(T find_this) { + return SubstringMatcher<CharT>{find_this}; +} - size_t Find(const StringType& input, size_t pos) { +// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. +template <class CharT> +struct CharacterMatcher { + BasicStringPiece<CharT> find_any_of_these; + + size_t Find(const std::basic_string<CharT>& input, size_t pos) { return input.find_first_of(find_any_of_these.data(), pos, find_any_of_these.length()); } constexpr size_t MatchSize() { return 1; } }; +// Type deduction helper for CharacterMatcher. +template <typename T, typename CharT = typename T::value_type> +auto MakeCharacterMatcher(T find_any_of_these) { + return CharacterMatcher<CharT>{find_any_of_these}; +} + enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; // Runs in O(n) time in the length of |str|, and transforms the string without @@ -344,13 +347,13 @@ // // This is parameterized on a |Matcher| traits type, so that it can be the // implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). -template <class StringType, class Matcher> -bool DoReplaceMatchesAfterOffset(StringType* str, +template <typename Matcher, typename T, typename CharT = typename T::value_type> +bool DoReplaceMatchesAfterOffset(std::basic_string<CharT>* str, size_t initial_offset, Matcher matcher, - BasicStringPiece<StringType> replace_with, + T replace_with, ReplaceType replace_type) { - using CharTraits = typename StringType::traits_type; + using CharTraits = std::char_traits<CharT>; const size_t find_length = matcher.MatchSize(); if (!find_length) @@ -358,7 +361,7 @@ // If the find string doesn't appear, there's nothing to do. 
size_t first_match = matcher.Find(*str, initial_offset); - if (first_match == StringType::npos) + if (first_match == std::basic_string<CharT>::npos) return false; // If we're only replacing one instance, there's no need to do anything @@ -373,7 +376,7 @@ // replace() on each instance, and finish the entire operation in O(n) time. if (find_length == replace_length) { auto* buffer = &((*str)[0]); - for (size_t offset = first_match; offset != StringType::npos; + for (size_t offset = first_match; offset != std::basic_string<CharT>::npos; offset = matcher.Find(*str, offset + replace_length)) { CharTraits::copy(buffer + offset, replace_with.data(), replace_length); } @@ -403,7 +406,7 @@ // matches. const size_t expansion_per_match = (replace_length - find_length); size_t num_matches = 0; - for (size_t match = first_match; match != StringType::npos; + for (size_t match = first_match; match != std::basic_string<CharT>::npos; match = matcher.Find(*str, match + find_length)) { expansion += expansion_per_match; ++num_matches; @@ -413,7 +416,7 @@ if (str->capacity() < final_length) { // If we'd have to allocate a new buffer to grow the string, build the // result directly into the new allocation via append(). - StringType src(str->get_allocator()); + std::basic_string<CharT> src(str->get_allocator()); str->swap(src); str->reserve(final_length); @@ -471,7 +474,8 @@ } read_offset += find_length; - // min() clamps StringType::npos (the largest unsigned value) to str_length. + // min() clamps std::basic_string<CharT>::npos (the largest unsigned value) + // to str_length. 
size_t match = std::min(matcher.Find(*str, read_offset), str_length); size_t length = match - read_offset; @@ -487,19 +491,19 @@ return true; } -template <class StringType> -bool ReplaceCharsT(BasicStringPiece<StringType> input, - BasicStringPiece<StringType> find_any_of_these, - BasicStringPiece<StringType> replace_with, - StringType* output) { +template <typename T, typename CharT = typename T::value_type> +bool ReplaceCharsT(T input, + T find_any_of_these, + T replace_with, + std::basic_string<CharT>* output) { // Commonly, this is called with output and input being the same string; in // that case, skip the copy. if (input.data() != output->data() || input.size() != output->size()) output->assign(input.data(), input.size()); - return DoReplaceMatchesAfterOffset( - output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, - ReplaceType::REPLACE_ALL); + return DoReplaceMatchesAfterOffset(output, 0, + MakeCharacterMatcher(find_any_of_these), + replace_with, ReplaceType::REPLACE_ALL); } template <class string_type> @@ -513,20 +517,21 @@ // Generic version for all JoinString overloads. |list_type| must be a sequence // (gurl_base::span or std::initializer_list) of strings/StringPieces (std::string, -// string16, StringPiece or StringPiece16). |string_type| is either std::string -// or string16. -template <typename list_type, typename string_type> -static string_type JoinStringT(list_type parts, - BasicStringPiece<string_type> sep) { +// std::u16string, StringPiece or StringPiece16). |CharT| is either char or +// char16_t. +template <typename list_type, + typename T, + typename CharT = typename T::value_type> +static std::basic_string<CharT> JoinStringT(list_type parts, T sep) { if (gurl_base::empty(parts)) - return string_type(); + return std::basic_string<CharT>(); // Pre-allocate the eventual size of the string. Start with the size of all of // the separators (note that this *assumes* parts.size() > 0). 
size_t total_size = (parts.size() - 1) * sep.size(); for (const auto& part : parts) total_size += part.size(); - string_type result; + std::basic_string<CharT> result; result.reserve(total_size); auto iter = parts.begin(); @@ -545,10 +550,10 @@ return result; } -template <class StringType> -StringType DoReplaceStringPlaceholders( - BasicStringPiece<StringType> format_string, - const std::vector<StringType>& subst, +template <typename T, typename CharT = typename T::value_type> +std::basic_string<CharT> DoReplaceStringPlaceholders( + T format_string, + const std::vector<std::basic_string<CharT>>& subst, std::vector<size_t>* offsets) { size_t substitutions = subst.size(); GURL_DCHECK_LT(substitutions, 10U); @@ -557,7 +562,7 @@ for (const auto& cur : subst) sub_length += cur.length(); - StringType formatted; + std::basic_string<CharT> formatted; formatted.reserve(format_string.length() + sub_length); std::vector<ReplacementOffset> r_offsets;
diff --git a/base/strings/string_util_perftest.cc b/base/strings/string_util_perftest.cc index 033df0e..8a5d540 100644 --- a/base/strings/string_util_perftest.cc +++ b/base/strings/string_util_perftest.cc
@@ -34,7 +34,7 @@ for (size_t non_ascii_loc = 0; non_ascii_loc < 3; ++non_ascii_loc) { size_t non_ascii_pos = str_length * non_ascii_loc / 2 + 2; MeasureIsStringASCII<std::string>(str_length, non_ascii_pos); - MeasureIsStringASCII<string16>(str_length, non_ascii_pos); + MeasureIsStringASCII<std::u16string>(str_length, non_ascii_pos); #if defined(WCHAR_T_IS_UTF32) MeasureIsStringASCII<std::basic_string<wchar_t>>(str_length, non_ascii_pos);
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc index beb99e2..f8326cc 100644 --- a/base/strings/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc
@@ -10,11 +10,12 @@ #include <stdint.h> #include <algorithm> +#include <string> #include <type_traits> #include "base/bits.h" #include "base/stl_util.h" -#include "base/strings/string16.h" +#include "base/strings/string_piece.h" #include "base/strings/utf_string_conversions.h" #include "build/build_config.h" #include "testing/gmock/include/gmock/gmock.h" @@ -355,23 +356,23 @@ #if defined(WCHAR_T_IS_UTF16) TEST(StringUtilTest, as_wcstr) { - char16 rw_buffer[10] = {}; + char16_t rw_buffer[10] = {}; static_assert( std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_buffer))>::value, ""); EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_wcstr(rw_buffer)); - string16 rw_str(10, '\0'); + std::u16string rw_str(10, '\0'); static_assert( std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_str))>::value, ""); EXPECT_EQ(static_cast<const void*>(rw_str.data()), as_writable_wcstr(rw_str)); - const char16 ro_buffer[10] = {}; + const char16_t ro_buffer[10] = {}; static_assert( std::is_same<const wchar_t*, decltype(as_wcstr(ro_buffer))>::value, ""); EXPECT_EQ(static_cast<const void*>(ro_buffer), as_wcstr(ro_buffer)); - const string16 ro_str(10, '\0'); + const std::u16string ro_str(10, '\0'); static_assert(std::is_same<const wchar_t*, decltype(as_wcstr(ro_str))>::value, ""); EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_wcstr(ro_str)); @@ -385,35 +386,37 @@ TEST(StringUtilTest, as_u16cstr) { wchar_t rw_buffer[10] = {}; static_assert( - std::is_same<char16*, decltype(as_writable_u16cstr(rw_buffer))>::value, + std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_buffer))>::value, ""); EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_u16cstr(rw_buffer)); std::wstring rw_str(10, '\0'); static_assert( - std::is_same<char16*, decltype(as_writable_u16cstr(rw_str))>::value, ""); + std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_str))>::value, + ""); EXPECT_EQ(static_cast<const void*>(rw_str.data()), as_writable_u16cstr(rw_str)); const wchar_t ro_buffer[10] = 
{}; static_assert( - std::is_same<const char16*, decltype(as_u16cstr(ro_buffer))>::value, ""); + std::is_same<const char16_t*, decltype(as_u16cstr(ro_buffer))>::value, + ""); EXPECT_EQ(static_cast<const void*>(ro_buffer), as_u16cstr(ro_buffer)); const std::wstring ro_str(10, '\0'); static_assert( - std::is_same<const char16*, decltype(as_u16cstr(ro_str))>::value, ""); + std::is_same<const char16_t*, decltype(as_u16cstr(ro_str))>::value, ""); EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_u16cstr(ro_str)); WStringPiece piece = ro_buffer; - static_assert(std::is_same<const char16*, decltype(as_u16cstr(piece))>::value, - ""); + static_assert( + std::is_same<const char16_t*, decltype(as_u16cstr(piece))>::value, ""); EXPECT_EQ(static_cast<const void*>(piece.data()), as_u16cstr(piece)); } #endif // defined(WCHAR_T_IS_UTF16) TEST(StringUtilTest, TrimWhitespace) { - string16 output; // Allow contents to carry over to next testcase + std::u16string output; // Allow contents to carry over to next testcase for (const auto& value : trim_cases) { EXPECT_EQ(value.return_value, TrimWhitespace(WideToUTF16(value.input), value.positions, @@ -422,14 +425,14 @@ } // Test that TrimWhitespace() can take the same string for input and output - output = ASCIIToUTF16(" This is a test \r\n"); + output = u" This is a test \r\n"; EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); - EXPECT_EQ(ASCIIToUTF16("This is a test"), output); + EXPECT_EQ(u"This is a test", output); // Once more, but with a string of whitespace - output = ASCIIToUTF16(" \r\n"); + output = u" \r\n"; EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); - EXPECT_EQ(string16(), output); + EXPECT_EQ(std::u16string(), output); std::string output_ascii; for (const auto& value : trim_cases_ascii) { @@ -521,10 +524,10 @@ TEST(StringUtilTest, IsStringASCII) { static char char_ascii[] = "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; - static char16 char16_ascii[] = { - '0', '1', '2', '3', '4', 
'5', '6', '7', '8', '9', '0', 'A', - 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6', - '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 }; + static char16_t char16_ascii[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', + '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', '0', + '1', '2', '3', '4', '5', '6', '7', '8', '9', + '0', 'A', 'B', 'C', 'D', 'E', 'F', 0}; static std::wstring wchar_ascii( L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"); @@ -604,7 +607,7 @@ for (size_t i = 0; i < gurl_base::size(char_cases); ++i) { EXPECT_TRUE(IsStringASCII(char_cases[i])); - string16 utf16 = ASCIIToUTF16(char_cases[i]); + std::u16string utf16 = ASCIIToUTF16(char_cases[i]); EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16); std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i])); @@ -614,7 +617,7 @@ EXPECT_FALSE(IsStringASCII("Google \x80Video")); // Convert empty strings. - string16 empty16; + std::u16string empty16; std::string empty; EXPECT_EQ(empty, UTF16ToASCII(empty16)); EXPECT_EQ(empty16, ASCIIToUTF16(empty)); @@ -623,8 +626,8 @@ const char chars_with_nul[] = "test\0string"; const int length_with_nul = gurl_base::size(chars_with_nul) - 1; std::string string_with_nul(chars_with_nul, length_with_nul); - string16 string16_with_nul = ASCIIToUTF16(string_with_nul); - EXPECT_EQ(static_cast<string16::size_type>(length_with_nul), + std::u16string string16_with_nul = ASCIIToUTF16(string_with_nul); + EXPECT_EQ(static_cast<std::u16string::size_type>(length_with_nul), string16_with_nul.length()); std::string narrow_with_nul = UTF16ToASCII(string16_with_nul); EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), @@ -637,12 +640,12 @@ EXPECT_EQ('c', ToLowerASCII('c')); EXPECT_EQ('2', ToLowerASCII('2')); - EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('C'))); - EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('c'))); - EXPECT_EQ(static_cast<char16>('2'), ToLowerASCII(static_cast<char16>('2'))); + EXPECT_EQ(u'c', 
ToLowerASCII(u'C')); + EXPECT_EQ(u'c', ToLowerASCII(u'c')); + EXPECT_EQ(u'2', ToLowerASCII(u'2')); EXPECT_EQ("cc2", ToLowerASCII("Cc2")); - EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2"))); + EXPECT_EQ(u"cc2", ToLowerASCII(u"Cc2")); } TEST(StringUtilTest, ToUpperASCII) { @@ -650,12 +653,12 @@ EXPECT_EQ('C', ToUpperASCII('c')); EXPECT_EQ('2', ToUpperASCII('2')); - EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('C'))); - EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('c'))); - EXPECT_EQ(static_cast<char16>('2'), ToUpperASCII(static_cast<char16>('2'))); + EXPECT_EQ(u'C', ToUpperASCII(u'C')); + EXPECT_EQ(u'C', ToUpperASCII(u'c')); + EXPECT_EQ(u'2', ToUpperASCII(u'2')); EXPECT_EQ("CC2", ToUpperASCII("Cc2")); - EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2"))); + EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2")); } TEST(StringUtilTest, LowerCaseEqualsASCII) { @@ -746,9 +749,9 @@ {"abababab", 1, "aba", "c", "abcbab"}, }; - // gurl_base::string16 variant + // std::u16string variant for (const auto& scenario : cases) { - string16 str = ASCIIToUTF16(scenario.str); + std::u16string str = ASCIIToUTF16(scenario.str); ReplaceSubstringsAfterOffset(&str, scenario.start_offset, ASCIIToUTF16(scenario.find_this), ASCIIToUTF16(scenario.replace_with)); @@ -757,7 +760,7 @@ // std::string with insufficient capacity: expansion must realloc the buffer. for (const auto& scenario : cases) { - std::string str = scenario.str.as_string(); + std::string str(scenario.str); str.shrink_to_fit(); // This is nonbinding, but it's the best we've got. ReplaceSubstringsAfterOffset(&str, scenario.start_offset, scenario.find_this, scenario.replace_with); @@ -766,7 +769,7 @@ // std::string with ample capacity: should be possible to grow in-place. 
for (const auto& scenario : cases) { - std::string str = scenario.str.as_string(); + std::string str(scenario.str); str.reserve(std::max(scenario.str.length(), scenario.expected.length()) * 2); @@ -779,7 +782,7 @@ TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { static const struct { const char* str; - string16::size_type start_offset; + std::u16string::size_type start_offset; const char* find_this; const char* replace_with; const char* expected; @@ -798,7 +801,7 @@ }; for (const auto& i : cases) { - string16 str = ASCIIToUTF16(i.str); + std::u16string str = ASCIIToUTF16(i.str); ReplaceFirstSubstringAfterOffset(&str, i.start_offset, ASCIIToUTF16(i.find_this), ASCIIToUTF16(i.replace_with)); @@ -856,25 +859,25 @@ } TEST(StringUtilTest, JoinString16) { - string16 separator = ASCIIToUTF16(", "); - std::vector<string16> parts; - EXPECT_EQ(string16(), JoinString(parts, separator)); + std::u16string separator = u", "; + std::vector<std::u16string> parts; + EXPECT_EQ(std::u16string(), JoinString(parts, separator)); - parts.push_back(string16()); - EXPECT_EQ(string16(), JoinString(parts, separator)); + parts.push_back(std::u16string()); + EXPECT_EQ(std::u16string(), JoinString(parts, separator)); parts.clear(); - parts.push_back(ASCIIToUTF16("a")); - EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); + parts.push_back(u"a"); + EXPECT_EQ(u"a", JoinString(parts, separator)); - parts.push_back(ASCIIToUTF16("b")); - parts.push_back(ASCIIToUTF16("c")); - EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); + parts.push_back(u"b"); + parts.push_back(u"c"); + EXPECT_EQ(u"a, b, c", JoinString(parts, separator)); - parts.push_back(ASCIIToUTF16("")); - EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); - parts.push_back(ASCIIToUTF16(" ")); - EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); + parts.push_back(u""); + EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator)); + parts.push_back(u" "); + 
EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|")); } TEST(StringUtilTest, JoinStringPiece) { @@ -901,30 +904,30 @@ } TEST(StringUtilTest, JoinStringPiece16) { - string16 separator = ASCIIToUTF16(", "); + std::u16string separator = u", "; std::vector<StringPiece16> parts; - EXPECT_EQ(string16(), JoinString(parts, separator)); + EXPECT_EQ(std::u16string(), JoinString(parts, separator)); // Test empty first part (https://crbug.com/698073). parts.push_back(StringPiece16()); - EXPECT_EQ(string16(), JoinString(parts, separator)); + EXPECT_EQ(std::u16string(), JoinString(parts, separator)); parts.clear(); - const string16 kA = ASCIIToUTF16("a"); + const std::u16string kA = u"a"; parts.push_back(kA); - EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); + EXPECT_EQ(u"a", JoinString(parts, separator)); - const string16 kB = ASCIIToUTF16("b"); + const std::u16string kB = u"b"; parts.push_back(kB); - const string16 kC = ASCIIToUTF16("c"); + const std::u16string kC = u"c"; parts.push_back(kC); - EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); + EXPECT_EQ(u"a, b, c", JoinString(parts, separator)); parts.push_back(StringPiece16()); - EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); - const string16 kSpace = ASCIIToUTF16(" "); + EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator)); + const std::u16string kSpace = u" "; parts.push_back(kSpace); - EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); + EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|")); } TEST(StringUtilTest, JoinStringInitializerList) { @@ -952,31 +955,29 @@ } TEST(StringUtilTest, JoinStringInitializerList16) { - string16 separator = ASCIIToUTF16(", "); - EXPECT_EQ(string16(), JoinString({}, separator)); + std::u16string separator = u", "; + EXPECT_EQ(std::u16string(), JoinString({}, separator)); // Test empty first part (https://crbug.com/698073). 
- EXPECT_EQ(string16(), JoinString({StringPiece16()}, separator)); + EXPECT_EQ(std::u16string(), JoinString({StringPiece16()}, separator)); // With string16s. - const string16 kA = ASCIIToUTF16("a"); - EXPECT_EQ(ASCIIToUTF16("a"), JoinString({kA}, separator)); + const std::u16string kA = u"a"; + EXPECT_EQ(u"a", JoinString({kA}, separator)); - const string16 kB = ASCIIToUTF16("b"); - const string16 kC = ASCIIToUTF16("c"); - EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString({kA, kB, kC}, separator)); + const std::u16string kB = u"b"; + const std::u16string kC = u"c"; + EXPECT_EQ(u"a, b, c", JoinString({kA, kB, kC}, separator)); - EXPECT_EQ(ASCIIToUTF16("a, b, c, "), - JoinString({kA, kB, kC, StringPiece16()}, separator)); - const string16 kSpace = ASCIIToUTF16(" "); - EXPECT_EQ( - ASCIIToUTF16("a|b|c|| "), - JoinString({kA, kB, kC, StringPiece16(), kSpace}, ASCIIToUTF16("|"))); + EXPECT_EQ(u"a, b, c, ", JoinString({kA, kB, kC, StringPiece16()}, separator)); + const std::u16string kSpace = u" "; + EXPECT_EQ(u"a|b|c|| ", + JoinString({kA, kB, kC, StringPiece16(), kSpace}, u"|")); // With StringPiece16s. 
const StringPiece16 kPieceA = kA; const StringPiece16 kPieceB = kB; - EXPECT_EQ(ASCIIToUTF16("a, b"), JoinString({kPieceA, kPieceB}, separator)); + EXPECT_EQ(u"a, b", JoinString({kPieceA, kPieceB}, separator)); } TEST(StringUtilTest, StartsWith) { @@ -999,83 +1000,74 @@ gurl_base::CompareCase::INSENSITIVE_ASCII)); EXPECT_TRUE(StartsWith("java", std::string(), gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), - ASCIIToUTF16("javascript"), + EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", gurl_base::CompareCase::SENSITIVE)); - EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"), - ASCIIToUTF16("javascript"), + EXPECT_FALSE(StartsWith(u"JavaScript:url", u"javascript", gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), - ASCIIToUTF16("javascript"), + EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"), - ASCIIToUTF16("javascript"), + EXPECT_TRUE(StartsWith(u"JavaScript:url", u"javascript", gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), + EXPECT_FALSE( + StartsWith(u"java", u"javascript", gurl_base::CompareCase::SENSITIVE)); + EXPECT_FALSE( + StartsWith(u"java", u"javascript", gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), + EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), + EXPECT_TRUE(StartsWith(u"java", 
std::u16string(), gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), - gurl_base::CompareCase::SENSITIVE)); + EXPECT_TRUE( + StartsWith(u"java", std::u16string(), gurl_base::CompareCase::SENSITIVE)); } TEST(StringUtilTest, EndsWith) { - EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), - gurl_base::CompareCase::SENSITIVE)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::INSENSITIVE_ASCII)); - EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), - gurl_base::CompareCase::SENSITIVE)); EXPECT_TRUE( - EndsWith(string16(), string16(), gurl_base::CompareCase::INSENSITIVE_ASCII)); - 
EXPECT_TRUE(EndsWith(string16(), string16(), gurl_base::CompareCase::SENSITIVE)); + EndsWith(u"Foo.plugin", u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_FALSE( + EndsWith(u"Foo.Plugin", u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin", + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_TRUE(EndsWith(u"Foo.Plugin", u".plugin", + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_FALSE(EndsWith(u".plug", u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_FALSE( + EndsWith(u".plug", u".plugin", gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_FALSE( + EndsWith(u"Foo.plugin Bar", u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin", + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_FALSE(EndsWith(std::u16string(), u".plugin", + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_FALSE( + EndsWith(std::u16string(), u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(), + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_TRUE( + EndsWith(u"Foo.plugin", std::u16string(), gurl_base::CompareCase::SENSITIVE)); + EXPECT_TRUE( + EndsWith(u".plugin", u".plugin", gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_TRUE(EndsWith(u".plugin", u".plugin", gurl_base::CompareCase::SENSITIVE)); + EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), + gurl_base::CompareCase::INSENSITIVE_ASCII)); + EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), + gurl_base::CompareCase::SENSITIVE)); } TEST(StringUtilTest, GetStringFWithOffsets) { - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("1")); - subst.push_back(ASCIIToUTF16("2")); + std::vector<std::u16string> subst; + subst.push_back(u"1"); + subst.push_back(u"2"); std::vector<size_t> offsets; - ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. 
Your number is $2."), - subst, - &offsets); + ReplaceStringPlaceholders(u"Hello, $1. Your number is $2.", subst, &offsets); EXPECT_EQ(2U, offsets.size()); EXPECT_EQ(7U, offsets[0]); EXPECT_EQ(25U, offsets[1]); offsets.clear(); - ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), - subst, - &offsets); + ReplaceStringPlaceholders(u"Hello, $2. Your number is $1.", subst, &offsets); EXPECT_EQ(2U, offsets.size()); EXPECT_EQ(25U, offsets[0]); EXPECT_EQ(7U, offsets[1]); @@ -1085,54 +1077,52 @@ TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { // Test whether replacestringplaceholders works as expected when there // are fewer inputs than outputs. - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("9a")); - subst.push_back(ASCIIToUTF16("8b")); - subst.push_back(ASCIIToUTF16("7c")); + std::vector<std::u16string> subst; + subst.push_back(u"9a"); + subst.push_back(u"8b"); + subst.push_back(u"7c"); - string16 formatted = - ReplaceStringPlaceholders( - ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, nullptr); + std::u16string formatted = ReplaceStringPlaceholders( + u"$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i", subst, nullptr); - EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted); + EXPECT_EQ(u"9aa,8bb,7cc,d,e,f,9ag,8bh,7ci", formatted); } TEST(StringUtilTest, ReplaceStringPlaceholders) { - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("9a")); - subst.push_back(ASCIIToUTF16("8b")); - subst.push_back(ASCIIToUTF16("7c")); - subst.push_back(ASCIIToUTF16("6d")); - subst.push_back(ASCIIToUTF16("5e")); - subst.push_back(ASCIIToUTF16("4f")); - subst.push_back(ASCIIToUTF16("3g")); - subst.push_back(ASCIIToUTF16("2h")); - subst.push_back(ASCIIToUTF16("1i")); + std::vector<std::u16string> subst; + subst.push_back(u"9a"); + subst.push_back(u"8b"); + subst.push_back(u"7c"); + subst.push_back(u"6d"); + subst.push_back(u"5e"); + subst.push_back(u"4f"); + subst.push_back(u"3g"); + subst.push_back(u"2h"); + 
subst.push_back(u"1i"); - string16 formatted = - ReplaceStringPlaceholders( - ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, nullptr); + std::u16string formatted = ReplaceStringPlaceholders( + u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr); - EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted); + EXPECT_EQ(u"9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted); } TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) { // In this test, some of the substitutions are shorter than the placeholders, // but overall the string gets longer. - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("9a____")); - subst.push_back(ASCIIToUTF16("B")); - subst.push_back(ASCIIToUTF16("7c___")); - subst.push_back(ASCIIToUTF16("d")); - subst.push_back(ASCIIToUTF16("5e____")); - subst.push_back(ASCIIToUTF16("F")); - subst.push_back(ASCIIToUTF16("3g___")); - subst.push_back(ASCIIToUTF16("h")); - subst.push_back(ASCIIToUTF16("1i_____")); + std::vector<std::u16string> subst; + subst.push_back(u"9a____"); + subst.push_back(u"B"); + subst.push_back(u"7c___"); + subst.push_back(u"d"); + subst.push_back(u"5e____"); + subst.push_back(u"F"); + subst.push_back(u"3g___"); + subst.push_back(u"h"); + subst.push_back(u"1i_____"); - string16 original = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"); - string16 expected = - ASCIIToUTF16("9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i"); + std::u16string original = u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"; + std::u16string expected = + u"9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i"; EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr)); @@ -1151,33 +1141,33 @@ // In this test, some of the substitutions are longer than the placeholders, // but overall the string gets smaller. Additionally, the placeholders appear // in a permuted order. 
- std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("z")); - subst.push_back(ASCIIToUTF16("y")); - subst.push_back(ASCIIToUTF16("XYZW")); - subst.push_back(ASCIIToUTF16("x")); - subst.push_back(ASCIIToUTF16("w")); + std::vector<std::u16string> subst; + subst.push_back(u"z"); + subst.push_back(u"y"); + subst.push_back(u"XYZW"); + subst.push_back(u"x"); + subst.push_back(u"w"); - string16 formatted = - ReplaceStringPlaceholders(ASCIIToUTF16("$3_$4$2$1$5"), subst, nullptr); + std::u16string formatted = + ReplaceStringPlaceholders(u"$3_$4$2$1$5", subst, nullptr); - EXPECT_EQ(ASCIIToUTF16("XYZW_xyzw"), formatted); + EXPECT_EQ(u"XYZW_xyzw", formatted); } TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) { - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("1a")); - string16 formatted = - ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), subst, nullptr); - EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted); + std::vector<std::u16string> subst; + subst.push_back(u"1a"); + std::u16string formatted = + ReplaceStringPlaceholders(u" $16 ", subst, nullptr); + EXPECT_EQ(u" 1a6 ", formatted); } TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) { - std::vector<string16> subst; - subst.push_back(ASCIIToUTF16("1a")); - string16 formatted = - ReplaceStringPlaceholders(ASCIIToUTF16("+$-+$A+$1+"), subst, nullptr); - EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted); + std::vector<std::u16string> subst; + subst.push_back(u"1a"); + std::u16string formatted = + ReplaceStringPlaceholders(u"+$-+$A+$1+", subst, nullptr); + EXPECT_EQ(u"+++1a+", formatted); } TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { @@ -1324,14 +1314,14 @@ EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).size(), foo.size()); EXPECT_TRUE(MakeStringPiece(foo.end(), foo.end()).empty()); - constexpr char16 kBar[] = STRING16_LITERAL("Bar"); + constexpr char16_t kBar[] = u"Bar"; static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar, ""); static_assert(MakeStringPiece16(kBar, kBar + 
3).data() == kBar, ""); static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3, ""); static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty(), ""); static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty(), ""); - string16 bar = kBar; + std::u16string bar = kBar; EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()), bar); EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).data(), bar.data()); EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).size(), bar.size()); @@ -1457,13 +1447,12 @@ EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII)); EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII)); - EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16)); - EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16)); - EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16)); - EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16)); - EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16)); - EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "), - kWhitespaceUTF16)); + EXPECT_TRUE(ContainsOnlyChars(std::u16string(), kWhitespaceUTF16)); + EXPECT_TRUE(ContainsOnlyChars(u" ", kWhitespaceUTF16)); + EXPECT_TRUE(ContainsOnlyChars(u"\t", kWhitespaceUTF16)); + EXPECT_TRUE(ContainsOnlyChars(u"\t \r \n ", kWhitespaceUTF16)); + EXPECT_FALSE(ContainsOnlyChars(u"a", kWhitespaceUTF16)); + EXPECT_FALSE(ContainsOnlyChars(u"\thello\r \n ", kWhitespaceUTF16)); } TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
diff --git a/base/strings/string_util_win.cc b/base/strings/string_util_win.cc index 1a98101..9d475ba 100644 --- a/base/strings/string_util_win.cc +++ b/base/strings/string_util_win.cc
@@ -8,7 +8,6 @@ namespace gurl_base { -#if defined(BASE_STRING16_IS_STD_U16STRING) bool IsStringASCII(WStringPiece str) { return internal::DoIsStringASCII(str.data(), str.length()); } @@ -102,7 +101,7 @@ WStringPiece find_this, WStringPiece replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_FIRST); } @@ -111,7 +110,7 @@ WStringPiece find_this, WStringPiece replace_with) { internal::DoReplaceMatchesAfterOffset( - str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + str, start_offset, internal::MakeSubstringMatcher(find_this), replace_with, internal::ReplaceType::REPLACE_ALL); } @@ -140,6 +139,4 @@ return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } -#endif - } // namespace base
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h index 3ddbc92..7c04176 100644 --- a/base/strings/string_util_win.h +++ b/base/strings/string_util_win.h
@@ -16,7 +16,6 @@ #include "polyfills/base/check.h" #include "base/containers/span.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" @@ -49,22 +48,22 @@ // Utility functions to access the underlying string buffer as a wide char // pointer. // -// Note: These functions violate strict aliasing when char16 and wchar_t are +// Note: These functions violate strict aliasing when char16_t and wchar_t are // unrelated types. We thus pass -fno-strict-aliasing to the compiler on // non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. // // [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 // [2] // https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 -inline wchar_t* as_writable_wcstr(char16* str) { +inline wchar_t* as_writable_wcstr(char16_t* str) { return reinterpret_cast<wchar_t*>(str); } -inline wchar_t* as_writable_wcstr(string16& str) { +inline wchar_t* as_writable_wcstr(std::u16string& str) { return reinterpret_cast<wchar_t*>(data(str)); } -inline const wchar_t* as_wcstr(const char16* str) { +inline const wchar_t* as_wcstr(const char16_t* str) { return reinterpret_cast<const wchar_t*>(str); } @@ -72,21 +71,22 @@ return reinterpret_cast<const wchar_t*>(str.data()); } -// Utility functions to access the underlying string buffer as a char16 pointer. -inline char16* as_writable_u16cstr(wchar_t* str) { - return reinterpret_cast<char16*>(str); +// Utility functions to access the underlying string buffer as a char16_t +// pointer. 
+inline char16_t* as_writable_u16cstr(wchar_t* str) { + return reinterpret_cast<char16_t*>(str); } -inline char16* as_writable_u16cstr(std::wstring& str) { - return reinterpret_cast<char16*>(data(str)); +inline char16_t* as_writable_u16cstr(std::wstring& str) { + return reinterpret_cast<char16_t*>(data(str)); } -inline const char16* as_u16cstr(const wchar_t* str) { - return reinterpret_cast<const char16*>(str); +inline const char16_t* as_u16cstr(const wchar_t* str) { + return reinterpret_cast<const char16_t*>(str); } -inline const char16* as_u16cstr(WStringPiece str) { - return reinterpret_cast<const char16*>(str.data()); +inline const char16_t* as_u16cstr(WStringPiece str) { + return reinterpret_cast<const char16_t*>(str.data()); } // Utility functions to convert between gurl_base::WStringPiece and @@ -103,17 +103,12 @@ return std::wstring(as_wcstr(str.data()), str.size()); } -inline string16 AsString16(WStringPiece str) { - return string16(as_u16cstr(str.data()), str.size()); +inline std::u16string AsString16(WStringPiece str) { + return std::u16string(as_u16cstr(str.data()), str.size()); } // The following section contains overloads of the cross-platform APIs for -// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring -// and gurl_base::string16 are distinct types, as otherwise this would result in an -// ODR violation. -// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is -// std::u16string. -#if defined(BASE_STRING16_IS_STD_U16STRING) +// std::wstring and gurl_base::WStringPiece. BASE_EXPORT bool IsStringASCII(WStringPiece str); BASE_EXPORT std::wstring ToLowerASCII(WStringPiece str); @@ -192,9 +187,8 @@ BASE_EXPORT std::wstring ReplaceStringPlaceholders( WStringPiece format_string, - const std::vector<string16>& subst, + const std::vector<std::wstring>& subst, std::vector<size_t>* offsets); -#endif } // namespace base
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc index c2e8707..9da8861 100644 --- a/base/strings/stringprintf_unittest.cc +++ b/base/strings/stringprintf_unittest.cc
@@ -7,7 +7,6 @@ #include <errno.h> #include <stddef.h> -#include "base/macros.h" #include "build/build_config.h" #include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/sys_string_conversions.h b/base/strings/sys_string_conversions.h index 4183d26..51977fe 100644 --- a/base/strings/sys_string_conversions.h +++ b/base/strings/sys_string_conversions.h
@@ -14,7 +14,6 @@ #include <string> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "build/build_config.h" @@ -82,12 +81,13 @@ // Converts a CFStringRef to an STL string. Returns an empty string on failure. BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref) WARN_UNUSED_RESULT; -BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string SysCFStringRefToUTF16(CFStringRef ref) + WARN_UNUSED_RESULT; // Same, but accepts NSString input. Converts nil NSString* to the appropriate // string type of length 0. BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT; -BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT; #endif // defined(OS_APPLE)
diff --git a/base/strings/sys_string_conversions_unittest.cc b/base/strings/sys_string_conversions_unittest.cc index 2f31dcc..95995c6 100644 --- a/base/strings/sys_string_conversions_unittest.cc +++ b/base/strings/sys_string_conversions_unittest.cc
@@ -6,7 +6,6 @@ #include <string> -#include "base/macros.h" #include "base/strings/string_piece.h" #include "base/strings/sys_string_conversions.h" #include "base/strings/utf_string_conversions.h"
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc index 5a492d6..6120b71 100644 --- a/base/strings/utf_offset_string_conversions.cc +++ b/base/strings/utf_offset_string_conversions.cc
@@ -37,14 +37,14 @@ size_t* offset, size_t limit) { GURL_DCHECK(offset); - if (*offset == string16::npos) + if (*offset == std::u16string::npos) return; int adjustment = 0; for (const auto& i : adjustments) { if (*offset <= i.original_offset) break; if (*offset < (i.original_offset + i.original_length)) { - *offset = string16::npos; + *offset = std::u16string::npos; return; } adjustment += static_cast<int>(i.original_length - i.output_length); @@ -52,7 +52,7 @@ *offset -= adjustment; if (*offset > limit) - *offset = string16::npos; + *offset = std::u16string::npos; } // static @@ -68,7 +68,7 @@ // static void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments, size_t* offset) { - if (*offset == string16::npos) + if (*offset == std::u16string::npos) return; int adjustment = 0; for (const auto& i : adjustments) { @@ -76,7 +76,7 @@ break; adjustment += static_cast<int>(i.original_length - i.output_length); if ((*offset + adjustment) < (i.original_offset + i.original_length)) { - *offset = string16::npos; + *offset = std::u16string::npos; return; } } @@ -219,29 +219,29 @@ bool UTF8ToUTF16WithAdjustments( const char* src, size_t src_len, - string16* output, + std::u16string* output, gurl_base::OffsetAdjuster::Adjustments* adjustments) { PrepareForUTF16Or32Output(src, src_len, output); return ConvertUnicode(src, src_len, output, adjustments); } -string16 UTF8ToUTF16WithAdjustments( +std::u16string UTF8ToUTF16WithAdjustments( const gurl_base::StringPiece& utf8, gurl_base::OffsetAdjuster::Adjustments* adjustments) { - string16 result; + std::u16string result; UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments); return result; } -string16 UTF8ToUTF16AndAdjustOffsets( +std::u16string UTF8ToUTF16AndAdjustOffsets( const gurl_base::StringPiece& utf8, std::vector<size_t>* offsets_for_adjustment) { for (size_t& offset : *offsets_for_adjustment) { if (offset > utf8.length()) - offset = string16::npos; + offset = std::u16string::npos; } 
OffsetAdjuster::Adjustments adjustments; - string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments); + std::u16string result = UTF8ToUTF16WithAdjustments(utf8, &adjustments); OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); return result; } @@ -251,7 +251,7 @@ std::vector<size_t>* offsets_for_adjustment) { for (size_t& offset : *offsets_for_adjustment) { if (offset > utf16.length()) - offset = string16::npos; + offset = std::u16string::npos; } std::string result; PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h index c2e2ba7..aa4e59e 100644 --- a/base/strings/utf_offset_string_conversions.h +++ b/base/strings/utf_offset_string_conversions.h
@@ -11,7 +11,6 @@ #include <vector> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" namespace gurl_base { @@ -35,7 +34,7 @@ // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments // recorded in |adjustments|. Adjusted offsets greater than |limit| will be - // set to string16::npos. + // set to std::u16string::npos. // // Offsets represents insertion/selection points between characters: if |src| // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the @@ -43,23 +42,24 @@ // exit, each offset will have been modified to point at the same logical // position in the output string. If an offset cannot be successfully // adjusted (e.g., because it points into the middle of a multibyte sequence), - // it will be set to string16::npos. + // it will be set to std::u16string::npos. static void AdjustOffsets(const Adjustments& adjustments, std::vector<size_t>* offsets_for_adjustment, - size_t limit = string16::npos); + size_t limit = std::u16string::npos); // Adjusts the single |offset| to reflect the adjustments recorded in // |adjustments|. static void AdjustOffset(const Adjustments& adjustments, size_t* offset, - size_t limit = string16::npos); + size_t limit = std::u16string::npos); // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse // of the adjustments recorded in |adjustments|. In other words, the offsets // provided represent offsets into an adjusted string and the caller wants // to know the offsets they correspond to in the original string. If an // offset cannot be successfully unadjusted (e.g., because it points into - // the middle of a multibyte sequence), it will be set to string16::npos. + // the middle of a multibyte sequence), it will be set to + // std::u16string::npos. 
static void UnadjustOffsets(const Adjustments& adjustments, std::vector<size_t>* offsets_for_unadjustment); @@ -94,15 +94,16 @@ BASE_EXPORT bool UTF8ToUTF16WithAdjustments( const char* src, size_t src_len, - string16* output, + std::u16string* output, gurl_base::OffsetAdjuster::Adjustments* adjustments); -BASE_EXPORT string16 UTF8ToUTF16WithAdjustments( +BASE_EXPORT std::u16string UTF8ToUTF16WithAdjustments( const gurl_base::StringPiece& utf8, gurl_base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT; // As above, but instead internally examines the adjustments and applies them // to |offsets_for_adjustment|. Input offsets greater than the length of the -// input string will be set to string16::npos. See comments by AdjustOffsets(). -BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets( +// input string will be set to std::u16string::npos. See comments by +// AdjustOffsets(). +BASE_EXPORT std::u16string UTF8ToUTF16AndAdjustOffsets( const gurl_base::StringPiece& utf8, std::vector<size_t>* offsets_for_adjustment); BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc index 8416524..0775dc4 100644 --- a/base/strings/utf_offset_string_conversions_unittest.cc +++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -15,7 +15,7 @@ namespace { -static const size_t kNpos = string16::npos; +static const size_t kNpos = std::u16string::npos; } // namespace @@ -45,7 +45,7 @@ } struct UTF16ToUTF8Case { - char16 utf16[10]; + char16_t utf16[10]; size_t input_offset; size_t output_offset; } utf16_to_utf8_cases[] = {
diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc index ce432e7..da68dd3 100644 --- a/base/strings/utf_string_conversion_utils.cc +++ b/base/strings/utf_string_conversion_utils.cc
@@ -30,7 +30,7 @@ return IsValidCodepoint(code_point); } -bool ReadUnicodeCharacter(const char16* src, +bool ReadUnicodeCharacter(const char16_t* src, int32_t src_len, int32_t* char_index, uint32_t* code_point) { @@ -90,10 +90,10 @@ return char_offset - original_char_offset; } -size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) { +size_t WriteUnicodeCharacter(uint32_t code_point, std::u16string* output) { if (CBU16_LENGTH(code_point) == 1) { // Thie code point is in the Basic Multilingual Plane (BMP). - output->push_back(static_cast<char16>(code_point)); + output->push_back(static_cast<char16_t>(code_point)); return 1; } // Non-BMP characters use a double-character encoding. @@ -123,10 +123,10 @@ // Instantiate versions we know callers will need. #if !defined(OS_WIN) -// wchar_t and char16 are the same thing on Windows. +// wchar_t and char16_t are the same thing on Windows. template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*); #endif -template void PrepareForUTF8Output(const char16*, size_t, std::string*); +template void PrepareForUTF8Output(const char16_t*, size_t, std::string*); template<typename STRING> void PrepareForUTF16Or32Output(const char* src, @@ -147,9 +147,9 @@ // Instantiate versions we know callers will need. #if !defined(OS_WIN) -// std::wstring and string16 are the same thing on Windows. +// std::wstring and std::u16string are the same thing on Windows. template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*); #endif -template void PrepareForUTF16Or32Output(const char*, size_t, string16*); +template void PrepareForUTF16Or32Output(const char*, size_t, std::u16string*); } // namespace base
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h index 075832e..640c7c6 100644 --- a/base/strings/utf_string_conversion_utils.h +++ b/base/strings/utf_string_conversion_utils.h
@@ -11,8 +11,9 @@ #include <stddef.h> #include <stdint.h> +#include <string> + #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" namespace gurl_base { @@ -50,7 +51,7 @@ uint32_t* code_point_out); // Reads a UTF-16 character. The usage is the same as the 8-bit version above. -BASE_EXPORT bool ReadUnicodeCharacter(const char16* src, +BASE_EXPORT bool ReadUnicodeCharacter(const char16_t* src, int32_t src_len, int32_t* char_index, uint32_t* code_point); @@ -72,7 +73,8 @@ // Appends the given code point as a UTF-16 character to the given 16-bit // string. Returns the number of 16-bit values written. -BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output); +BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, + std::u16string* output); #if defined(WCHAR_T_IS_UTF32) // Appends the given UTF-32 character to the given 32-bit string. Returns the
diff --git a/base/strings/utf_string_conversions.cc b/base/strings/utf_string_conversions.cc index 92333e2..9595e7b 100644 --- a/base/strings/utf_string_conversions.cc +++ b/base/strings/utf_string_conversions.cc
@@ -35,7 +35,7 @@ }; template <> -struct SizeCoefficient<char16, char> { +struct SizeCoefficient<char16_t, char> { // One UTF-16 codeunit corresponds to at most 3 codeunits in UTF-8. static constexpr int value = 3; }; @@ -48,7 +48,7 @@ }; template <> -struct SizeCoefficient<wchar_t, char16> { +struct SizeCoefficient<wchar_t, char16_t> { // UTF-16 uses at most 2 codeunits per character. static constexpr int value = 2; }; @@ -111,13 +111,13 @@ } template <typename DestChar> -bool DoUTFConversion(const char16* src, +bool DoUTFConversion(const char16_t* src, int32_t src_len, DestChar* dest, int32_t* dest_len) { bool success = true; - auto ConvertSingleChar = [&success](char16 in) -> int32_t { + auto ConvertSingleChar = [&success](char16_t in) -> int32_t { if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) { success = false; return kErrorCodePoint; @@ -211,19 +211,19 @@ // UTF16 <-> UTF8 -------------------------------------------------------------- -bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { +bool UTF8ToUTF16(const char* src, size_t src_len, std::u16string* output) { return UTFConversion(StringPiece(src, src_len), output); } -string16 UTF8ToUTF16(StringPiece utf8) { - string16 ret; +std::u16string UTF8ToUTF16(StringPiece utf8) { + std::u16string ret; // Ignore the success flag of this call, it will do the best it can for // invalid input, which is what we want here. UTF8ToUTF16(utf8.data(), utf8.size(), &ret); return ret; } -bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { +bool UTF16ToUTF8(const char16_t* src, size_t src_len, std::string* output) { return UTFConversion(StringPiece16(src, src_len), output); } @@ -240,16 +240,16 @@ #if defined(WCHAR_T_IS_UTF16) // When wide == UTF-16 the conversions are a NOP. 
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { +bool WideToUTF16(const wchar_t* src, size_t src_len, std::u16string* output) { output->assign(src, src + src_len); return true; } -string16 WideToUTF16(WStringPiece wide) { - return string16(wide.begin(), wide.end()); +std::u16string WideToUTF16(WStringPiece wide) { + return std::u16string(wide.begin(), wide.end()); } -bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { +bool UTF16ToWide(const char16_t* src, size_t src_len, std::wstring* output) { output->assign(src, src + src_len); return true; } @@ -260,19 +260,19 @@ #elif defined(WCHAR_T_IS_UTF32) -bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { +bool WideToUTF16(const wchar_t* src, size_t src_len, std::u16string* output) { return UTFConversion(gurl_base::WStringPiece(src, src_len), output); } -string16 WideToUTF16(WStringPiece wide) { - string16 ret; +std::u16string WideToUTF16(WStringPiece wide) { + std::u16string ret; // Ignore the success flag of this call, it will do the best it can for // invalid input, which is what we want here. WideToUTF16(wide.data(), wide.length(), &ret); return ret; } -bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { +bool UTF16ToWide(const char16_t* src, size_t src_len, std::wstring* output) { return UTFConversion(StringPiece16(src, src_len), output); } @@ -329,9 +329,9 @@ #endif // defined(WCHAR_T_IS_UTF32) -string16 ASCIIToUTF16(StringPiece ascii) { +std::u16string ASCIIToUTF16(StringPiece ascii) { GURL_DCHECK(IsStringASCII(ascii)) << ascii; - return string16(ascii.begin(), ascii.end()); + return std::u16string(ascii.begin(), ascii.end()); } std::string UTF16ToASCII(StringPiece16 utf16) {
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h index 043b6ae..ffb56e4 100644 --- a/base/strings/utf_string_conversions.h +++ b/base/strings/utf_string_conversions.h
@@ -10,7 +10,6 @@ #include <string> #include "polyfills/base/base_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "build/build_config.h" @@ -29,22 +28,27 @@ std::wstring* output); BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT; -BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len, - string16* output); -BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT; -BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len, +BASE_EXPORT bool WideToUTF16(const wchar_t* src, + size_t src_len, + std::u16string* output); +BASE_EXPORT std::u16string WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT; +BASE_EXPORT bool UTF16ToWide(const char16_t* src, + size_t src_len, std::wstring* output); BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT; -BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); -BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT; -BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len, +BASE_EXPORT bool UTF8ToUTF16(const char* src, + size_t src_len, + std::u16string* output); +BASE_EXPORT std::u16string UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT; +BASE_EXPORT bool UTF16ToUTF8(const char16_t* src, + size_t src_len, std::string* output); BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT; // This converts an ASCII string, typically a hardcoded constant, to a UTF16 // string. -BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT; // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII // beforehand. @@ -60,6 +64,35 @@ BASE_EXPORT std::string WideToASCII(WStringPiece wide) WARN_UNUSED_RESULT; #endif // defined(WCHAR_T_IS_UTF16) +// The conversion functions in this file should not be used to convert string +// literals. 
Instead, the corresponding prefixes (e.g. u"" for UTF16 or L"" for +// Wide) should be used. Deleting the overloads here catches these cases at +// compile time. +template <size_t N> +std::u16string WideToUTF16(const wchar_t (&str)[N]) { + static_assert(N == 0, "Error: Use the u\"...\" prefix instead."); + return std::u16string(); +} + +template <size_t N> +std::u16string UTF8ToUTF16(const char (&str)[N]) { + static_assert(N == 0, "Error: Use the u\"...\" prefix instead."); + return std::u16string(); +} + +template <size_t N> +std::u16string ASCIIToUTF16(const char (&str)[N]) { + static_assert(N == 0, "Error: Use the u\"...\" prefix instead."); + return std::u16string(); +} + +// Mutable character arrays are usually only populated during runtime. Continue +// to allow this conversion. +template <size_t N> +std::u16string ASCIIToUTF16(char (&str)[N]) { + return ASCIIToUTF16(StringPiece(str)); +} + } // namespace base #endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
diff --git a/base/strings/utf_string_conversions_fuzzer.cc b/base/strings/utf_string_conversions_fuzzer.cc index 55e75f7..932012a 100644 --- a/base/strings/utf_string_conversions_fuzzer.cc +++ b/base/strings/utf_string_conversions_fuzzer.cc
@@ -8,7 +8,7 @@ std::string output_std_string; std::wstring output_std_wstring; -gurl_base::string16 output_string16; +std::u16string output_string16; // Entry point for LibFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { @@ -22,15 +22,15 @@ gurl_base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size, &output_string16); - // Test for char16. + // Test for char16_t. if (size % 2 == 0) { gurl_base::StringPiece16 string_piece_input16( - reinterpret_cast<const gurl_base::char16*>(data), size / 2); + reinterpret_cast<const char16_t*>(data), size / 2); ignore_result(gurl_base::UTF16ToWide(output_string16)); - gurl_base::UTF16ToWide(reinterpret_cast<const gurl_base::char16*>(data), size / 2, + gurl_base::UTF16ToWide(reinterpret_cast<const char16_t*>(data), size / 2, &output_std_wstring); ignore_result(gurl_base::UTF16ToUTF8(string_piece_input16)); - gurl_base::UTF16ToUTF8(reinterpret_cast<const gurl_base::char16*>(data), size / 2, + gurl_base::UTF16ToUTF8(reinterpret_cast<const char16_t*>(data), size / 2, &output_std_string); }
diff --git a/base/strings/utf_string_conversions_unittest.cc b/base/strings/utf_string_conversions_unittest.cc index 6cffe99..752bf95 100644 --- a/base/strings/utf_string_conversions_unittest.cc +++ b/base/strings/utf_string_conversions_unittest.cc
@@ -180,19 +180,15 @@ #endif // defined(WCHAR_T_IS_UTF32) TEST(UTFStringConversionsTest, ConvertMultiString) { - static char16 multi16[] = { - 'f', 'o', 'o', '\0', - 'b', 'a', 'r', '\0', - 'b', 'a', 'z', '\0', - '\0' - }; + static char16_t multi16[] = {'f', 'o', 'o', '\0', 'b', 'a', 'r', + '\0', 'b', 'a', 'z', '\0', '\0'}; static char multi[] = { 'f', 'o', 'o', '\0', 'b', 'a', 'r', '\0', 'b', 'a', 'z', '\0', '\0' }; - string16 multistring16; + std::u16string multistring16; memcpy(WriteInto(&multistring16, gurl_base::size(multi16)), multi16, sizeof(multi16)); EXPECT_EQ(gurl_base::size(multi16) - 1, multistring16.length());
diff --git a/base/template_util.h b/base/template_util.h index 4b69c7a..78b52ee 100644 --- a/base/template_util.h +++ b/base/template_util.h
@@ -12,6 +12,7 @@ #include <utility> #include <vector> +#include "base/compiler_specific.h" #include "build/build_config.h" // Some versions of libstdc++ have partial support for type_traits, but misses @@ -146,8 +147,9 @@ #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 7 // Workaround for g++7 and earlier family. // Due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80654, without this -// Optional<std::vector<T>> where T is non-copyable causes a compile error. -// As we know it is not trivially copy constructible, explicitly declare so. +// absl::optional<std::vector<T>> where T is non-copyable causes a compile +// error. As we know it is not trivially copy constructible, explicitly declare +// so. template <typename T> struct is_trivially_copy_constructible : std::is_trivially_copy_constructible<T> {}; @@ -233,16 +235,107 @@ template <typename B> struct negation : bool_constant<!static_cast<bool>(B::value)> {}; -// Implementation of C++17's std::invoke_result_t. +// Implementation of C++17's invoke_result. // // This implementation adds references to `Functor` and `Args` to work around -// some quirks of std::result_of_t. See the #Notes section of [1] for details. +// some quirks of std::result_of. See the #Notes section of [1] for details. // // References: // [1] https://en.cppreference.com/w/cpp/types/result_of -// [2] https://wg21.link/meta.type.synop#lib:invoke_result_t +// [2] https://wg21.link/meta.trans.other#lib:invoke_result template <typename Functor, typename... Args> -using invoke_result_t = std::result_of_t<Functor && (Args && ...)>; +using invoke_result = std::result_of<Functor && (Args && ...)>; + +// Implementation of C++17's std::invoke_result_t. +// +// Reference: https://wg21.link/meta.type.synop#lib:invoke_result_t +template <typename Functor, typename... Args> +using invoke_result_t = typename invoke_result<Functor, Args...>::type; + +namespace internal { + +// Base case, `InvokeResult` does not have a nested type member. 
This means `F` +// could not be invoked with `Args...` and thus is not invocable. +template <typename InvokeResult, typename R, typename = void> +struct IsInvocableImpl : std::false_type {}; + +// Happy case, `InvokeResult` does have a nested type member. Now check whether +// `InvokeResult::type` is convertible to `R`. Short circuit in case +// `std::is_void<R>`. +template <typename InvokeResult, typename R> +struct IsInvocableImpl<InvokeResult, R, void_t<typename InvokeResult::type>> + : disjunction<std::is_void<R>, + std::is_convertible<typename InvokeResult::type, R>> {}; + +} // namespace internal + +// Implementation of C++17's std::is_invocable_r. +// +// Returns whether `F` can be invoked with `Args...` and the result is +// convertible to `R`. +// +// Reference: https://wg21.link/meta.rel#lib:is_invocable_r +template <typename R, typename F, typename... Args> +struct is_invocable_r + : internal::IsInvocableImpl<invoke_result<F, Args...>, R> {}; + +// Implementation of C++17's std::is_invocable. +// +// Returns whether `F` can be invoked with `Args...`. +// +// Reference: https://wg21.link/meta.rel#lib:is_invocable +template <typename F, typename... Args> +struct is_invocable : is_invocable_r<void, F, Args...> {}; + +namespace internal { + +// The indirection with std::is_enum<T> is required, because instantiating +// std::underlying_type_t<T> when T is not an enum is UB prior to C++20. +template <typename T, bool = std::is_enum<T>::value> +struct IsScopedEnumImpl : std::false_type {}; + +template <typename T> +struct IsScopedEnumImpl<T, /*std::is_enum<T>::value=*/true> + : negation<std::is_convertible<T, std::underlying_type_t<T>>> {}; + +} // namespace internal + +// Implementation of C++23's std::is_scoped_enum +// +// Reference: https://en.cppreference.com/w/cpp/types/is_scoped_enum +template <typename T> +struct is_scoped_enum : internal::IsScopedEnumImpl<T> {}; + +// Implementation of C++20's std::remove_cvref. 
+// +// References: +// - https://en.cppreference.com/w/cpp/types/remove_cvref +// - https://wg21.link/meta.trans.other#lib:remove_cvref +template <typename T> +struct remove_cvref { + using type = std::remove_cv_t<std::remove_reference_t<T>>; +}; + +// Implementation of C++20's std::remove_cvref_t. +// +// References: +// - https://en.cppreference.com/w/cpp/types/remove_cvref +// - https://wg21.link/meta.type.synop#lib:remove_cvref_t +template <typename T> +using remove_cvref_t = typename remove_cvref<T>::type; + +// Implementation of C++20's std::is_constant_evaluated. +// +// References: +// - https://en.cppreference.com/w/cpp/types/is_constant_evaluated +// - https://wg21.link/meta.const.eval +constexpr bool is_constant_evaluated() noexcept { +#if HAS_BUILTIN(__builtin_is_constant_evaluated) + return __builtin_is_constant_evaluated(); +#else + return false; +#endif +} // Simplified implementation of C++20's std::iter_value_t. // As opposed to std::iter_value_t, this implementation does not restrict @@ -251,8 +344,8 @@ // // Reference: https://wg21.link/readable.traits#2 template <typename Iter> -using iter_value_t = typename std::iterator_traits< - std::remove_cv_t<std::remove_reference_t<Iter>>>::value_type; +using iter_value_t = + typename std::iterator_traits<remove_cvref_t<Iter>>::value_type; // Simplified implementation of C++20's std::iter_reference_t. // As opposed to std::iter_reference_t, this implementation does not restrict @@ -281,7 +374,7 @@ typename Proj, typename IndirectResultT = indirect_result_t<Proj, Iter>> struct projected { - using value_type = std::remove_cv_t<std::remove_reference_t<IndirectResultT>>; + using value_type = remove_cvref_t<IndirectResultT>; IndirectResultT operator*() const; // not defined };
diff --git a/build/build_config.h b/build/build_config.h index c69df41..daf51ff 100644 --- a/build/build_config.h +++ b/build/build_config.h
@@ -94,7 +94,7 @@ #error Please add support for your platform in build/build_config.h #endif // NOTE: Adding a new port? Please follow -// https://chromium.googlesource.com/chromium/src/+/master/docs/new_port_policy.md +// https://chromium.googlesource.com/chromium/src/+/main/docs/new_port_policy.md #if defined(OS_MAC) || defined(OS_IOS) #define OS_APPLE 1 @@ -224,7 +224,7 @@ // The compiler thinks std::string::const_iterator and "const char*" are // equivalent types. #define STD_STRING_ITERATOR_IS_CHAR_POINTER -// The compiler thinks gurl_base::string16::const_iterator and "char16*" are +// The compiler thinks std::u16string::const_iterator and "const char16_t*" are // equivalent types. #define BASE_STRING16_ITERATOR_IS_CHAR16_POINTER #endif
diff --git a/copy.bara.sky b/copy.bara.sky index c50f711..1384bee 100644 --- a/copy.bara.sky +++ b/copy.bara.sky
@@ -18,12 +18,12 @@ "base/containers/contiguous_iterator.h", "base/containers/span.h", "base/containers/util.h", + "base/cxx17_backports.h", "base/debug/leak_annotations.h", "base/functional/*.h", "base/i18n/uchar.h", "base/macros.h", "base/no_destructor.h", - "base/optional.h", "base/ranges/*.h", "base/stl_util.h", "base/strings/*.cc", @@ -63,8 +63,10 @@ "base/debug/alias.h", "base/export_template.h", "base/logging.h", + "base/metrics/histogram_macros.h", "base/notreached.h", "base/trace_event/memory_usage_estimator.h", + "third_party/perfetto/include/perfetto/tracing/traced_value.h", ] transformations = [ @@ -81,6 +83,12 @@ core.replace("namespace base ", "namespace gurl_base "), core.replace("base::", "gurl_base::"), + # Use Abseil at upstream-recommended paths. + core.replace("third_party/abseil-cpp/absl", "absl"), + + # Fix some Perfetto includes. + core.replace("perfetto/tracing/traced_value_forward.h", "perfetto/tracing/traced_value.h"), + # Use system ICU. core.replace( '"third_party/icu/source/common/unicode/${file}.h"',
diff --git a/polyfills/BUILD b/polyfills/BUILD index 820c63d..9bf74f4 100644 --- a/polyfills/BUILD +++ b/polyfills/BUILD
@@ -13,8 +13,10 @@ "base/debug/alias.h", "base/export_template.h", "base/logging.h", + "base/metrics/histogram_macros.h", "base/notreached.h", "base/trace_event/memory_usage_estimator.h", + "third_party/perfetto/include/perfetto/tracing/traced_value.h", ], copts = build_config.default_copts, visibility = ["//visibility:public"],
diff --git a/polyfills/base/check_op.h b/polyfills/base/check_op.h index ecc127a..faba308 100644 --- a/polyfills/base/check_op.h +++ b/polyfills/base/check_op.h
@@ -6,5 +6,6 @@ #define POLYFILLS_BASE_CHECK_OP_H_ #include "polyfills/base/logging.h" +#include "base/template_util.h" #endif /* POLYFILLS_BASE_CHECK_OP_H_ */
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h index afe296e..3d7aadc 100644 --- a/polyfills/base/logging.h +++ b/polyfills/base/logging.h
@@ -22,6 +22,7 @@ #define GURL_CHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK_LE(statement, statement2) GurlFakeLogSink({statement, statement2}) +#define GURL_CHECK_LT(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK(statement) GurlFakeLogSink({statement})
diff --git a/polyfills/base/metrics/histogram_macros.h b/polyfills/base/metrics/histogram_macros.h new file mode 100644 index 0000000..127c53c --- /dev/null +++ b/polyfills/base/metrics/histogram_macros.h
@@ -0,0 +1,11 @@ +// Copyright (c) 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_ +#define POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_ + +#define UMA_HISTOGRAM_ENUMERATION(name, ...) do {} while(false) + +#endif /* POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_ */ +
diff --git a/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h b/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h new file mode 100644 index 0000000..b2f0286 --- /dev/null +++ b/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h
@@ -0,0 +1,17 @@ +// Copyright (c) 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_ +#define POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_ + +namespace perfetto { + +class TracedValue { + public: + void WriteString(const std::string&) && {} +}; + +} // namespace perfetto + +#endif // POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_
diff --git a/url/BUILD b/url/BUILD index f2ec8da..6ed3fc5 100644 --- a/url/BUILD +++ b/url/BUILD
@@ -43,7 +43,9 @@ "url_file.h", "url_util.h", ], - copts = build_config.default_copts, + copts = build_config.default_copts + [ + "-Wno-c++11-narrowing", + ], linkopts = build_config.url_linkopts, visibility = ["//visibility:public"], deps = [
diff --git a/url/gurl.cc b/url/gurl.cc index 3b7d9f5..2d68889 100644 --- a/url/gurl.cc +++ b/url/gurl.cc
@@ -7,6 +7,7 @@ #include <stddef.h> #include <algorithm> +#include <memory> #include <ostream> #include <utility> @@ -15,6 +16,7 @@ #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "polyfills/base/trace_event/memory_usage_estimator.h" +#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h" #include "url/url_canon_stdstring.h" #include "url/url_util.h" @@ -26,7 +28,7 @@ is_valid_(other.is_valid_), parsed_(other.parsed_) { if (other.inner_url_) - inner_url_.reset(new GURL(*other.inner_url_)); + inner_url_ = std::make_unique<GURL>(*other.inner_url_); // Valid filesystem urls should always have an inner_url_. GURL_DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); } @@ -49,7 +51,7 @@ } GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) { - InitCanonical(gurl_base::StringPiece(url_string), false); + InitCanonical(url_string, false); } GURL::GURL(const char* canonical_spec, @@ -67,9 +69,8 @@ InitializeFromCanonicalSpec(); } -template<typename STR> -void GURL::InitCanonical(gurl_base::BasicStringPiece<STR> input_spec, - bool trim_path_end) { +template <typename T, typename CharT> +void GURL::InitCanonical(T input_spec, bool trim_path_end) { url::StdStringCanonOutput output(&spec_); is_valid_ = url::Canonicalize( input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end, @@ -77,8 +78,8 @@ output.Complete(); // Must be done before using string. if (is_valid_ && SchemeIsFileSystem()) { - inner_url_.reset(new GURL(spec_.data(), parsed_.Length(), - *parsed_.inner_parsed(), true)); + inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), + *parsed_.inner_parsed(), true); } // Valid URLs always have non-empty specs. 
GURL_DCHECK(!is_valid_ || !spec_.empty()); @@ -86,9 +87,8 @@ void GURL::InitializeFromCanonicalSpec() { if (is_valid_ && SchemeIsFileSystem()) { - inner_url_.reset( - new GURL(spec_.data(), parsed_.Length(), - *parsed_.inner_parsed(), true)); + inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), + *parsed_.inner_parsed(), true); } #ifndef NDEBUG @@ -138,7 +138,7 @@ else if (inner_url_) *inner_url_ = *other.inner_url_; else - inner_url_.reset(new GURL(*other.inner_url_)); + inner_url_ = std::make_unique<GURL>(*other.inner_url_); return *this; } @@ -189,9 +189,9 @@ output.Complete(); result.is_valid_ = true; if (result.SchemeIsFileSystem()) { - result.inner_url_.reset( - new GURL(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } @@ -215,9 +215,9 @@ output.Complete(); result.is_valid_ = true; if (result.SchemeIsFileSystem()) { - result.inner_url_.reset( - new GURL(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } @@ -238,16 +238,16 @@ output.Complete(); if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_.reset(new GURL(result.spec_.data(), - result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } // Note: code duplicated above (it's inconvenient to use a template here). GURL GURL::ReplaceComponents( - const url::Replacements<gurl_base::char16>& replacements) const { + const url::Replacements<char16_t>& replacements) const { GURL result; // Not allowed for invalid URLs. 
@@ -261,9 +261,9 @@ output.Complete(); if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_.reset(new GURL(result.spec_.data(), - result.parsed_.Length(), - *result.parsed_.inner_parsed(), true)); + result.inner_url_ = + std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), + *result.parsed_.inner_parsed(), true); } return result; } @@ -412,11 +412,11 @@ } std::string GURL::PathForRequest() const { - return PathForRequestPiece().as_string(); + return std::string(PathForRequestPiece()); } std::string GURL::HostNoBrackets() const { - return HostNoBracketsPiece().as_string(); + return std::string(HostNoBracketsPiece()); } gurl_base::StringPiece GURL::HostNoBracketsPiece() const { @@ -501,13 +501,17 @@ if ((actual_path.size() == allowed_path.size() + 1) && actual_path.back() == '/') { - GURL_DCHECK_EQ(actual_path, allowed_path.as_string() + '/'); + GURL_DCHECK_EQ(actual_path, std::string(allowed_path) + '/'); return true; } return false; } +void GURL::WriteIntoTrace(perfetto::TracedValue context) const { + std::move(context).WriteString(possibly_invalid_spec()); +} + std::ostream& operator<<(std::ostream& out, const GURL& url) { return out << url.possibly_invalid_spec(); }
diff --git a/url/gurl.h b/url/gurl.h index 37e1c8d..21e6611 100644 --- a/url/gurl.h +++ b/url/gurl.h
@@ -13,8 +13,8 @@ #include "polyfills/base/component_export.h" #include "polyfills/base/debug/alias.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" +#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_canon_stdstring.h" @@ -45,8 +45,8 @@ // will know to escape this and produce the desired result. class COMPONENT_EXPORT(URL) GURL { public: - typedef url::StringPieceReplacements<std::string> Replacements; - typedef url::StringPieceReplacements<gurl_base::string16> ReplacementsW; + typedef url::StringPieceReplacements<char> Replacements; + typedef url::StringPieceReplacements<char16_t> ReplacementsW; // Creates an empty, invalid URL. GURL(); @@ -166,8 +166,7 @@ // Note that we use the more general url::Replacements type to give // callers extra flexibility rather than our override. GURL ReplaceComponents(const url::Replacements<char>& replacements) const; - GURL ReplaceComponents( - const url::Replacements<gurl_base::char16>& replacements) const; + GURL ReplaceComponents(const url::Replacements<char16_t>& replacements) const; // A helper function that is equivalent to replacing the path with a slash // and clearing out everything after that. 
We sometimes need to know just the @@ -438,6 +437,8 @@ static bool IsAboutPath(gurl_base::StringPiece actual_path, gurl_base::StringPiece allowed_path); + void WriteIntoTrace(perfetto::TracedValue context) const; + private: // Variant of the string parsing constructor that allows the caller to elect // retain trailing whitespace, if any, on the passed URL spec, but only if @@ -447,9 +448,8 @@ enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE }; GURL(const std::string& url_string, RetainWhiteSpaceSelector); - template<typename STR> - void InitCanonical(gurl_base::BasicStringPiece<STR> input_spec, - bool trim_path_end); + template <typename T, typename CharT = typename T::value_type> + void InitCanonical(T input_spec, bool trim_path_end); void InitializeFromCanonicalSpec();
diff --git a/url/gurl_abstract_tests.h b/url/gurl_abstract_tests.h new file mode 100644 index 0000000..ffe9942 --- /dev/null +++ b/url/gurl_abstract_tests.h
@@ -0,0 +1,119 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef URL_GURL_ABSTRACT_TESTS_H_ +#define URL_GURL_ABSTRACT_TESTS_H_ + +// Test suite for tests that cover both GURL and blink::KURL. +// +// AbstractUrlTest below abstracts away differences between GURL and blink::KURL +// by parametrizing the tests with a class that has to expose the following +// members: +// using UrlType = ...; +// static UrlType CreateUrlFromString(gurl_base::StringPiece s); +// static bool IsAboutBlank(const UrlType& url); +// static bool IsAboutSrcdoc(const UrlType& url); +template <typename TUrlTraits> +class AbstractUrlTest : public testing::Test { + protected: + // Wrappers that help elide away TUrlTraits. + // + // Note that calling the wrappers needs to be prefixed with `this->...` to + // avoid hitting: explicit qualification required to use member 'IsAboutBlank' + // from dependent base class. + using UrlType = typename TUrlTraits::UrlType; + UrlType CreateUrlFromString(gurl_base::StringPiece s) { + return TUrlTraits::CreateUrlFromString(s); + } + bool IsAboutBlank(const UrlType& url) { + return TUrlTraits::IsAboutBlank(url); + } + bool IsAboutSrcdoc(const UrlType& url) { + return TUrlTraits::IsAboutSrcdoc(url); + } +}; + +TYPED_TEST_SUITE_P(AbstractUrlTest); + +TYPED_TEST_P(AbstractUrlTest, IsAboutBlankTest) { + // See https://tools.ietf.org/html/rfc6694 which explicitly allows + // `about-query` and `about-fragment` parts in about: URLs. 
+ const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo", + "about:blank/#foo", + "about:blank?foo#foo"}; + for (const auto& input : kAboutBlankUrls) { + SCOPED_TRACE(testing::Message() << "Test input: " << input); + auto url = this->CreateUrlFromString(input); + EXPECT_TRUE(this->IsAboutBlank(url)); + } + + const std::string kNotAboutBlankUrls[] = {"", + "about", + "about:", + "about:blanky", + "about:blan", + "about:about:blank:", + "data:blank", + "http:blank", + "about://blank", + "about:blank/foo", + "about://:8000/blank", + "about://foo:foo@/blank", + "foo@about:blank", + "foo:bar@about:blank", + "about:blank:8000", + "about:blANk"}; + for (const auto& input : kNotAboutBlankUrls) { + SCOPED_TRACE(testing::Message() << "Test input: " << input); + auto url = this->CreateUrlFromString(input); + EXPECT_FALSE(this->IsAboutBlank(url)); + } +} + +TYPED_TEST_P(AbstractUrlTest, IsAboutSrcdocTest) { + // See https://tools.ietf.org/html/rfc6694 which explicitly allows + // `about-query` and `about-fragment` parts in about: URLs. + // + // `about:srcdoc` is defined in + // https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:srcdoc + // which refers to rfc6694 for details. 
+ const std::string kAboutSrcdocUrls[] = { + "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo", + "about:srcdoc?foo#foo"}; + for (const auto& input : kAboutSrcdocUrls) { + SCOPED_TRACE(testing::Message() << "Test input: " << input); + auto url = this->CreateUrlFromString(input); + EXPECT_TRUE(this->IsAboutSrcdoc(url)); + } + + const std::string kNotAboutSrcdocUrls[] = {"", + "about", + "about:", + "about:srcdocx", + "about:srcdo", + "about:about:srcdoc:", + "data:srcdoc", + "http:srcdoc", + "about:srcdo", + "about://srcdoc", + "about://srcdoc\\", + "about:srcdoc/foo", + "about://:8000/srcdoc", + "about://foo:foo@/srcdoc", + "foo@about:srcdoc", + "foo:bar@about:srcdoc", + "about:srcdoc:8000", + "about:srCDOc"}; + for (const auto& input : kNotAboutSrcdocUrls) { + SCOPED_TRACE(testing::Message() << "Test input: " << input); + auto url = this->CreateUrlFromString(input); + EXPECT_FALSE(this->IsAboutSrcdoc(url)); + } +} + +REGISTER_TYPED_TEST_SUITE_P(AbstractUrlTest, + IsAboutBlankTest, + IsAboutSrcdocTest); + +#endif // URL_GURL_ABSTRACT_TESTS_H_
diff --git a/url/gurl_fuzzer.cc b/url/gurl_fuzzer.cc index 3b28aea..0c3c101 100644 --- a/url/gurl_fuzzer.cc +++ b/url/gurl_fuzzer.cc
@@ -52,9 +52,9 @@ CheckReplaceComponentsPreservesSpec(url_from_string_piece); } // Test for StringPiece16 if size is even. - if (size % 2 == 0) { + if (size % sizeof(char16_t) == 0) { gurl_base::StringPiece16 string_piece_input16( - reinterpret_cast<const gurl_base::char16*>(data), size / 2); + reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t)); const GURL url_from_string_piece16(string_piece_input16); CheckIdempotency(url_from_string_piece16); CheckReplaceComponentsPreservesSpec(url_from_string_piece16); @@ -78,10 +78,10 @@ url_from_string_piece_part.Resolve(relative_string); - if (relative_size % 2 == 0) { - gurl_base::string16 relative_string16( - reinterpret_cast<const gurl_base::char16*>(data + size_t_bytes), - relative_size / 2); + if (relative_size % sizeof(char16_t) == 0) { + std::u16string relative_string16( + reinterpret_cast<const char16_t*>(data + size_t_bytes), + relative_size / sizeof(char16_t)); url_from_string_piece_part.Resolve(relative_string16); } }
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index 6295d98..0b81da4 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -68,11 +68,11 @@ // the parser is already tested and works, so we are mostly interested if the // object does the right thing with the results. TEST(GURLTest, Components) { - GURL empty_url(gurl_base::UTF8ToUTF16("")); + GURL empty_url(u""); EXPECT_TRUE(empty_url.is_empty()); EXPECT_FALSE(empty_url.is_valid()); - GURL url(gurl_base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref")); + GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref"); EXPECT_FALSE(url.is_empty()); EXPECT_TRUE(url.is_valid()); EXPECT_TRUE(url.SchemeIs("http")); @@ -117,8 +117,7 @@ } TEST(GURLTest, Copy) { - GURL url(gurl_base::UTF8ToUTF16( - "http://user:pass@google.com:99/foo;bar?q=a#ref")); + GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref"); GURL url2(url); EXPECT_TRUE(url2.is_valid()); @@ -151,8 +150,7 @@ } TEST(GURLTest, Assign) { - GURL url(gurl_base::UTF8ToUTF16( - "http://user:pass@google.com:99/foo;bar?q=a#ref")); + GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref"); GURL url2; url2 = url; @@ -194,8 +192,7 @@ } TEST(GURLTest, CopyFileSystem) { - GURL url(gurl_base::UTF8ToUTF16( - "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref")); + GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"); GURL url2(url); EXPECT_TRUE(url2.is_valid()); @@ -268,21 +265,49 @@ EXPECT_EQ("/", url.path()); } -// Given an invalid URL, we should still get most of the components. +// Given invalid URLs, we should still get most of the components. TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) { - GURL url("http:google.com:foo"); - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); + constexpr struct InvalidURLTestExpectations { + const char* url; + const char* spec; + const char* scheme; + const char* host; + const char* port; + const char* path; + // Extend as needed... 
+ } expectations[] = { + { + "http:google.com:foo", + "http://google.com:foo/", + "http", + "google.com", + "foo", + "/", + }, + { + "https:google.com:foo", + "https://google.com:foo/", + "https", + "google.com", + "foo", + "/", + }, + }; - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("foo", url.port()); - EXPECT_EQ(PORT_INVALID, url.IntPort()); - EXPECT_EQ("/", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); + for (const auto& e : expectations) { + const GURL url(e.url); + EXPECT_FALSE(url.is_valid()); + EXPECT_EQ(e.spec, url.possibly_invalid_spec()); + EXPECT_EQ(e.scheme, url.scheme()); + EXPECT_EQ("", url.username()); + EXPECT_EQ("", url.password()); + EXPECT_EQ(e.host, url.host()); + EXPECT_EQ(e.port, url.port()); + EXPECT_EQ(PORT_INVALID, url.IntPort()); + EXPECT_EQ(e.path, url.path()); + EXPECT_EQ("", url.query()); + EXPECT_EQ("", url.ref()); + } } TEST(GURLTest, Resolve) { @@ -314,6 +339,7 @@ // A non-standard base can be replaced with a standard absolute URL. {"data:blahblah", "http://google.com/", true, "http://google.com/"}, {"data:blahblah", "http:google.com", true, "http://google.com/"}, + {"data:blahblah", "https:google.com", true, "https://google.com/"}, // Filesystem URLs have different paths to test. {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, @@ -535,7 +561,7 @@ GURL url(" data: one ? two # three "); // By default the trailing whitespace will have been stripped. - EXPECT_EQ("data: one ? two # three", url.spec()); + EXPECT_EQ("data: one ? 
two #%20three", url.spec()); GURL::Replacements repl; repl.ClearRef(); GURL url_no_ref = url.ReplaceComponents(repl); @@ -953,6 +979,21 @@ EXPECT_STREQ("https://foo.com/bar", url_debug_alias); } +TEST(GURLTest, InvalidHost) { + // This contains an invalid percent escape (%T%) and also a valid + // percent escape that's not 7-bit ascii (%ae), so that the unescaped + // host contains both an invalid percent escape and invalid UTF-8. + GURL url("http://%T%Ae"); + + EXPECT_FALSE(url.is_valid()); + EXPECT_TRUE(url.SchemeIs(url::kHttpScheme)); + + // The invalid percent escape becomes an escaped percent sign (%25), and the + // invalid UTF-8 character becomes REPLACEMENT CHARACTER (U+FFFD) encoded as + // UTF-8. + EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD"); +} + TEST(GURLTest, PortZero) { GURL port_zero_url("http://127.0.0.1:0/blah");
diff --git a/url/origin.cc b/url/origin.cc index ca37428..33e26f9 100644 --- a/url/origin.cc +++ b/url/origin.cc
@@ -16,7 +16,9 @@ #include "base/pickle.h" #include "base/strings/strcat.h" #include "base/strings/string_number_conversions.h" +#include "base/strings/string_piece.h" #include "base/strings/string_util.h" +#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h" #include "url/gurl.h" #include "url/url_canon.h" #include "url/url_canon_stdstring.h" @@ -72,25 +74,25 @@ Origin::~Origin() = default; // static -gurl_base::Optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization( +absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization( gurl_base::StringPiece scheme, gurl_base::StringPiece host, uint16_t port) { - SchemeHostPort tuple(scheme.as_string(), host.as_string(), port, + SchemeHostPort tuple(std::string(scheme), std::string(host), port, SchemeHostPort::CHECK_CANONICALIZATION); if (!tuple.IsValid()) - return gurl_base::nullopt; + return absl::nullopt; return Origin(std::move(tuple)); } // static -gurl_base::Optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization( +absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization( gurl_base::StringPiece precursor_scheme, gurl_base::StringPiece precursor_host, uint16_t precursor_port, const Origin::Nonce& nonce) { - SchemeHostPort precursor(precursor_scheme.as_string(), - precursor_host.as_string(), precursor_port, + SchemeHostPort precursor(std::string(precursor_scheme), + std::string(precursor_host), precursor_port, SchemeHostPort::CHECK_CANONICALIZATION); // For opaque origins, it is okay for the SchemeHostPort to be invalid; // however, this should only arise when the arguments indicate the @@ -98,7 +100,7 @@ if (!precursor.IsValid() && !(precursor_scheme.empty() && precursor_host.empty() && precursor_port == 0)) { - return gurl_base::nullopt; + return absl::nullopt; } return Origin(std::move(nonce), std::move(precursor)); } @@ -147,11 +149,11 @@ return tuple_.GetURL(); } -gurl_base::Optional<gurl_base::UnguessableToken> 
Origin::GetNonceForSerialization() +absl::optional<gurl_base::UnguessableToken> Origin::GetNonceForSerialization() const { // TODO(nasko): Consider not making a copy here, but return a reference to // the nonce. - return nonce_ ? gurl_base::make_optional(nonce_->token()) : gurl_base::nullopt; + return nonce_ ? absl::make_optional(nonce_->token()) : absl::nullopt; } bool Origin::IsSameOriginWith(const Origin& other) const { @@ -287,11 +289,11 @@ GURL_DCHECK_EQ(0U, port()); } -gurl_base::Optional<std::string> Origin::SerializeWithNonce() const { +absl::optional<std::string> Origin::SerializeWithNonce() const { return SerializeWithNonceImpl(); } -gurl_base::Optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() { +absl::optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() { GetNonceForSerialization(); return SerializeWithNonceImpl(); } @@ -300,9 +302,9 @@ // string - tuple_.GetURL().spec(). // uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized. // uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized. -gurl_base::Optional<std::string> Origin::SerializeWithNonceImpl() const { +absl::optional<std::string> Origin::SerializeWithNonceImpl() const { if (!opaque() && !tuple_.IsValid()) - return gurl_base::nullopt; + return absl::nullopt; gurl_base::Pickle pickle; pickle.WriteString(tuple_.Serialize()); @@ -323,16 +325,16 @@ } // static -gurl_base::Optional<Origin> Origin::Deserialize(const std::string& value) { +absl::optional<Origin> Origin::Deserialize(const std::string& value) { std::string data; if (!gurl_base::Base64Decode(value, &data)) - return gurl_base::nullopt; + return absl::nullopt; gurl_base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size()); gurl_base::PickleIterator reader(pickle); std::string pickled_url; if (!reader.ReadString(&pickled_url)) - return gurl_base::nullopt; + return absl::nullopt; GURL url(pickled_url); // If only a tuple was serialized, then this origin is not opaque. 
For opaque @@ -341,24 +343,24 @@ // Opaque origins without a tuple are ok. if (!is_opaque && !url.is_valid()) - return gurl_base::nullopt; + return absl::nullopt; SchemeHostPort tuple(url); // Possible successful early return if the pickled Origin was not opaque. if (!is_opaque) { Origin origin(tuple); if (origin.opaque()) - return gurl_base::nullopt; // Something went horribly wrong. + return absl::nullopt; // Something went horribly wrong. return origin; } uint64_t nonce_high = 0; if (!reader.ReadUInt64(&nonce_high)) - return gurl_base::nullopt; + return absl::nullopt; uint64_t nonce_low = 0; if (!reader.ReadUInt64(&nonce_low)) - return gurl_base::nullopt; + return absl::nullopt; Origin::Nonce nonce; if (nonce_high != 0 && nonce_low != 0) { @@ -372,6 +374,10 @@ return origin; } +void Origin::WriteIntoTrace(perfetto::TracedValue context) const { + std::move(context).WriteString(GetDebugString()); +} + std::ostream& operator<<(std::ostream& out, const url::Origin& origin) { out << origin.GetDebugString(); return out;
diff --git a/url/origin.h b/url/origin.h index 8cb3ef0..bfd3b36 100644 --- a/url/origin.h +++ b/url/origin.h
@@ -13,13 +13,13 @@ #include "polyfills/base/component_export.h" #include "polyfills/base/debug/alias.h" #include "base/debug/crash_logging.h" -#include "base/optional.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/unguessable_token.h" #include "build/build_config.h" #include "ipc/ipc_param_traits.h" +#include "absl/types/optional.h" +#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h" #include "url/scheme_host_port.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -182,7 +182,7 @@ // forth over IPC (as transitioning through GURL would risk potentially // dangerous recanonicalization); other potential callers should prefer the // 'GURL'-based constructor. - static gurl_base::Optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization( + static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization( gurl_base::StringPiece scheme, gurl_base::StringPiece host, uint16_t port); @@ -295,6 +295,8 @@ const gurl_base::android::JavaRef<jobject>& java_origin); #endif // OS_ANDROID + void WriteIntoTrace(perfetto::TracedValue context) const; + private: friend class blink::SecurityOrigin; // SchemefulSite needs access to the serialization/deserialization logic which @@ -377,7 +379,7 @@ // This factory method should be used in order to pass opaque Origin objects // back and forth over IPC (as transitioning through GURL would risk // potentially dangerous recanonicalization). - static gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization( + static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization( gurl_base::StringPiece precursor_scheme, gurl_base::StringPiece precursor_host, uint16_t precursor_port, @@ -392,23 +394,23 @@ // Get the nonce associated with this origin, if it is opaque. This should be // used only when trying to send an Origin across an IPC pipe. 
- gurl_base::Optional<gurl_base::UnguessableToken> GetNonceForSerialization() const; + absl::optional<gurl_base::UnguessableToken> GetNonceForSerialization() const; // Serializes this Origin, including its nonce if it is opaque. If an opaque // origin's |tuple_| is invalid nullopt is returned. If the nonce is not // initialized, a nonce of 0 is used. Use of this method should be limited as // an opaque origin will never be matchable in future browser sessions. - gurl_base::Optional<std::string> SerializeWithNonce() const; + absl::optional<std::string> SerializeWithNonce() const; // Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to // serializing. - gurl_base::Optional<std::string> SerializeWithNonceAndInitIfNeeded(); + absl::optional<std::string> SerializeWithNonceAndInitIfNeeded(); - gurl_base::Optional<std::string> SerializeWithNonceImpl() const; + absl::optional<std::string> SerializeWithNonceImpl() const; // Deserializes an origin from |ToValueWithNonce|. Returns nullopt if the // value was invalid in any way. - static gurl_base::Optional<Origin> Deserialize(const std::string& value); + static absl::optional<Origin> Deserialize(const std::string& value); // The tuple is used for both tuple origins (e.g. https://example.com:80), as // well as for opaque origins, where it tracks the tuple origin from which @@ -419,7 +421,7 @@ // The nonce is used for maintaining identity of an opaque origin. This // nonce is preserved when an opaque origin is copied or moved. An Origin // is considered opaque if and only if |nonce_| holds a value. - gurl_base::Optional<Nonce> nonce_; + absl::optional<Nonce> nonce_; }; // Pretty-printers for logging. These expose the internal state of the nonce.
diff --git a/url/origin_abstract_tests.cc b/url/origin_abstract_tests.cc new file mode 100644 index 0000000..1619eae --- /dev/null +++ b/url/origin_abstract_tests.cc
@@ -0,0 +1,104 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "url/origin_abstract_tests.h" + +namespace url { + +void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { + EXPECT_EQ(a, b); + const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); + const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); + EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin); + EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len); + EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin); + EXPECT_EQ(a_parsed.username.len, b_parsed.username.len); + EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin); + EXPECT_EQ(a_parsed.password.len, b_parsed.password.len); + EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin); + EXPECT_EQ(a_parsed.host.len, b_parsed.host.len); + EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin); + EXPECT_EQ(a_parsed.port.len, b_parsed.port.len); + EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin); + EXPECT_EQ(a_parsed.path.len, b_parsed.path.len); + EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin); + EXPECT_EQ(a_parsed.query.len, b_parsed.query.len); + EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin); + EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len); +} + +// static +Origin UrlOriginTestTraits::CreateOriginFromString(gurl_base::StringPiece s) { + return Origin::Create(GURL(s)); +} + +// static +Origin UrlOriginTestTraits::CreateUniqueOpaqueOrigin() { + return Origin(); +} + +// static +Origin UrlOriginTestTraits::CreateWithReferenceOrigin( + gurl_base::StringPiece url, + const Origin& reference_origin) { + return Origin::Resolve(GURL(url), reference_origin); +} + +// static +Origin UrlOriginTestTraits::DeriveNewOpaqueOrigin( + const Origin& reference_origin) { + return reference_origin.DeriveNewOpaqueOrigin(); +} + +// static +bool UrlOriginTestTraits::IsOpaque(const Origin& origin) { + 
return origin.opaque(); +} + +// static +std::string UrlOriginTestTraits::GetScheme(const Origin& origin) { + return origin.scheme(); +} + +// static +std::string UrlOriginTestTraits::GetHost(const Origin& origin) { + return origin.host(); +} + +// static +uint16_t UrlOriginTestTraits::GetPort(const Origin& origin) { + return origin.port(); +} + +// static +SchemeHostPort UrlOriginTestTraits::GetTupleOrPrecursorTupleIfOpaque( + const Origin& origin) { + return origin.GetTupleOrPrecursorTupleIfOpaque(); +} + +// static +bool UrlOriginTestTraits::IsSameOrigin(const Origin& a, const Origin& b) { + return a.IsSameOriginWith(b); +} + +// static +std::string UrlOriginTestTraits::Serialize(const Origin& origin) { + std::string serialized = origin.Serialize(); + + // Extra test assertion for GetURL (which doesn't have an equivalent in + // blink::SecurityOrigin). + ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL()); + + return serialized; +} + +// static +bool UrlOriginTestTraits::IsValidUrl(gurl_base::StringPiece str) { + return GURL(str).is_valid(); +} + +// This is an abstract test suite which is instantiated by each implementation. +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AbstractOriginTest); + +} // namespace url
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h new file mode 100644 index 0000000..0c53f82 --- /dev/null +++ b/url/origin_abstract_tests.h
@@ -0,0 +1,536 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef URL_ORIGIN_ABSTRACT_TESTS_H_ +#define URL_ORIGIN_ABSTRACT_TESTS_H_ + +#include <string> +#include <type_traits> + +#include "base/containers/contains.h" +#include "base/strings/string_piece.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "absl/types/optional.h" +#include "url/gurl.h" +#include "url/origin.h" +#include "url/scheme_host_port.h" +#include "url/url_util.h" + +namespace url { + +void ExpectParsedUrlsEqual(const GURL& a, const GURL& b); + +// AbstractOriginTest below abstracts away differences between url::Origin and +// blink::SecurityOrigin by parametrizing the tests with a class that has to +// expose the same public members as UrlOriginTestTraits below. +class UrlOriginTestTraits { + public: + using OriginType = Origin; + + // Constructing an origin. + static OriginType CreateOriginFromString(gurl_base::StringPiece s); + static OriginType CreateUniqueOpaqueOrigin(); + static OriginType CreateWithReferenceOrigin( + gurl_base::StringPiece url, + const OriginType& reference_origin); + static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin); + + // Accessors for origin properties. + static bool IsOpaque(const OriginType& origin); + static std::string GetScheme(const OriginType& origin); + static std::string GetHost(const OriginType& origin); + static uint16_t GetPort(const OriginType& origin); + static SchemeHostPort GetTupleOrPrecursorTupleIfOpaque( + const OriginType& origin); + + // Wrappers for other instance methods of OriginType. + static bool IsSameOrigin(const OriginType& a, const OriginType& b); + static std::string Serialize(const OriginType& origin); + + // "Accessors" of URL properties. + // + // TODO(lukasza): Consider merging together OriginTraitsBase here and + // UrlTraitsBase in //url/gurl_abstract_tests.h. 
+ static bool IsValidUrl(gurl_base::StringPiece str); + + // Only static members = no constructors are needed. + UrlOriginTestTraits() = delete; +}; + +// Test suite for tests that cover both url::Origin and blink::SecurityOrigin. +template <typename TOriginTraits> +class AbstractOriginTest : public testing::Test { + public: + void SetUp() override { + const char* kSchemesToRegister[] = { + "noaccess", + "std-with-host", + "noaccess-std-with-host", + "local", + "local-noaccess", + "local-std-with-host", + "local-noaccess-std-with-host", + "also-local", + "sec", + "sec-std-with-host", + "sec-noaccess", + }; + for (const char* kScheme : kSchemesToRegister) { + std::string scheme(kScheme); + if (gurl_base::Contains(scheme, "noaccess")) + AddNoAccessScheme(kScheme); + if (gurl_base::Contains(scheme, "std-with-host")) + AddStandardScheme(kScheme, SchemeType::SCHEME_WITH_HOST); + if (gurl_base::Contains(scheme, "local")) + AddLocalScheme(kScheme); + if (gurl_base::Contains(scheme, "sec")) + AddSecureScheme(kScheme); + } + } + + protected: + // Wrappers that help elide away TOriginTraits. + // + // Note that calling the wrappers needs to be prefixed with `this->...` to + // avoid hitting: explicit qualification required to use member 'IsOpaque' + // from dependent base class. 
+ using OriginType = typename TOriginTraits::OriginType; + OriginType CreateOriginFromString(gurl_base::StringPiece s) { + return TOriginTraits::CreateOriginFromString(s); + } + OriginType CreateUniqueOpaqueOrigin() { + return TOriginTraits::CreateUniqueOpaqueOrigin(); + } + OriginType CreateWithReferenceOrigin(gurl_base::StringPiece url, + const OriginType& reference_origin) { + return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin); + } + OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin) { + return TOriginTraits::DeriveNewOpaqueOrigin(reference_origin); + } + bool IsOpaque(const OriginType& origin) { + return TOriginTraits::IsOpaque(origin); + } + std::string GetScheme(const OriginType& origin) { + return TOriginTraits::GetScheme(origin); + } + std::string GetHost(const OriginType& origin) { + return TOriginTraits::GetHost(origin); + } + uint16_t GetPort(const OriginType& origin) { + return TOriginTraits::GetPort(origin); + } + SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(const OriginType& origin) { + return TOriginTraits::GetTupleOrPrecursorTupleIfOpaque(origin); + } + bool IsSameOrigin(const OriginType& a, const OriginType& b) { + bool is_a_same_with_b = TOriginTraits::IsSameOrigin(a, b); + bool is_b_same_with_a = TOriginTraits::IsSameOrigin(b, a); + EXPECT_EQ(is_a_same_with_b, is_b_same_with_a); + return is_a_same_with_b; + } + std::string Serialize(const OriginType& origin) { + return TOriginTraits::Serialize(origin); + } + bool IsValidUrl(gurl_base::StringPiece str) { + return TOriginTraits::IsValidUrl(str); + } + +#define EXPECT_SAME_ORIGIN(a, b) \ + EXPECT_TRUE(this->IsSameOrigin((a), (b))) \ + << "When checking if \"" << this->Serialize(a) << "\" is " \ + << "same-origin with \"" << this->Serialize(b) << "\"" + +#define EXPECT_CROSS_ORIGIN(a, b) \ + EXPECT_FALSE(this->IsSameOrigin((a), (b))) \ + << "When checking if \"" << this->Serialize(a) << "\" is " \ + << "cross-origin from \"" << this->Serialize(b) << "\"" + + 
void VerifyOriginInvariants(const OriginType& origin) { + // An origin is always same-origin with itself. + EXPECT_SAME_ORIGIN(origin, origin); + + // A copy of |origin| should be same-origin as well. + auto origin_copy = origin; + EXPECT_EQ(this->GetScheme(origin), this->GetScheme(origin_copy)); + EXPECT_EQ(this->GetHost(origin), this->GetHost(origin_copy)); + EXPECT_EQ(this->GetPort(origin), this->GetPort(origin_copy)); + EXPECT_EQ(this->IsOpaque(origin), this->IsOpaque(origin_copy)); + EXPECT_SAME_ORIGIN(origin, origin_copy); + + // An origin is always cross-origin from another, unique, opaque origin. + EXPECT_CROSS_ORIGIN(origin, this->CreateUniqueOpaqueOrigin()); + + // An origin is always cross-origin from another tuple origin. + auto different_tuple_origin = + this->CreateOriginFromString("https://not-in-the-list.test/"); + EXPECT_CROSS_ORIGIN(origin, different_tuple_origin); + + // Deriving an origin for "about:blank". + auto about_blank_origin1 = + this->CreateWithReferenceOrigin("about:blank", origin); + auto about_blank_origin2 = + this->CreateWithReferenceOrigin("about:blank?bar#foo", origin); + EXPECT_SAME_ORIGIN(origin, about_blank_origin1); + EXPECT_SAME_ORIGIN(origin, about_blank_origin2); + + // Derived opaque origins. 
+ std::vector<OriginType> derived_origins = { + this->DeriveNewOpaqueOrigin(origin), + this->CreateWithReferenceOrigin("data:text/html,baz", origin), + this->DeriveNewOpaqueOrigin(about_blank_origin1), + }; + for (size_t i = 0; i < derived_origins.size(); i++) { + SCOPED_TRACE(testing::Message() << "Derived origin #" << i); + const OriginType& derived_origin = derived_origins[i]; + EXPECT_TRUE(this->IsOpaque(derived_origin)); + EXPECT_SAME_ORIGIN(derived_origin, derived_origin); + EXPECT_CROSS_ORIGIN(origin, derived_origin); + EXPECT_EQ(this->GetTupleOrPrecursorTupleIfOpaque(origin), + this->GetTupleOrPrecursorTupleIfOpaque(derived_origin)); + } + } + + void VerifyUniqueOpaqueOriginInvariants(const OriginType& origin) { + if (!this->IsOpaque(origin)) { + ADD_FAILURE() << "Got unexpectedly non-opaque origin: " + << this->Serialize(origin); + return; // Skip other test assertions. + } + + // Opaque origins should have an "empty" scheme, host and port. + EXPECT_EQ("", this->GetScheme(origin)); + EXPECT_EQ("", this->GetHost(origin)); + EXPECT_EQ(0, this->GetPort(origin)); + + // Unique opaque origins should have an empty precursor tuple. + EXPECT_EQ(SchemeHostPort(), this->GetTupleOrPrecursorTupleIfOpaque(origin)); + + // Serialization test. + EXPECT_EQ("null", this->Serialize(origin)); + + // Invariants that should hold for any origin. + VerifyOriginInvariants(origin); + } + + void TestUniqueOpaqueOrigin(gurl_base::StringPiece test_input) { + auto origin = this->CreateOriginFromString(test_input); + this->VerifyUniqueOpaqueOriginInvariants(origin); + + // Re-creating from the URL should be cross-origin. 
+ auto origin_recreated_from_same_input = + this->CreateOriginFromString(test_input); + EXPECT_CROSS_ORIGIN(origin, origin_recreated_from_same_input); + } + + void VerifyTupleOriginInvariants(const OriginType& origin, + const SchemeHostPort& expected_tuple) { + if (this->IsOpaque(origin)) { + ADD_FAILURE() << "Got unexpectedly opaque origin"; + return; // Skip other test assertions. + } + SCOPED_TRACE(testing::Message() + << "Actual origin: " << this->Serialize(origin)); + + // Compare `origin` against the `expected_tuple`. + EXPECT_EQ(expected_tuple.scheme(), this->GetScheme(origin)); + EXPECT_EQ(expected_tuple.host(), this->GetHost(origin)); + EXPECT_EQ(expected_tuple.port(), this->GetPort(origin)); + EXPECT_EQ(expected_tuple, this->GetTupleOrPrecursorTupleIfOpaque(origin)); + + // Serialization test. + // + // TODO(lukasza): Consider preserving the hostname when serializing file: + // URLs. Dropping the hostname seems incompatible with section 6 of + // rfc6454. Even though section 4 says that "the implementation MAY + // return an implementation-defined value", it seems that Chromium + // implementation *does* include the hostname in the origin SchemeHostPort + // tuple. + if (expected_tuple.scheme() != kFileScheme || expected_tuple.host() == "") { + EXPECT_SAME_ORIGIN(origin, + this->CreateOriginFromString(this->Serialize(origin))); + } + + // Invariants that should hold for any origin. + VerifyOriginInvariants(origin); + } + + private: + ScopedSchemeRegistryForTests scoped_scheme_registry_; +}; + +TYPED_TEST_SUITE_P(AbstractOriginTest); + +TYPED_TEST_P(AbstractOriginTest, NonStandardSchemeWithAndroidWebViewHack) { + EnableNonStandardSchemesForAndroidWebView(); + + // Regression test for https://crbug.com/896059. 
+ auto origin = this->CreateOriginFromString("unknown-scheme://"); + EXPECT_FALSE(this->IsOpaque(origin)); + EXPECT_EQ("unknown-scheme", this->GetScheme(origin)); + EXPECT_EQ("", this->GetHost(origin)); + EXPECT_EQ(0, this->GetPort(origin)); + + // about:blank translates into an opaque origin, even in presence of + // EnableNonStandardSchemesForAndroidWebView. + origin = this->CreateOriginFromString("about:blank"); + EXPECT_TRUE(this->IsOpaque(origin)); +} + +TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromValidUrls) { + const char* kTestCases[] = { + // Built-in noaccess schemes. + "data:text/html,Hello!", + "javascript:alert(1)", + "about:blank", + + // Opaque blob URLs. + "blob:null/foo", // blob:null (actually a valid URL) + "blob:data:foo", // blob + data (which is nonstandard) + "blob:about://blank/", // blob + about (which is nonstandard) + "blob:about:blank/", // blob + about (which is nonstandard) + "blob:blob:http://www.example.com/guid-goes-here", + "blob:filesystem:ws:b/.", + "blob:filesystem:ftp://a/b", + "blob:blob:file://localhost/foo/bar", + }; + + for (const char* test_input : kTestCases) { + SCOPED_TRACE(testing::Message() << "Test input: " << test_input); + + // Verify that `origin` is opaque not just because `test_input` results in + // an invalid URL (because of a typo in the scheme name, or because of a + // technicality like having no host in a noaccess-std-with-host: scheme). + EXPECT_TRUE(this->IsValidUrl(test_input)); + + this->TestUniqueOpaqueOrigin(test_input); + } +} + +TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromInvalidUrls) { + // TODO(lukasza): Consider moving those to GURL/KURL tests that verify what + // inputs are parsed as an invalid URL. + + const char* kTestCases[] = { + // Invalid file: URLs. + "file://example.com:443/etc/passwd", // No port expected. + + // Invalid HTTP URLs.
+ "http", + "http:", + "http:/", + "http://", + "http://:", + "http://:1", + "http::///invalid.example.com/", + "http://example.com:65536/", // Port out of range. + "http://example.com:-1/", // Port out of range. + "http://example.com:18446744073709551616/", // Port = 2^64. + "http://example.com:18446744073709551616999/", // Lots of port digits. + + // Invalid filesystem URLs. + "filesystem:http://example.com/", // Missing /type/. + "filesystem:local:baz./type/", + "filesystem:local://hostname/type/", + "filesystem:unknown-scheme://hostname/type/", + "filesystem:filesystem:http://example.org:88/foo/bar", + + // Invalid IP addresses + "http://[]/", + "http://[2001:0db8:0000:0000:0000:0000:0000:0000:0001]/", // 9 groups. + + // Unknown scheme without a colon character (":") gives an invalid URL. + "unknown-scheme", + + // Standard schemes require a hostname (and result in an opaque origin if + // the hostname is missing). + "local-std-with-host:", + "noaccess-std-with-host:", + }; + + for (const char* test_input : kTestCases) { + SCOPED_TRACE(testing::Message() << "Test input: " << test_input); + + // All test cases here are expected to represent invalid URLs (because of + // a typo in the scheme name, or because of a technicality like having no + // host in a noaccess-std-with-host: scheme). + EXPECT_FALSE(this->IsValidUrl(test_input)); + + // Invalid URLs should always result in an opaque origin.
+ this->TestUniqueOpaqueOrigin(test_input); + } +} + +TYPED_TEST_P(AbstractOriginTest, TupleOrigins) { + struct TestCase { + const char* input; + SchemeHostPort expected_tuple; + } kTestCases[] = { + // file: URLs + {"file:///etc/passwd", {"file", "", 0}}, + {"file://example.com/etc/passwd", {"file", "example.com", 0}}, + {"file:///", {"file", "", 0}}, + +#ifdef WIN32 + // TODO(https://crbug.com/1214098): Consider unifying URL parsing behavior + // on all platforms (or at least make sure that serialization always + // round-trips - see https://crbug.com/1214098). + {"file://hostname/C:/dir/file.txt", {"file", "", 0}}, +#else + {"file://hostname/C:/dir/file.txt", {"file", "hostname", 0}}, +#endif + + // HTTP URLs + {"http://example.com/", {"http", "example.com", 80}}, + {"http://example.com:80/", {"http", "example.com", 80}}, + {"http://example.com:123/", {"http", "example.com", 123}}, + {"http://example.com:0/", {"http", "example.com", 0}}, + {"http://example.com:65535/", {"http", "example.com", 65535}}, + {"https://example.com/", {"https", "example.com", 443}}, + {"https://example.com:443/", {"https", "example.com", 443}}, + {"https://example.com:123/", {"https", "example.com", 123}}, + {"https://example.com:0/", {"https", "example.com", 0}}, + {"https://example.com:65535/", {"https", "example.com", 65535}}, + {"http://user:pass@example.com/", {"http", "example.com", 80}}, + {"http://example.com:123/?query", {"http", "example.com", 123}}, + {"https://example.com/#1234", {"https", "example.com", 443}}, + {"https://u:p@example.com:123/?query#1234", + {"https", "example.com", 123}}, + {"http://example/", {"http", "example", 80}}, + + // Blob URLs. 
+ {"blob:http://example.com/guid-goes-here", {"http", "example.com", 80}}, + {"blob:http://example.com:123/guid-goes-here", + {"http", "example.com", 123}}, + {"blob:https://example.com/guid-goes-here", + {"https", "example.com", 443}}, + {"blob:http://u:p@example.com/guid-goes-here", + {"http", "example.com", 80}}, + + // Filesystem URLs. + {"filesystem:http://example.com/type/", {"http", "example.com", 80}}, + {"filesystem:http://example.com:123/type/", {"http", "example.com", 123}}, + {"filesystem:https://example.com/type/", {"https", "example.com", 443}}, + {"filesystem:https://example.com:123/type/", + {"https", "example.com", 123}}, + {"filesystem:local-std-with-host:baz./type/", + {"local-std-with-host", "baz.", 0}}, + + // IP Addresses + {"http://192.168.9.1/", {"http", "192.168.9.1", 80}}, + {"http://[2001:db8::1]/", {"http", "[2001:db8::1]", 80}}, + {"http://[2001:0db8:0000:0000:0000:0000:0000:0001]/", + {"http", "[2001:db8::1]", 80}}, + {"http://1/", {"http", "0.0.0.1", 80}}, + {"http://1:1/", {"http", "0.0.0.1", 1}}, + {"http://3232237825/", {"http", "192.168.9.1", 80}}, + + // Punycode + {"http://☃.net/", {"http", "xn--n3h.net", 80}}, + {"blob:http://☃.net/", {"http", "xn--n3h.net", 80}}, + {"local-std-with-host:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg", + {"local-std-with-host", "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0}}, + + // Registered URLs + {"ftp://example.com/", {"ftp", "example.com", 21}}, + {"ws://example.com/", {"ws", "example.com", 80}}, + {"wss://example.com/", {"wss", "example.com", 443}}, + {"wss://user:pass@example.com/", {"wss", "example.com", 443}}, + }; + + for (const TestCase& test : kTestCases) { + SCOPED_TRACE(testing::Message() << "Test input: " << test.input); + + // Only valid URLs should translate into valid, non-opaque origins. 
+ EXPECT_TRUE(this->IsValidUrl(test.input)); + + auto origin = this->CreateOriginFromString(test.input); + this->VerifyTupleOriginInvariants(origin, test.expected_tuple); + } +} + +TYPED_TEST_P(AbstractOriginTest, CustomSchemes_OpaqueOrigins) { + const char* kTestCases[] = { + // Unknown scheme + "unknown-scheme:foo", + "unknown-scheme://bar", + + // Unknown scheme that is a prefix or suffix of a registered scheme. + "loca:foo", + "ocal:foo", + "local-suffix:foo", + "prefix-local:foo", + + // Custom no-access schemes translate into an opaque origin (just like the + // built-in no-access schemes such as about:blank or data:). + "noaccess-std-with-host:foo", + "noaccess-std-with-host://bar", + "noaccess://host", + "local-noaccess://host", + "local-noaccess-std-with-host://host", + }; + + for (const char* test_input : kTestCases) { + SCOPED_TRACE(testing::Message() << "Test input: " << test_input); + + // Verify that `origin` is opaque not just because `test_input` results in + // an invalid URL (because of a typo in the scheme name, or because of a + // technicality like having no host in a noaccess-std-with-host: scheme). + EXPECT_TRUE(this->IsValidUrl(test_input)); + + this->TestUniqueOpaqueOrigin(test_input); + } +} + +TYPED_TEST_P(AbstractOriginTest, CustomSchemes_TupleOrigins) { + struct TestCase { + const char* input; + SchemeHostPort expected_tuple; + } kTestCases[] = { + // Scheme (registered in SetUp()) that's both local and standard. + // TODO: Is it really appropriate to do network-host canonicalization of + // schemes without ports?
+ {"local-std-with-host:20", {"local-std-with-host", "0.0.0.20", 0}}, + {"local-std-with-host:20.", {"local-std-with-host", "0.0.0.20", 0}}, + {"local-std-with-host:foo", {"local-std-with-host", "foo", 0}}, + {"local-std-with-host://bar:20", {"local-std-with-host", "bar", 0}}, + {"local-std-with-host:baz.", {"local-std-with-host", "baz.", 0}}, + {"local-std-with-host:baz..", {"local-std-with-host", "baz..", 0}}, + {"local-std-with-host:baz..bar", {"local-std-with-host", "baz..bar", 0}}, + {"local-std-with-host:baz...", {"local-std-with-host", "baz...", 0}}, + + // Scheme (registered in SetUp()) that's local but nonstandard. These + // always have empty hostnames, but are allowed to be url::Origins. + {"local:", {"local", "", 0}}, + {"local:foo", {"local", "", 0}}, + {"local://bar", {"local", "", 0}}, + {"also-local://bar", {"also-local", "", 0}}, + + {"std-with-host://host", {"std-with-host", "host", 0}}, + {"local://host", {"local", "", 0}}, + {"local-std-with-host://host", {"local-std-with-host", "host", 0}}, + }; + + for (const TestCase& test : kTestCases) { + SCOPED_TRACE(testing::Message() << "Test input: " << test.input); + + // Only valid URLs should translate into valid, non-opaque origins. + EXPECT_TRUE(this->IsValidUrl(test.input)); + + auto origin = this->CreateOriginFromString(test.input); + this->VerifyTupleOriginInvariants(origin, test.expected_tuple); + } +} + +REGISTER_TYPED_TEST_SUITE_P(AbstractOriginTest, + NonStandardSchemeWithAndroidWebViewHack, + OpaqueOriginsFromValidUrls, + OpaqueOriginsFromInvalidUrls, + TupleOrigins, + CustomSchemes_OpaqueOrigins, + CustomSchemes_TupleOrigins); + +} // namespace url + +#endif // URL_ORIGIN_ABSTRACT_TESTS_H_
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc index 2f342c5..a9d3a4f 100644 --- a/url/origin_unittest.cc +++ b/url/origin_unittest.cc
@@ -69,13 +69,13 @@ return Origin::Nonce(nonce); } - gurl_base::Optional<gurl_base::UnguessableToken> GetNonce(const Origin& origin) { + absl::optional<gurl_base::UnguessableToken> GetNonce(const Origin& origin) { return origin.GetNonceForSerialization(); } // Wrappers around url::Origin methods to expose it to tests. - gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization( + absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization( gurl_base::StringPiece precursor_scheme, gurl_base::StringPiece precursor_host, uint16_t precursor_port, @@ -84,16 +84,16 @@ precursor_scheme, precursor_host, precursor_port, nonce); } - gurl_base::Optional<std::string> SerializeWithNonce(const Origin& origin) { + absl::optional<std::string> SerializeWithNonce(const Origin& origin) { return origin.SerializeWithNonce(); } - gurl_base::Optional<std::string> SerializeWithNonceAndInitIfNeeded( + absl::optional<std::string> SerializeWithNonceAndInitIfNeeded( Origin& origin) { return origin.SerializeWithNonceAndInitIfNeeded(); } - gurl_base::Optional<Origin> Deserialize(const std::string& value) { + absl::optional<Origin> Deserialize(const std::string& value) { return Origin::Deserialize(value); } @@ -286,7 +286,7 @@ for (const auto& test : cases) { SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" << test.port); - gurl_base::Optional<url::Origin> origin = + absl::optional<url::Origin> origin = url::Origin::UnsafelyCreateTupleOriginWithoutNormalization( test.scheme, test.host, test.port); ASSERT_TRUE(origin); @@ -299,7 +299,7 @@ ExpectParsedUrlsEqual(GURL(origin->Serialize()), origin->GetURL()); gurl_base::UnguessableToken nonce = gurl_base::UnguessableToken::Create(); - gurl_base::Optional<url::Origin> opaque_origin = + absl::optional<url::Origin> opaque_origin = UnsafelyCreateOpaqueOriginWithoutNormalization( test.scheme, test.host, test.port, CreateNonce(nonce)); ASSERT_TRUE(opaque_origin); @@ -355,7 +355,7 @@ // Opaque origins 
with unknown precursors are allowed. gurl_base::UnguessableToken token = gurl_base::UnguessableToken::Create(); - gurl_base::Optional<url::Origin> anonymous_opaque = + absl::optional<url::Origin> anonymous_opaque = UnsafelyCreateOpaqueOriginWithoutNormalization("", "", 0, CreateNonce(token)); ASSERT_TRUE(anonymous_opaque) @@ -667,10 +667,10 @@ for (const GURL& url : valid_urls) { SCOPED_TRACE(url.spec()); Origin origin = Origin::Create(url); - gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin); + absl::optional<std::string> serialized = SerializeWithNonce(origin); ASSERT_TRUE(serialized); - gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); ASSERT_TRUE(deserialized.has_value()); EXPECT_TRUE(DoEqualityComparisons(origin, deserialized.value(), true)); @@ -679,11 +679,11 @@ } TEST_F(OriginTest, DeserializeInvalid) { - EXPECT_EQ(gurl_base::nullopt, Deserialize(std::string())); - EXPECT_EQ(gurl_base::nullopt, Deserialize("deadbeef")); - EXPECT_EQ(gurl_base::nullopt, Deserialize("0123456789")); - EXPECT_EQ(gurl_base::nullopt, Deserialize("https://a.com")); - EXPECT_EQ(gurl_base::nullopt, Deserialize("https://192.168.1.1")); + EXPECT_EQ(absl::nullopt, Deserialize(std::string())); + EXPECT_EQ(absl::nullopt, Deserialize("deadbeef")); + EXPECT_EQ(absl::nullopt, Deserialize("0123456789")); + EXPECT_EQ(absl::nullopt, Deserialize("https://a.com")); + EXPECT_EQ(absl::nullopt, Deserialize("https://192.168.1.1")); } TEST_F(OriginTest, SerializeTBDNonce) { @@ -695,8 +695,8 @@ for (const GURL& url : invalid_urls) { SCOPED_TRACE(url.spec()); Origin origin = Origin::Create(url); - gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin); - gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + absl::optional<std::string> serialized = SerializeWithNonce(origin); + absl::optional<Origin> deserialized = 
Deserialize(std::move(*serialized)); ASSERT_TRUE(deserialized.has_value()); // Can't use DoEqualityComparisons here since empty nonces are never == @@ -706,10 +706,10 @@ // Same basic test as above, but without a GURL to create tuple_. Origin opaque; - gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque); + absl::optional<std::string> serialized = SerializeWithNonce(opaque); ASSERT_TRUE(serialized); - gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); ASSERT_TRUE(deserialized.has_value()); // Can't use DoEqualityComparisons here since empty nonces are never == unless @@ -720,9 +720,9 @@ for (const GURL& url : invalid_urls) { SCOPED_TRACE(url.spec()); Origin origin = Origin::Create(url); - gurl_base::Optional<std::string> serialized = + absl::optional<std::string> serialized = SerializeWithNonceAndInitIfNeeded(origin); - gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); ASSERT_TRUE(deserialized.has_value()); // The nonce should have been initialized prior to Serialization(). @@ -734,10 +734,10 @@ Origin opaque; GetNonce(opaque); - gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque); + absl::optional<std::string> serialized = SerializeWithNonce(opaque); ASSERT_TRUE(serialized); - gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); ASSERT_TRUE(deserialized.has_value()); EXPECT_TRUE(DoEqualityComparisons(opaque, deserialized.value(), true));
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index 21b473d..c337da3 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc
@@ -14,6 +14,7 @@ #include "polyfills/base/notreached.h" #include "base/numerics/safe_conversions.h" #include "base/strings/string_number_conversions.h" +#include "base/strings/string_piece.h" #include "url/gurl.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -159,8 +160,8 @@ SchemeHostPort::SchemeHostPort(gurl_base::StringPiece scheme, gurl_base::StringPiece host, uint16_t port) - : SchemeHostPort(scheme.as_string(), - host.as_string(), + : SchemeHostPort(std::string(scheme), + std::string(host), port, ConstructPolicy::CHECK_CANONICALIZATION) {}
diff --git a/url/third_party/mozilla/DIR_METADATA b/url/third_party/mozilla/DIR_METADATA deleted file mode 100644 index fb07a25..0000000 --- a/url/third_party/mozilla/DIR_METADATA +++ /dev/null
@@ -1,11 +0,0 @@ -# Metadata information for this directory. -# -# For more information on DIR_METADATA files, see: -# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/README.md -# -# For the schema of this file, see Metadata message: -# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/proto/dir_metadata.proto - -monorail { - component: "Internals>Core" -} \ No newline at end of file
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc index 4fd3a8e..e8a1edb 100644 --- a/url/third_party/mozilla/url_parse.cc +++ b/url/third_party/mozilla/url_parse.cc
@@ -48,7 +48,7 @@ namespace { // Returns true if the given character is a valid digit to use in a port. -inline bool IsPortDigit(gurl_base::char16 ch) { +inline bool IsPortDigit(char16_t ch) { return ch >= '0' && ch <= '9'; } @@ -812,13 +812,13 @@ return DoExtractScheme(url, url_len, scheme); } -bool ExtractScheme(const gurl_base::char16* url, int url_len, Component* scheme) { +bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) { return DoExtractScheme(url, url_len, scheme); } // This handles everything that may be an authority terminator, including // backslash. For special backslash handling see DoParseAfterScheme. -bool IsAuthorityTerminator(gurl_base::char16 ch) { +bool IsAuthorityTerminator(char16_t ch) { return IsURLSlash(ch) || ch == '?' || ch == '#'; } @@ -828,7 +828,7 @@ DoExtractFileName(url, path, file_name); } -void ExtractFileName(const gurl_base::char16* url, +void ExtractFileName(const char16_t* url, const Component& path, Component* file_name) { DoExtractFileName(url, path, file_name); @@ -841,7 +841,7 @@ return DoExtractQueryKeyValue(url, query, key, value); } -bool ExtractQueryKeyValue(const gurl_base::char16* url, +bool ExtractQueryKeyValue(const char16_t* url, Component* query, Component* key, Component* value) { @@ -857,7 +857,7 @@ DoParseAuthority(spec, auth, username, password, hostname, port_num); } -void ParseAuthority(const gurl_base::char16* spec, +void ParseAuthority(const char16_t* spec, const Component& auth, Component* username, Component* password, @@ -870,7 +870,7 @@ return DoParsePort(url, port); } -int ParsePort(const gurl_base::char16* url, const Component& port) { +int ParsePort(const char16_t* url, const Component& port) { return DoParsePort(url, port); } @@ -878,7 +878,7 @@ DoParseStandardURL(url, url_len, parsed); } -void ParseStandardURL(const gurl_base::char16* url, int url_len, Parsed* parsed) { +void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseStandardURL(url, url_len, 
parsed); } @@ -889,7 +889,7 @@ DoParsePathURL(url, url_len, trim_path_end, parsed); } -void ParsePathURL(const gurl_base::char16* url, +void ParsePathURL(const char16_t* url, int url_len, bool trim_path_end, Parsed* parsed) { @@ -900,7 +900,7 @@ DoParseFileSystemURL(url, url_len, parsed); } -void ParseFileSystemURL(const gurl_base::char16* url, int url_len, Parsed* parsed) { +void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseFileSystemURL(url, url_len, parsed); } @@ -908,7 +908,7 @@ DoParseMailtoURL(url, url_len, parsed); } -void ParseMailtoURL(const gurl_base::char16* url, int url_len, Parsed* parsed) { +void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseMailtoURL(url, url_len, parsed); } @@ -920,7 +920,7 @@ ParsePath(spec, path, filepath, query, ref); } -void ParsePathInternal(const gurl_base::char16* spec, +void ParsePathInternal(const char16_t* spec, const Component& path, Component* filepath, Component* query, @@ -935,7 +935,7 @@ DoParseAfterScheme(spec, spec_len, after_scheme, parsed); } -void ParseAfterScheme(const gurl_base::char16* spec, +void ParseAfterScheme(const char16_t* spec, int spec_len, int after_scheme, Parsed* parsed) {
diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h index 54b2af2..1ec0ef8 100644 --- a/url/third_party/mozilla/url_parse.h +++ b/url/third_party/mozilla/url_parse.h
@@ -6,7 +6,6 @@ #define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_ #include "polyfills/base/component_export.h" -#include "base/strings/string16.h" namespace url { @@ -202,7 +201,7 @@ void clear_inner_parsed() { if (inner_parsed_) { delete inner_parsed_; - inner_parsed_ = NULL; + inner_parsed_ = nullptr; } } @@ -230,7 +229,7 @@ COMPONENT_EXPORT(URL) void ParseStandardURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseStandardURL(const gurl_base::char16* url, int url_len, Parsed* parsed); +void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed); // PathURL is for when the scheme is known not to have an authority (host) // section but that aren't file URLs either. The scheme is parsed, and @@ -242,7 +241,7 @@ bool trim_path_end, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParsePathURL(const gurl_base::char16* url, +void ParsePathURL(const char16_t* url, int url_len, bool trim_path_end, Parsed* parsed); @@ -252,19 +251,19 @@ COMPONENT_EXPORT(URL) void ParseFileURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseFileURL(const gurl_base::char16* url, int url_len, Parsed* parsed); +void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed); // Filesystem URLs are structured differently than other URLs. COMPONENT_EXPORT(URL) void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseFileSystemURL(const gurl_base::char16* url, int url_len, Parsed* parsed); +void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed); // MailtoURL is for mailto: urls. 
They are made up scheme,path,query COMPONENT_EXPORT(URL) void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); COMPONENT_EXPORT(URL) -void ParseMailtoURL(const gurl_base::char16* url, int url_len, Parsed* parsed); +void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed); // Helper functions ----------------------------------------------------------- @@ -291,11 +290,11 @@ COMPONENT_EXPORT(URL) bool ExtractScheme(const char* url, int url_len, Component* scheme); COMPONENT_EXPORT(URL) -bool ExtractScheme(const gurl_base::char16* url, int url_len, Component* scheme); +bool ExtractScheme(const char16_t* url, int url_len, Component* scheme); // Returns true if ch is a character that terminates the authority segment // of a URL. -COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(gurl_base::char16 ch); +COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch); // Does a best effort parse of input |spec|, in range |auth|. If a particular // component is not found, it will be set to invalid. @@ -307,7 +306,7 @@ Component* hostname, Component* port_num); COMPONENT_EXPORT(URL) -void ParseAuthority(const gurl_base::char16* spec, +void ParseAuthority(const char16_t* spec, const Component& auth, Component* username, Component* password, @@ -323,7 +322,7 @@ enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port); COMPONENT_EXPORT(URL) -int ParsePort(const gurl_base::char16* url, const Component& port); +int ParsePort(const char16_t* url, const Component& port); // Extracts the range of the file name in the given url. 
The path must // already have been computed by the parse function, and the matching URL @@ -340,7 +339,7 @@ const Component& path, Component* file_name); COMPONENT_EXPORT(URL) -void ExtractFileName(const gurl_base::char16* url, +void ExtractFileName(const char16_t* url, const Component& path, Component* file_name); @@ -365,7 +364,7 @@ Component* key, Component* value); COMPONENT_EXPORT(URL) -bool ExtractQueryKeyValue(const gurl_base::char16* url, +bool ExtractQueryKeyValue(const char16_t* url, Component* query, Component* key, Component* value);
diff --git a/url/url_canon.cc b/url/url_canon.cc index 1860234..dce7847 100644 --- a/url/url_canon.cc +++ b/url/url_canon.cc
@@ -10,6 +10,6 @@ template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) - CanonOutputT<gurl_base::char16>; + CanonOutputT<char16_t>; } // namespace url
diff --git a/url/url_canon.h b/url/url_canon.h index 84b3549..457f58a 100644 --- a/url/url_canon.h +++ b/url/url_canon.h
@@ -8,9 +8,10 @@ #include <stdlib.h> #include <string.h> +#include <string> + #include "polyfills/base/component_export.h" #include "polyfills/base/export_template.h" -#include "base/strings/string16.h" #include "url/third_party/mozilla/url_parse.h" namespace url { @@ -178,18 +179,18 @@ extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) CanonOutputT<char>; extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) - CanonOutputT<gurl_base::char16>; + CanonOutputT<char16_t>; // Normally, all canonicalization output is in narrow characters. We support // the templates so it can also be used internally if a wide buffer is // required. typedef CanonOutputT<char> CanonOutput; -typedef CanonOutputT<gurl_base::char16> CanonOutputW; +typedef CanonOutputT<char16_t> CanonOutputW; template<int fixed_capacity> class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {}; -template<int fixed_capacity> -class RawCanonOutputW : public RawCanonOutputT<gurl_base::char16, fixed_capacity> {}; +template <int fixed_capacity> +class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {}; // Character set converter ---------------------------------------------------- // @@ -215,7 +216,7 @@ // decimal, (such as "&#20320;") with escaping of the ampersand, number // sign, and semicolon (in the previous example it would be // "%26%2320320%3B"). This rule is based on what IE does in this situation.
- virtual void ConvertFromUTF16(const gurl_base::char16* input, + virtual void ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) = 0; }; @@ -273,11 +274,11 @@ int* output_len, bool* potentially_dangling_markup); COMPONENT_EXPORT(URL) -const gurl_base::char16* RemoveURLWhitespace(const gurl_base::char16* input, - int input_len, - CanonOutputT<gurl_base::char16>* buffer, - int* output_len, - bool* potentially_dangling_markup); +const char16_t* RemoveURLWhitespace(const char16_t* input, + int input_len, + CanonOutputT<char16_t>* buffer, + int* output_len, + bool* potentially_dangling_markup); // IDN ------------------------------------------------------------------------ @@ -291,7 +292,7 @@ // // On error, returns false. The output in this case is undefined. COMPONENT_EXPORT(URL) -bool IDNToASCII(const gurl_base::char16* src, int src_len, CanonOutputW* output); +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output); // Piece-by-piece canonicalizers ---------------------------------------------- // @@ -323,7 +324,7 @@ CanonOutput* output, Component* out_scheme); COMPONENT_EXPORT(URL) -bool CanonicalizeScheme(const gurl_base::char16* spec, +bool CanonicalizeScheme(const char16_t* spec, const Component& scheme, CanonOutput* output, Component* out_scheme); @@ -347,9 +348,9 @@ Component* out_username, Component* out_password); COMPONENT_EXPORT(URL) -bool CanonicalizeUserInfo(const gurl_base::char16* username_source, +bool CanonicalizeUserInfo(const char16_t* username_source, const Component& username, - const gurl_base::char16* password_source, + const char16_t* password_source, const Component& password, CanonOutput* output, Component* out_username, @@ -411,7 +412,7 @@ CanonOutput* output, Component* out_host); COMPONENT_EXPORT(URL) -bool CanonicalizeHost(const gurl_base::char16* spec, +bool CanonicalizeHost(const char16_t* spec, const Component& host, CanonOutput* output, Component* out_host); @@ -426,7 +427,7 @@ CanonOutput* 
output, CanonHostInfo* host_info); COMPONENT_EXPORT(URL) -void CanonicalizeHostVerbose(const gurl_base::char16* spec, +void CanonicalizeHostVerbose(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info); @@ -456,7 +457,7 @@ const Component& host, CanonOutput* output); COMPONENT_EXPORT(URL) -bool CanonicalizeHostSubstring(const gurl_base::char16* spec, +bool CanonicalizeHostSubstring(const char16_t* spec, const Component& host, CanonOutput* output); @@ -476,7 +477,7 @@ CanonOutput* output, CanonHostInfo* host_info); COMPONENT_EXPORT(URL) -void CanonicalizeIPAddress(const gurl_base::char16* spec, +void CanonicalizeIPAddress(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info); @@ -493,7 +494,7 @@ CanonOutput* output, Component* out_port); COMPONENT_EXPORT(URL) -bool CanonicalizePort(const gurl_base::char16* spec, +bool CanonicalizePort(const char16_t* spec, const Component& port, int default_port_for_scheme, CanonOutput* output, @@ -519,11 +520,24 @@ CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -bool CanonicalizePath(const gurl_base::char16* spec, +bool CanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); +// Like CanonicalizePath(), but does not assume that it's operating on the +// entire path. It therefore does not prepend a slash, etc. +COMPONENT_EXPORT(URL) +bool CanonicalizePartialPath(const char* spec, + const Component& path, + CanonOutput* output, + Component* out_path); +COMPONENT_EXPORT(URL) +bool CanonicalizePartialPath(const char16_t* spec, + const Component& path, + CanonOutput* output, + Component* out_path); + // Canonicalizes the input as a file path. This is like CanonicalizePath except // that it also handles Windows drive specs. For example, the path can begin // with "c|\" and it will get properly canonicalized to "C:/".
@@ -536,7 +550,7 @@ CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -bool FileCanonicalizePath(const gurl_base::char16* spec, +bool FileCanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); @@ -560,7 +574,7 @@ CanonOutput* output, Component* out_query); COMPONENT_EXPORT(URL) -void CanonicalizeQuery(const gurl_base::char16* spec, +void CanonicalizeQuery(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output, @@ -578,7 +592,7 @@ CanonOutput* output, Component* out_path); COMPONENT_EXPORT(URL) -void CanonicalizeRef(const gurl_base::char16* spec, +void CanonicalizeRef(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path); @@ -603,7 +617,7 @@ CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeStandardURL(const gurl_base::char16* spec, +bool CanonicalizeStandardURL(const char16_t* spec, int spec_len, const Parsed& parsed, SchemeType scheme_type, @@ -620,7 +634,7 @@ CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeFileURL(const gurl_base::char16* spec, +bool CanonicalizeFileURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, @@ -636,7 +650,7 @@ CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeFileSystemURL(const gurl_base::char16* spec, +bool CanonicalizeFileSystemURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, @@ -652,12 +666,25 @@ CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizePathURL(const gurl_base::char16* spec, +bool CanonicalizePathURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed); +// Use to canonicalize just the path component of a "path" URL; e.g. the +// path of a javascript URL. 
+COMPONENT_EXPORT(URL) +void CanonicalizePathURLPath(const char* source, + const Component& component, + CanonOutput* output, + Component* new_component); +COMPONENT_EXPORT(URL) +void CanonicalizePathURLPath(const char16_t* source, + const Component& component, + CanonOutput* output, + Component* new_component); + // Use for mailto URLs. This "canonicalizes" the URL into a path and query // component. It does not attempt to merge "to" fields. It uses UTF-8 for // the query encoding if there is a query. This is because a mailto URL is @@ -670,7 +697,7 @@ CanonOutput* output, Parsed* new_parsed); COMPONENT_EXPORT(URL) -bool CanonicalizeMailtoURL(const gurl_base::char16* spec, +bool CanonicalizeMailtoURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, @@ -869,7 +896,7 @@ COMPONENT_EXPORT(URL) bool ReplaceStandardURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output, @@ -887,7 +914,7 @@ COMPONENT_EXPORT(URL) bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed); @@ -904,7 +931,7 @@ COMPONENT_EXPORT(URL) bool ReplaceFileURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed); @@ -920,7 +947,7 @@ COMPONENT_EXPORT(URL) bool ReplacePathURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed); @@ -935,7 +962,7 @@ COMPONENT_EXPORT(URL) bool ReplaceMailtoURL(const char* base, const 
Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed); @@ -963,7 +990,7 @@ COMPONENT_EXPORT(URL) bool IsRelativeURL(const char* base, const Parsed& base_parsed, - const gurl_base::char16* fragment, + const char16_t* fragment, int fragment_len, bool is_base_hierarchical, bool* is_relative, @@ -1000,7 +1027,7 @@ bool ResolveRelativeURL(const char* base_url, const Parsed& base_parsed, bool base_is_file, - const gurl_base::char16* relative_url, + const char16_t* relative_url, const Component& relative_component, CharsetConverter* query_converter, CanonOutput* output,
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc index 8482c35..b45cea0 100644 --- a/url/url_canon_etc.cc +++ b/url/url_canon_etc.cc
@@ -299,11 +299,6 @@ // Now iterate through all the characters, converting to UTF-8 and validating. int end = ref.end(); for (int i = ref.begin; i < end; i++) { - if (spec[i] == 0) { - // IE just strips NULLs, so we do too. - continue; - } - UCHAR current_char = static_cast<UCHAR>(spec[i]); if (current_char < 0x80) { if (kShouldEscapeCharInFragment[current_char]) @@ -329,16 +324,16 @@ potentially_dangling_markup); } -const gurl_base::char16* RemoveURLWhitespace(const gurl_base::char16* input, - int input_len, - CanonOutputT<gurl_base::char16>* buffer, - int* output_len, - bool* potentially_dangling_markup) { +const char16_t* RemoveURLWhitespace(const char16_t* input, + int input_len, + CanonOutputT<char16_t>* buffer, + int* output_len, + bool* potentially_dangling_markup) { return DoRemoveURLWhitespace(input, input_len, buffer, output_len, potentially_dangling_markup); } -char CanonicalSchemeChar(gurl_base::char16 ch) { +char CanonicalSchemeChar(char16_t ch) { if (ch >= 0x80) return 0; // Non-ASCII is not supported by schemes. 
return kSchemeCanonical[ch]; @@ -351,11 +346,11 @@ return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme); } -bool CanonicalizeScheme(const gurl_base::char16* spec, +bool CanonicalizeScheme(const char16_t* spec, const Component& scheme, CanonOutput* output, Component* out_scheme) { - return DoScheme<gurl_base::char16, gurl_base::char16>(spec, scheme, output, out_scheme); + return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme); } bool CanonicalizeUserInfo(const char* username_source, @@ -370,16 +365,16 @@ output, out_username, out_password); } -bool CanonicalizeUserInfo(const gurl_base::char16* username_source, +bool CanonicalizeUserInfo(const char16_t* username_source, const Component& username, - const gurl_base::char16* password_source, + const char16_t* password_source, const Component& password, CanonOutput* output, Component* out_username, Component* out_password) { - return DoUserInfo<gurl_base::char16, gurl_base::char16>( - username_source, username, password_source, password, - output, out_username, out_password); + return DoUserInfo<char16_t, char16_t>(username_source, username, + password_source, password, output, + out_username, out_password); } bool CanonicalizePort(const char* spec, @@ -392,13 +387,13 @@ output, out_port); } -bool CanonicalizePort(const gurl_base::char16* spec, +bool CanonicalizePort(const char16_t* spec, const Component& port, int default_port_for_scheme, CanonOutput* output, Component* out_port) { - return DoPort<gurl_base::char16, gurl_base::char16>(spec, port, default_port_for_scheme, - output, out_port); + return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output, + out_port); } void CanonicalizeRef(const char* spec, @@ -408,11 +403,11 @@ DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref); } -void CanonicalizeRef(const gurl_base::char16* spec, +void CanonicalizeRef(const char16_t* spec, const Component& ref, CanonOutput* output, Component* out_ref) { - 
DoCanonicalizeRef<gurl_base::char16, gurl_base::char16>(spec, ref, output, out_ref); + DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref); } } // namespace url
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc index 9a642cd..b36198a 100644 --- a/url/url_canon_filesystemurl.cc +++ b/url/url_canon_filesystemurl.cc
@@ -94,14 +94,14 @@ new_parsed); } -bool CanonicalizeFileSystemURL(const gurl_base::char16* spec, +bool CanonicalizeFileSystemURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* charset_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeFileSystemURL<gurl_base::char16, gurl_base::char16>( - spec, URLComponentSource<gurl_base::char16>(spec), parsed, charset_converter, + return DoCanonicalizeFileSystemURL<char16_t, char16_t>( + spec, URLComponentSource<char16_t>(spec), parsed, charset_converter, output, new_parsed); } @@ -120,7 +120,7 @@ bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* new_parsed) {
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc index 4622c6e..8f6c2f8 100644 --- a/url/url_canon_fileurl.cc +++ b/url/url_canon_fileurl.cc
@@ -25,29 +25,47 @@ int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) { // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, - // (with backslashes instead of slashes as well). - int num_slashes = CountConsecutiveSlashes(spec, begin, end); - int after_slashes = begin + num_slashes; + // /./c:/foo, (with backslashes instead of slashes as well). The code + // first guesses the beginning of the drive letter, then verifies that the + // path up to that point can be canonicalised as "/". If it can, then the + // found drive letter is indeed a drive letter, otherwise the path has no + // drive letter in it. + if (begin > end) // Nothing to search in. + return begin; // Found no letter, so didn't consum any characters. - if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end)) - return begin; // Haven't consumed any characters + // If there is something that looks like a drive letter in the spec between + // being and end, store its position in drive_letter_pos. + int drive_letter_pos = + DoesContainWindowsDriveSpecUntil(spec, begin, end, end); + if (drive_letter_pos < begin) + return begin; // Found no letter, so didn't consum any characters. - // A drive spec is the start of a path, so we need to add a slash for the - // authority terminator (typically the third slash). - output->push_back('/'); + // Check if the path up to the drive letter candidate can be canonicalized as + // "/". + Component sub_path = MakeRange(begin, drive_letter_pos); + Component output_path; + const int initial_length = output->length(); + bool success = CanonicalizePath(spec, sub_path, output, &output_path); + if (!success || output_path.len != 1 || + output->at(output_path.begin) != '/') { + // Undo writing the canonicalized path. + output->set_length(initial_length); + return begin; // Found no letter, so didn't consum any characters. 
+ } - // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid - // and that it is followed by a colon/pipe. + // By now, "/" has been written to the output and a valid drive letter is + // confirmed at position drive_letter_pos, followed by a valid drive letter + // separator (a colon or a pipe). - // Normalize Windows drive letters to uppercase - if (gurl_base::IsAsciiLower(spec[after_slashes])) - output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A')); + // Normalize Windows drive letters to uppercase. + if (gurl_base::IsAsciiLower(spec[drive_letter_pos])) + output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A')); else - output->push_back(static_cast<char>(spec[after_slashes])); + output->push_back(static_cast<char>(spec[drive_letter_pos])); // Normalize the character following it to a colon rather than pipe. output->push_back(':'); - return after_slashes + 2; + return drive_letter_pos + 2; } #endif // WIN32 @@ -133,15 +151,15 @@ output, new_parsed); } -bool CanonicalizeFileURL(const gurl_base::char16* spec, +bool CanonicalizeFileURL(const char16_t* spec, int spec_len, const Parsed& parsed, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeFileURL<gurl_base::char16, gurl_base::char16>( - URLComponentSource<gurl_base::char16>(spec), parsed, query_converter, - output, new_parsed); + return DoCanonicalizeFileURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, query_converter, output, + new_parsed); } bool FileCanonicalizePath(const char* spec, @@ -152,12 +170,12 @@ output, out_path); } -bool FileCanonicalizePath(const gurl_base::char16* spec, +bool FileCanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { - return DoFileCanonicalizePath<gurl_base::char16, gurl_base::char16>(spec, path, - output, out_path); + return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output, + out_path); } bool 
ReplaceFileURL(const char* base, @@ -175,7 +193,7 @@ bool ReplaceFileURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) {
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc index 28a7c38..b278b15 100644 --- a/url/url_canon_host.cc +++ b/url/url_canon_host.cc
@@ -3,8 +3,10 @@ // found in the LICENSE file. #include "polyfills/base/check.h" +#include "polyfills/base/metrics/histogram_macros.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" +#include "url/url_canon_ip.h" namespace url { @@ -80,7 +82,7 @@ const int kTempHostBufferLen = 1024; typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer; -typedef RawCanonOutputT<gurl_base::char16, kTempHostBufferLen> StackBufferW; +typedef RawCanonOutputT<char16_t, kTempHostBufferLen> StackBufferW; // Scans a host name and fills in the output flags according to what we find. // |has_non_ascii| will be true if there are any non-7-bit characters, and @@ -174,7 +176,7 @@ } // Canonicalizes a host that requires IDN conversion. Returns true on success -bool DoIDNHost(const gurl_base::char16* src, int src_len, CanonOutput* output) { +bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) { int original_output_len = output->length(); // So we can rewind below. // We need to escape URL before doing IDN conversion, since punicode strings @@ -239,6 +241,7 @@ // input or the unescaped version written to |*output| if necessary. const char* utf8_source; int utf8_source_len; + bool are_all_escaped_valid = true; if (has_escaped) { // Unescape before converting to UTF-16 for IDN. We write this into the // output because it most likely does not require IDNization, and we can @@ -247,14 +250,16 @@ // unescaped input requires IDN. if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) { // Error with some escape sequence. We'll call the current output - // complete. DoSimpleHost will have written some "reasonable" output. - return false; + // complete. DoSimpleHost will have written some "reasonable" output + // for the invalid escapes, but the output could be non-ASCII and + // needs to go through re-encoding below. 
+ are_all_escaped_valid = false; } // Unescaping may have left us with ASCII input, in which case the // unescaped version we wrote to output is complete. if (!has_non_ascii) { - return true; + return are_all_escaped_valid; } // Save the pointer into the data was just converted (it may be appended to @@ -286,14 +291,18 @@ // This will call DoSimpleHost which will do normal ASCII canonicalization // and also check for IP addresses in the outpt. - return DoIDNHost(utf16.data(), utf16.length(), output); + return DoIDNHost(utf16.data(), utf16.length(), output) && + are_all_escaped_valid; } // UTF-16 convert host to its ASCII version. The set up is already ready for // the backend, so we just pass through. The has_escaped flag should be set if // the input string requires unescaping. -bool DoComplexHost(const gurl_base::char16* host, int host_len, - bool has_non_ascii, bool has_escaped, CanonOutput* output) { +bool DoComplexHost(const char16_t* host, + int host_len, + bool has_non_ascii, + bool has_escaped, + CanonOutput* output) { if (has_escaped) { // Yikes, we have escaped characters with wide input. The escaped // characters should be interpreted as UTF-8. To solve this problem, @@ -370,6 +379,16 @@ if (host_info->IsIPAddress()) { output->set_length(output_begin); output->Append(canon_ip.data(), canon_ip.length()); + } else if (host_info->family == CanonHostInfo::NEUTRAL) { + // Only need to call CheckHostnameSafety() for valid hosts that aren't IP + // addresses and aren't broken. + HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host); + // Don't record kOK. Ratio of OK to not-OK statuses is not meaningful at + // this layer, and hostnames are canonicalized a lot. + if (host_safety_status != HostSafetyStatus::kOk) { + UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus", + host_safety_status); + } } } else { // Canonicalization failed. Set BROKEN to notify the caller. 
@@ -391,12 +410,12 @@ return (host_info.family != CanonHostInfo::BROKEN); } -bool CanonicalizeHost(const gurl_base::char16* spec, +bool CanonicalizeHost(const char16_t* spec, const Component& host, CanonOutput* output, Component* out_host) { CanonHostInfo host_info; - DoHost<gurl_base::char16, gurl_base::char16>(spec, host, output, &host_info); + DoHost<char16_t, char16_t>(spec, host, output, &host_info); *out_host = host_info.out_host; return (host_info.family != CanonHostInfo::BROKEN); } @@ -408,11 +427,11 @@ DoHost<char, unsigned char>(spec, host, output, host_info); } -void CanonicalizeHostVerbose(const gurl_base::char16* spec, +void CanonicalizeHostVerbose(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info) { - DoHost<gurl_base::char16, gurl_base::char16>(spec, host, output, host_info); + DoHost<char16_t, char16_t>(spec, host, output, host_info); } bool CanonicalizeHostSubstring(const char* spec, @@ -421,10 +440,10 @@ return DoHostSubstring<char, unsigned char>(spec, host, output); } -bool CanonicalizeHostSubstring(const gurl_base::char16* spec, +bool CanonicalizeHostSubstring(const char16_t* spec, const Component& host, CanonOutput* output) { - return DoHostSubstring<gurl_base::char16, gurl_base::char16>(spec, host, output); + return DoHostSubstring<char16_t, char16_t>(spec, host, output); } } // namespace url
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc index 93c9247..b4f8f81 100644 --- a/url/url_canon_icu.cc +++ b/url/url_canon_icu.cc
@@ -9,7 +9,6 @@ #include <string.h> #include "polyfills/base/check.h" -#include "base/i18n/uchar.h" #include <unicode/ucnv.h> #include <unicode/ucnv_cb.h> #include <unicode/utypes.h> @@ -81,7 +80,7 @@ ICUCharsetConverter::~ICUCharsetConverter() = default; -void ICUCharsetConverter::ConvertFromUTF16(const gurl_base::char16* input, +void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) { // Install our error handler. It will be called for character that can not @@ -95,9 +94,8 @@ do { UErrorCode err = U_ZERO_ERROR; char* dest = &output->data()[begin_offset]; - int required_capacity = - ucnv_fromUChars(converter_, dest, dest_capacity, - gurl_base::i18n::ToUCharPtr(input), input_len, &err); + int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity, + input, input_len, &err); if (err != U_BUFFER_OVERFLOW_ERROR) { output->set_length(begin_offset + required_capacity); return;
diff --git a/url/url_canon_icu.h b/url/url_canon_icu.h index 33fc863..34bb99e 100644 --- a/url/url_canon_icu.h +++ b/url/url_canon_icu.h
@@ -26,7 +26,7 @@ ~ICUCharsetConverter() override; - void ConvertFromUTF16(const gurl_base::char16* input, + void ConvertFromUTF16(const char16_t* input, int input_len, CanonOutput* output) override;
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc index 3f3025b..7cd5cae 100644 --- a/url/url_canon_icu_unittest.cc +++ b/url/url_canon_icu_unittest.cc
@@ -67,7 +67,7 @@ std::string str; StdStringCanonOutput output(&str); - gurl_base::string16 input_str( + std::u16string input_str( test_utils::TruncateWStringToUTF16(icu_cases[i].input)); int input_len = static_cast<int>(input_str.length()); converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); @@ -84,7 +84,7 @@ ICUCharsetConverter converter(conv.converter()); for (int i = static_size - 2; i <= static_size + 2; i++) { // Make a string with the appropriate length. - gurl_base::string16 input; + std::u16string input; for (int ch = 0; ch < i; ch++) input.push_back('a'); @@ -138,7 +138,7 @@ } if (query_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(query_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len);
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc index 961c3b0..ab56e7b 100644 --- a/url/url_canon_internal.cc +++ b/url/url_canon_internal.cc
@@ -85,7 +85,7 @@ // may get resized while we're overriding a subsequent component. Instead, the // caller should use the beginning of the |utf8_buffer| as the string pointer // for all components once all overrides have been prepared. -bool PrepareUTF16OverrideComponent(const gurl_base::char16* override_source, +bool PrepareUTF16OverrideComponent(const char16_t* override_source, const Component& override_component, CanonOutput* utf8_buffer, Component* dest_component) { @@ -233,7 +233,7 @@ 0, // 0xE0 - 0xFF }; -const gurl_base::char16 kUnicodeReplacementCharacter = 0xfffd; +const char16_t kUnicodeReplacementCharacter = 0xfffd; void AppendStringOfType(const char* source, int length, SharedCharTypes type, @@ -241,11 +241,11 @@ DoAppendStringOfType<char, unsigned char>(source, length, type, output); } -void AppendStringOfType(const gurl_base::char16* source, int length, +void AppendStringOfType(const char16_t* source, + int length, SharedCharTypes type, CanonOutput* output) { - DoAppendStringOfType<gurl_base::char16, gurl_base::char16>( - source, length, type, output); + DoAppendStringOfType<char16_t, char16_t>(source, length, type, output); } bool ReadUTFChar(const char* str, int* begin, int length, @@ -261,7 +261,9 @@ return true; } -bool ReadUTFChar(const gurl_base::char16* str, int* begin, int length, +bool ReadUTFChar(const char16_t* str, + int* begin, + int length, unsigned* code_point_out) { // This depends on ints and int32s being the same thing. If they're not, it // will fail to compile. 
@@ -279,13 +281,15 @@ DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output); } -void AppendInvalidNarrowString(const gurl_base::char16* spec, int begin, int end, +void AppendInvalidNarrowString(const char16_t* spec, + int begin, + int end, CanonOutput* output) { - DoAppendInvalidNarrowString<gurl_base::char16, gurl_base::char16>( - spec, begin, end, output); + DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output); } -bool ConvertUTF16ToUTF8(const gurl_base::char16* input, int input_len, +bool ConvertUTF16ToUTF8(const char16_t* input, + int input_len, CanonOutput* output) { bool success = true; for (int i = 0; i < input_len; i++) { @@ -296,8 +300,9 @@ return success; } -bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT<gurl_base::char16>* output) { +bool ConvertUTF8ToUTF16(const char* input, + int input_len, + CanonOutputT<char16_t>* output) { bool success = true; for (int i = 0; i < input_len; i++) { unsigned code_point; @@ -339,14 +344,14 @@ } bool SetupUTF16OverrideComponents(const char* base, - const Replacements<gurl_base::char16>& repl, + const Replacements<char16_t>& repl, CanonOutput* utf8_buffer, URLComponentSource<char>* source, Parsed* parsed) { bool success = true; // Get the source and parsed structures of the things we are replacing. - const URLComponentSource<gurl_base::char16>& repl_source = repl.sources(); + const URLComponentSource<char16_t>& repl_source = repl.sources(); const Parsed& repl_parsed = repl.components(); success &= PrepareUTF16OverrideComponent( @@ -408,7 +413,7 @@ return 0; } -int _itow_s(int value, gurl_base::char16* buffer, size_t size_in_chars, int radix) { +int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) { if (radix != 10) return EINVAL; @@ -422,7 +427,7 @@ } for (int i = 0; i < written; ++i) { - buffer[i] = static_cast<gurl_base::char16>(temp[i]); + buffer[i] = char16_t{temp[i]}; } buffer[written] = '\0'; return 0;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h index 4a282b1..11e0f7a 100644 --- a/url/url_canon_internal.h +++ b/url/url_canon_internal.h
@@ -79,7 +79,8 @@ void AppendStringOfType(const char* source, int length, SharedCharTypes type, CanonOutput* output); -void AppendStringOfType(const gurl_base::char16* source, int length, +void AppendStringOfType(const char16_t* source, + int length, SharedCharTypes type, CanonOutput* output); @@ -123,7 +124,7 @@ // required for relative URL resolving to test for scheme equality. // // Returns 0 if the input character is not a valid scheme character. -char CanonicalSchemeChar(gurl_base::char16 ch); +char CanonicalSchemeChar(char16_t ch); // Write a single character, escaped, to the output. This always escapes: it // does no checking that thee character requires escaping. @@ -138,7 +139,7 @@ } // The character we'll substitute for undecodable or invalid characters. -extern const gurl_base::char16 kUnicodeReplacementCharacter; +extern const char16_t kUnicodeReplacementCharacter; // UTF-8 functions ------------------------------------------------------------ @@ -229,19 +230,19 @@ // can be incremented in a loop and will be ready for the next character. // (for a single-16-bit-word character, it will not be changed). COMPONENT_EXPORT(URL) -bool ReadUTFChar(const gurl_base::char16* str, +bool ReadUTFChar(const char16_t* str, int* begin, int length, unsigned* code_point_out); // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method. 
inline void AppendUTF16Value(unsigned code_point, - CanonOutputT<gurl_base::char16>* output) { + CanonOutputT<char16_t>* output) { if (code_point > 0xffff) { - output->push_back(static_cast<gurl_base::char16>((code_point >> 10) + 0xd7c0)); - output->push_back(static_cast<gurl_base::char16>((code_point & 0x3ff) | 0xdc00)); + output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0)); + output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00)); } else { - output->push_back(static_cast<gurl_base::char16>(code_point)); + output->push_back(static_cast<char16_t>(code_point)); } } @@ -266,8 +267,10 @@ // // Assumes that ch[begin] is within range in the array, but does not assume // that any following characters are. -inline bool AppendUTF8EscapedChar(const gurl_base::char16* str, int* begin, - int length, CanonOutput* output) { +inline bool AppendUTF8EscapedChar(const char16_t* str, + int* begin, + int length, + CanonOutput* output) { // UTF-16 input. ReadUTFChar will handle invalid characters for us and give // us the kUnicodeReplacementCharacter, so we don't have to do special // checking after failure, just pass through the failure to the caller. @@ -301,7 +304,7 @@ inline bool Is8BitChar(char c) { return true; // this case is specialized to avoid a warning } -inline bool Is8BitChar(gurl_base::char16 c) { +inline bool Is8BitChar(char16_t c) { return c <= 255; } @@ -337,7 +340,9 @@ // the escaping rules are not guaranteed! void AppendInvalidNarrowString(const char* spec, int begin, int end, CanonOutput* output); -void AppendInvalidNarrowString(const gurl_base::char16* spec, int begin, int end, +void AppendInvalidNarrowString(const char16_t* spec, + int begin, + int end, CanonOutput* output); // Misc canonicalization helpers ---------------------------------------------- @@ -351,17 +356,17 @@ // return false in the failure case, and the caller should not continue as // normal. 
COMPONENT_EXPORT(URL) -bool ConvertUTF16ToUTF8(const gurl_base::char16* input, +bool ConvertUTF16ToUTF8(const char16_t* input, int input_len, CanonOutput* output); COMPONENT_EXPORT(URL) bool ConvertUTF8ToUTF16(const char* input, int input_len, - CanonOutputT<gurl_base::char16>* output); + CanonOutputT<char16_t>* output); // Converts from UTF-16 to 8-bit using the character set converter. If the // converter is NULL, this will use UTF-8. -void ConvertUTF16ToQueryEncoding(const gurl_base::char16* input, +void ConvertUTF16ToQueryEncoding(const char16_t* input, const Component& query, CharsetConverter* converter, CanonOutput* output); @@ -397,21 +402,21 @@ // although we will have still done the override with "invalid characters" in // place of errors. bool SetupUTF16OverrideComponents(const char* base, - const Replacements<gurl_base::char16>& repl, + const Replacements<char16_t>& repl, CanonOutput* utf8_buffer, URLComponentSource<char>* source, Parsed* parsed); // Implemented in url_canon_path.cc, these are required by the relative URL // resolver as well, so we declare them here. 
-bool CanonicalizePartialPath(const char* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output); -bool CanonicalizePartialPath(const gurl_base::char16* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output); +bool CanonicalizePartialPathInternal(const char* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output); +bool CanonicalizePartialPathInternal(const char16_t* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output); #ifndef WIN32 @@ -419,7 +424,7 @@ COMPONENT_EXPORT(URL) int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix); COMPONENT_EXPORT(URL) -int _itow_s(int value, gurl_base::char16* buffer, size_t size_in_chars, int radix); +int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix); // Secure template overloads for these functions template<size_t N> @@ -427,8 +432,8 @@ return _itoa_s(value, buffer, N, radix); } -template<size_t N> -inline int _itow_s(int value, gurl_base::char16 (&buffer)[N], int radix) { +template <size_t N> +inline int _itow_s(int value, char16_t (&buffer)[N], int radix) { return _itow_s(value, buffer, N, radix); }
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc index c214217..8234b4e 100644 --- a/url/url_canon_ip.cc +++ b/url/url_canon_ip.cc
@@ -9,6 +9,8 @@ #include <limits> #include "polyfills/base/check.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" #include "url/url_canon_internal.h" namespace url { @@ -593,6 +595,105 @@ return true; } +// Method to check if something looks like a number. Used instead of +// IPv4ComponentToNumber() so that it counts things that look like bad base-8 +// (e.g. 09). +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +template <typename CHAR> +bool LooksLikeANumber(const CHAR* spec, const Component& component) { + // Empty components don't look like numbers. + if (!component.is_nonempty()) + return false; + + SharedCharTypes base = CHAR_DEC; + size_t start = component.begin; + if (component.len >= 2 && spec[start] == '0' && + (spec[start + 1] == 'x' || spec[start + 1] == 'X')) { + base = CHAR_HEX; + start += 2; + } + for (int i = start; i < component.end(); i++) { + if (!IsCharOfType(spec[i], base)) + return false; + } + return true; +} + +// Calculates the "HostSafetyStatus" of the provided hostname. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +template <typename CHAR> +HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec, + const Component& host) { + if (!host.is_nonempty()) + return HostSafetyStatus::kOk; + + // Find the last two components. + + // Number of identified components. Stops after second component. Does not + // include the empty terminal component, if the host ends with a dot. + int existing_components = 0; + // Parsed component values. Populated last component first. + Component components[2]; + + // Index of the character after the end of the current component. + int cur_component_end = host.end(); + + // Ignore terminal dot, if there is one. + if (spec[cur_component_end - 1] == '.') { + cur_component_end--; + // Nothing else to do if the host is just a dot. 
+ if (host.begin == cur_component_end) + return HostSafetyStatus::kOk; + } + + for (int i = cur_component_end; /* nothing */; i--) { + GURL_DCHECK_GE(i, host.begin); + + // If `i` is not the first character of the component, continue. + if (i != host.begin && spec[i - 1] != '.') + continue; + + // Otherwise, i is the index of the the start of a component. + components[existing_components] = Component(i, cur_component_end - i); + existing_components++; + + // Finished parsing last component. + if (i == host.begin) + break; + + // If there's anything left to parse after the 2th component, nothing more + // to do. + if (existing_components == 2) + break; + + // The next component ends before the dot at spec[i]. `i` will be + // decremented when restarting the loop, so no need to modify it. + cur_component_end = i - 1; + } + + // If the last value doesn't look like a number, no need to do more work, as + // IPv6 and hostnames with non-numeric final components are all considered OK. + if (!LooksLikeANumber(spec, components[0])) + return HostSafetyStatus::kOk; + + url::RawCanonOutputT<char> ignored_output; + CanonHostInfo host_info; + CanonicalizeIPAddress(spec, host, &ignored_output, &host_info); + // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4 + // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean + // things like "].6" will correctly be considered already invalid, so will + // return kOk. 
+ if (host_info.family != CanonHostInfo::NEUTRAL) + return HostSafetyStatus::kOk; + + if (LooksLikeANumber(spec, components[1])) + return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric; + + return HostSafetyStatus::kTopLevelDomainIsNumeric; +} + } // namespace void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) { @@ -650,11 +751,10 @@ return DoFindIPv4Components<char, unsigned char>(spec, host, components); } -bool FindIPv4Components(const gurl_base::char16* spec, +bool FindIPv4Components(const char16_t* spec, const Component& host, Component components[4]) { - return DoFindIPv4Components<gurl_base::char16, gurl_base::char16>( - spec, host, components); + return DoFindIPv4Components<char16_t, char16_t>(spec, host, components); } void CanonicalizeIPAddress(const char* spec, @@ -669,15 +769,15 @@ return; } -void CanonicalizeIPAddress(const gurl_base::char16* spec, +void CanonicalizeIPAddress(const char16_t* spec, const Component& host, CanonOutput* output, CanonHostInfo* host_info) { - if (DoCanonicalizeIPv4Address<gurl_base::char16, gurl_base::char16>( - spec, host, output, host_info)) + if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output, + host_info)) return; - if (DoCanonicalizeIPv6Address<gurl_base::char16, gurl_base::char16>( - spec, host, output, host_info)) + if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output, + host_info)) return; } @@ -688,12 +788,12 @@ return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components); } -CanonHostInfo::Family IPv4AddressToNumber(const gurl_base::char16* spec, +CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[4], int* num_ipv4_components) { - return DoIPv4AddressToNumber<gurl_base::char16>( - spec, host, address, num_ipv4_components); + return DoIPv4AddressToNumber<char16_t>(spec, host, address, + num_ipv4_components); } bool IPv6AddressToNumber(const char* spec, @@ -702,10 +802,19 @@ return 
DoIPv6AddressToNumber<char, unsigned char>(spec, host, address); } -bool IPv6AddressToNumber(const gurl_base::char16* spec, +bool IPv6AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[16]) { - return DoIPv6AddressToNumber<gurl_base::char16, gurl_base::char16>(spec, host, address); + return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address); +} + +HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) { + return DoCheckHostnameSafety(spec, host); +} + +HostSafetyStatus CheckHostnameSafety(const char16_t* spec, + const Component& host) { + return DoCheckHostnameSafety(spec, host); } } // namespace url
diff --git a/url/url_canon_ip.h b/url/url_canon_ip.h index 5d93f28..8980dbb 100644 --- a/url/url_canon_ip.h +++ b/url/url_canon_ip.h
@@ -6,7 +6,7 @@ #define URL_URL_CANON_IP_H_ #include "polyfills/base/component_export.h" -#include "base/strings/string16.h" +#include "base/strings/string_piece_forward.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -43,7 +43,7 @@ const Component& host, Component components[4]); COMPONENT_EXPORT(URL) -bool FindIPv4Components(const gurl_base::char16* spec, +bool FindIPv4Components(const char16_t* spec, const Component& host, Component components[4]); @@ -64,7 +64,7 @@ unsigned char address[4], int* num_ipv4_components); COMPONENT_EXPORT(URL) -CanonHostInfo::Family IPv4AddressToNumber(const gurl_base::char16* spec, +CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[4], int* num_ipv4_components); @@ -79,10 +79,52 @@ const Component& host, unsigned char address[16]); COMPONENT_EXPORT(URL) -bool IPv6AddressToNumber(const gurl_base::char16* spec, +bool IPv6AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[16]); +// Temporary enum for collecting histograms at the DNS and URL level about +// hostname validity, for potentially updating the URL spec. +// +// This is used in histograms, so old values should not be reused, and new +// values should be added at the bottom. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +enum class HostSafetyStatus { + // Any canonical hostname that doesn't fit into any other class. IPv4 + // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are + // broken are also considered OK. + kOk = 0, + + // The top level domain looks numeric. This is basically means it either + // parses as a number per the URL spec, or is entirely numeric ("09" doesn't + // currently parse as a number, since the leading "0" indicates an octal + // value). + kTopLevelDomainIsNumeric = 1, + + // Both the top level domain and the next level domain look like a number, + // using the above definition. 
This is the case that is actually concerning - + // for these domains, the eTLD+1 is purely numeric, which means putting it as + // the hostname of a URL will potentially result in an IPv4 hostname. This is + // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this + // label will be returned instead. + kTwoHighestLevelDomainsAreNumeric = 2, + + kMaxValue = kTwoHighestLevelDomainsAreNumeric, +}; + +// Calculates the HostSafetyStatus of a hostname. Hostname should have been +// canonicalized. This function is only intended to be temporary, to inform +// decisions around tightening up what the URL parser considers valid hostnames. +// +// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. +COMPONENT_EXPORT(URL) +HostSafetyStatus CheckHostnameSafety(const char* hostname, + const Component& host); +COMPONENT_EXPORT(URL) +HostSafetyStatus CheckHostnameSafety(const char16_t* hostname, + const Component& host); + } // namespace url #endif // URL_URL_CANON_IP_H_
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc index f09faa7..f4fe2b4 100644 --- a/url/url_canon_mailtourl.cc +++ b/url/url_canon_mailtourl.cc
@@ -90,13 +90,13 @@ URLComponentSource<char>(spec), parsed, output, new_parsed); } -bool CanonicalizeMailtoURL(const gurl_base::char16* spec, +bool CanonicalizeMailtoURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeMailtoURL<gurl_base::char16, gurl_base::char16>( - URLComponentSource<gurl_base::char16>(spec), parsed, output, new_parsed); + return DoCanonicalizeMailtoURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, output, new_parsed); } bool ReplaceMailtoURL(const char* base, @@ -113,7 +113,7 @@ bool ReplaceMailtoURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed) { RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc index da32bd8..e043043 100644 --- a/url/url_canon_path.cc +++ b/url/url_canon_path.cc
@@ -20,7 +20,8 @@ // table below more clear when neither ESCAPE or UNESCAPE is set. PASS = 0, - // This character requires special handling in DoPartialPath. Doing this test + // This character requires special handling in DoPartialPathInternal. Doing + // this test // first allows us to filter out the common cases of regular characters that // can be directly copied. SPECIAL = 1, @@ -235,10 +236,8 @@ } } -// Appends the given path to the output. It assumes that if the input path -// starts with a slash, it should be copied to the output. If no path has -// already been appended to the output (the case when not resolving -// relative URLs), the path should begin with a slash. +// Canonicalizes and appends the given path to the output. It assumes that if +// the input path starts with a slash, it should be copied to the output. // // If there are already path components (this mode is used when appending // relative paths for resolving), it assumes that the output already has @@ -248,11 +247,11 @@ // We do not collapse multiple slashes in a row to a single slash. It seems // no web browsers do this, and we don't want incompatibilities, even though // it would be correct for most systems. -template<typename CHAR, typename UCHAR> -bool DoPartialPath(const CHAR* spec, - const Component& path, - int path_begin_in_output, - CanonOutput* output) { +template <typename CHAR, typename UCHAR> +bool DoPartialPathInternal(const CHAR* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { int end = path.end(); // We use this variable to minimize the amount of work done when unescaping -- @@ -279,16 +278,12 @@ // Needs special handling of some sort. int dotlen; if ((dotlen = IsDot(spec, i, end)) > 0) { - // See if this dot was preceded by a slash in the output. We - // assume that when canonicalizing paths, they will always - // start with a slash and not a dot, so we don't have to - // bounds check the output. 
+ // See if this dot was preceded by a slash in the output. // // Note that we check this in the case of dots so we don't have to // special case slashes. Since slashes are much more common than // dots, this actually increases performance measurably (though // slightly). - GURL_DCHECK(output->length() > path_begin_in_output); if (output->length() > path_begin_in_output && output->at(output->length() - 1) == '/') { // Slash followed by a dot, check to see if this is means relative @@ -382,6 +377,21 @@ return success; } +// Perform the same logic as in DoPartialPathInternal(), but updates the +// publicly exposed CanonOutput structure similar to DoPath(). Returns +// true if successful. +template <typename CHAR, typename UCHAR> +bool DoPartialPath(const CHAR* spec, + const Component& path, + CanonOutput* output, + Component* out_path) { + out_path->begin = output->length(); + bool success = + DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output); + out_path->len = output->length() - out_path->begin; + return success; +} + template<typename CHAR, typename UCHAR> bool DoPath(const CHAR* spec, const Component& path, @@ -397,7 +407,8 @@ if (!IsURLSlash(spec[path.begin])) output->push_back('/'); - success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output); + success = + DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output); } else { // No input, canonical path is a slash. 
output->push_back('/'); @@ -415,28 +426,41 @@ return DoPath<char, unsigned char>(spec, path, output, out_path); } -bool CanonicalizePath(const gurl_base::char16* spec, +bool CanonicalizePath(const char16_t* spec, const Component& path, CanonOutput* output, Component* out_path) { - return DoPath<gurl_base::char16, gurl_base::char16>(spec, path, output, out_path); + return DoPath<char16_t, char16_t>(spec, path, output, out_path); } bool CanonicalizePartialPath(const char* spec, const Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output, - output); + CanonOutput* output, + Component* out_path) { + return DoPartialPath<char, unsigned char>(spec, path, output, out_path); } -bool CanonicalizePartialPath(const gurl_base::char16* spec, +bool CanonicalizePartialPath(const char16_t* spec, const Component& path, - int path_begin_in_output, - CanonOutput* output) { - return DoPartialPath<gurl_base::char16, gurl_base::char16>(spec, path, - path_begin_in_output, - output); + CanonOutput* output, + Component* out_path) { + return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path); +} + +bool CanonicalizePartialPathInternal(const char* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { + return DoPartialPathInternal<char, unsigned char>( + spec, path, path_begin_in_output, output); +} + +bool CanonicalizePartialPathInternal(const char16_t* spec, + const Component& path, + int path_begin_in_output, + CanonOutput* output) { + return DoPartialPathInternal<char16_t, char16_t>( + spec, path, path_begin_in_output, output); } } // namespace url
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc index 0330b06..134e132 100644 --- a/url/url_canon_pathurl.cc +++ b/url/url_canon_pathurl.cc
@@ -62,8 +62,8 @@ new_parsed->password.reset(); new_parsed->host.reset(); new_parsed->port.reset(); - // We allow path URLs to have the path, query and fragment components, but we - // will canonicalize each of the via the weaker path URL rules. + + // Canonicalize path and query via the weaker path URL rules. // // Note: parsing the path part should never cause a failure, see // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state @@ -71,8 +71,8 @@ output, &new_parsed->path); DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?', output, &new_parsed->query); - DoCanonicalizePathComponent<CHAR, UCHAR>(source.ref, parsed.ref, '#', output, - &new_parsed->ref); + + CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); return success; } @@ -88,13 +88,29 @@ URLComponentSource<char>(spec), parsed, output, new_parsed); } -bool CanonicalizePathURL(const gurl_base::char16* spec, +bool CanonicalizePathURL(const char16_t* spec, int spec_len, const Parsed& parsed, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizePathURL<gurl_base::char16, gurl_base::char16>( - URLComponentSource<gurl_base::char16>(spec), parsed, output, new_parsed); + return DoCanonicalizePathURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, output, new_parsed); +} + +void CanonicalizePathURLPath(const char* source, + const Component& component, + CanonOutput* output, + Component* new_component) { + DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0', + output, new_component); +} + +void CanonicalizePathURLPath(const char16_t* source, + const Component& component, + CanonOutput* output, + Component* new_component) { + DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0', + output, new_component); } bool ReplacePathURL(const char* base, @@ -111,7 +127,7 @@ bool ReplacePathURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const 
Replacements<char16_t>& replacements, CanonOutput* output, Parsed* new_parsed) { RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc index 99b8ed8..b3a1118 100644 --- a/url/url_canon_query.cc +++ b/url/url_canon_query.cc
@@ -82,7 +82,7 @@ // Runs the converter with the given UTF-16 input. We don't have to do // anything, but this overridden function allows us to use the same code // for both UTF-8 and UTF-16 input. -void RunConverter(const gurl_base::char16* spec, +void RunConverter(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output) { @@ -144,21 +144,20 @@ output, out_query); } -void CanonicalizeQuery(const gurl_base::char16* spec, +void CanonicalizeQuery(const char16_t* spec, const Component& query, CharsetConverter* converter, CanonOutput* output, Component* out_query) { - DoCanonicalizeQuery<gurl_base::char16, gurl_base::char16>(spec, query, converter, - output, out_query); + DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output, + out_query); } -void ConvertUTF16ToQueryEncoding(const gurl_base::char16* input, +void ConvertUTF16ToQueryEncoding(const char16_t* input, const Component& query, CharsetConverter* converter, CanonOutput* output) { - DoConvertToQueryEncoding<gurl_base::char16, gurl_base::char16>(input, query, - converter, output); + DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output); } } // namespace url
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc index e148128..f047d7f 100644 --- a/url/url_canon_relative.cc +++ b/url/url_canon_relative.cc
@@ -358,8 +358,8 @@ int path_begin = output->length(); CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(), output); - success &= CanonicalizePartialPath(relative_url, path, path_begin, - output); + success &= CanonicalizePartialPathInternal(relative_url, path, path_begin, + output); out_parsed->path = MakeRange(path_begin, output->length()); // Copy the rest of the stuff after the path from the relative path. @@ -581,14 +581,14 @@ bool IsRelativeURL(const char* base, const Parsed& base_parsed, - const gurl_base::char16* fragment, + const char16_t* fragment, int fragment_len, bool is_base_hierarchical, bool* is_relative, Component* relative_component) { - return DoIsRelativeURL<gurl_base::char16>( - base, base_parsed, fragment, fragment_len, is_base_hierarchical, - is_relative, relative_component); + return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len, + is_base_hierarchical, is_relative, + relative_component); } bool ResolveRelativeURL(const char* base_url, @@ -607,14 +607,14 @@ bool ResolveRelativeURL(const char* base_url, const Parsed& base_parsed, bool base_is_file, - const gurl_base::char16* relative_url, + const char16_t* relative_url, const Component& relative_component, CharsetConverter* query_converter, CanonOutput* output, Parsed* out_parsed) { - return DoResolveRelativeURL<gurl_base::char16>( - base_url, base_parsed, base_is_file, relative_url, - relative_component, query_converter, output, out_parsed); + return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file, + relative_url, relative_component, + query_converter, output, out_parsed); } } // namespace url
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h index 9b7943a..6d23abf 100644 --- a/url/url_canon_stdstring.h +++ b/url/url_canon_stdstring.h
@@ -59,11 +59,11 @@ // references to std::strings. // Note: Extra const char* overloads are necessary to break ambiguities that // would otherwise exist for char literals. -template <typename STR> -class StringPieceReplacements : public Replacements<typename STR::value_type> { +template <typename CharT> +class StringPieceReplacements : public Replacements<CharT> { private: - using CharT = typename STR::value_type; - using StringPieceT = gurl_base::BasicStringPiece<STR>; + using StringT = std::basic_string<CharT>; + using StringPieceT = gurl_base::BasicStringPiece<CharT>; using ParentT = Replacements<CharT>; using SetterFun = void (ParentT::*)(const CharT*, const Component&); @@ -74,35 +74,35 @@ public: void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); } void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); } - void SetSchemeStr(const STR&&) = delete; + void SetSchemeStr(const StringT&&) = delete; void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); } void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); } - void SetUsernameStr(const STR&&) = delete; + void SetUsernameStr(const StringT&&) = delete; void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); } void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); } - void SetPasswordStr(const STR&&) = delete; + void SetPasswordStr(const StringT&&) = delete; void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); } void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); } - void SetHostStr(const STR&&) = delete; + void SetHostStr(const StringT&&) = delete; void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); } void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); } - void SetPortStr(const STR&&) = delete; + void SetPortStr(const StringT&&) = delete; void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); } void 
SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); } - void SetPathStr(const STR&&) = delete; + void SetPathStr(const StringT&&) = delete; void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); } void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); } - void SetQueryStr(const STR&&) = delete; + void SetQueryStr(const StringT&&) = delete; void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); } void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); } - void SetRefStr(const STR&&) = delete; + void SetRefStr(const StringT&&) = delete; }; } // namespace url
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc index 005877a..c7e7454 100644 --- a/url/url_canon_stdurl.cc +++ b/url/url_canon_stdurl.cc
@@ -150,16 +150,16 @@ output, new_parsed); } -bool CanonicalizeStandardURL(const gurl_base::char16* spec, +bool CanonicalizeStandardURL(const char16_t* spec, int spec_len, const Parsed& parsed, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output, Parsed* new_parsed) { - return DoCanonicalizeStandardURL<gurl_base::char16, gurl_base::char16>( - URLComponentSource<gurl_base::char16>(spec), parsed, scheme_type, - query_converter, output, new_parsed); + return DoCanonicalizeStandardURL<char16_t, char16_t>( + URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter, + output, new_parsed); } // It might be nice in the future to optimize this so unchanged components don't @@ -189,7 +189,7 @@ // regular code path can be used. bool ReplaceStandardURL(const char* base, const Parsed& base_parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, SchemeType scheme_type, CharsetConverter* query_converter, CanonOutput* output,
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index e2469ca..a59c745 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -12,6 +12,7 @@ #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" +#include "url/url_canon_ip.h" #include "url/url_canon_stdstring.h" #include "url/url_test_utils.h" @@ -186,7 +187,7 @@ out_str.clear(); StdStringCanonOutput output(&out_str); - gurl_base::string16 input_str( + std::u16string input_str( test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); int input_len = static_cast<int>(input_str.length()); bool success = true; @@ -205,7 +206,7 @@ // UTF-16 -> UTF-8 std::string input8_str(utf_cases[i].input8); - gurl_base::string16 input16_str( + std::u16string input16_str( test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); EXPECT_EQ(input8_str, gurl_base::UTF16ToUTF8(input16_str)); @@ -258,7 +259,7 @@ out_str.clear(); StdStringCanonOutput output2(&out_str); - gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(scheme_cases[i].input)); + std::u16string wide_input(gurl_base::UTF8ToUTF16(scheme_cases[i].input)); in_comp.len = static_cast<int>(wide_input.length()); success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2, &out_comp); @@ -529,7 +530,7 @@ // Wide version. if (host_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(host_cases[i].input16)); int host_len = static_cast<int>(input16.length()); Component in_comp(0, host_len); @@ -580,7 +581,7 @@ // Wide version. if (host_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(host_cases[i].input16)); int host_len = static_cast<int>(input16.length()); Component in_comp(0, host_len); @@ -703,7 +704,7 @@ } // 16-bit version. - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(cases[i].input16)); component = Component(0, static_cast<int>(input16.length())); @@ -856,7 +857,7 @@ } // 16-bit version. 
- gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(cases[i].input16)); component = Component(0, static_cast<int>(input16.length())); @@ -988,7 +989,7 @@ // Now try the wide version out_str.clear(); StdStringCanonOutput output2(&out_str); - gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(user_info_cases[i].input)); + std::u16string wide_input(gurl_base::UTF8ToUTF16(user_info_cases[i].input)); success = CanonicalizeUserInfo(wide_input.c_str(), parsed.username, wide_input.c_str(), @@ -1051,7 +1052,7 @@ // Now try the wide version out_str.clear(); StdStringCanonOutput output2(&out_str); - gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(port_cases[i].input)); + std::u16string wide_input(gurl_base::UTF8ToUTF16(port_cases[i].input)); success = CanonicalizePort(wide_input.c_str(), in_comp, port_cases[i].default_port, @@ -1066,105 +1067,117 @@ } } -TEST(URLCanonTest, Path) { - DualComponentCase path_cases[] = { - // ----- path collapsing tests ----- - {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, - {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, - {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, - {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, - // double dots followed by a slash or the end of the string count - {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, - {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, - // don't count double dots when they aren't followed by a slash - {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, - // some in the middle - {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), - true}, - {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", - Component(0, 2), true}, - // we should not be able to go above the root - {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, - {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), - true}, - // escaped dots should 
be unescaped and treated the same as dots - {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, - {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, - {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", - "/..bar", Component(0, 6), true}, - // Multiple slashes in a row should be preserved and treated like empty - // directory names. - {"////../..", L"////../..", "//", Component(0, 2), true}, +DualComponentCase kCommonPathCases[] = { + // ----- path collapsing tests ----- + {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, + {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, + {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, + {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, + // double dots followed by a slash or the end of the string count + {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, + {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, + // don't count double dots when they aren't followed by a slash + {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, + // some in the middle + {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true}, + {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", + Component(0, 2), true}, + // we should not be able to go above the root + {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, + {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true}, + // escaped dots should be unescaped and treated the same as dots + {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, + {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, + {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", + "/..bar", Component(0, 6), true}, + // Multiple slashes in a row should be preserved and treated like empty + // directory names. 
+ {"////../..", L"////../..", "//", Component(0, 2), true}, - // ----- escaping tests ----- - {"/foo", L"/foo", "/foo", Component(0, 4), true}, - // Valid escape sequence - {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, - // Invalid escape sequence we should pass through unchanged. - {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, - {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, - // Invalid escape sequence: bad characters should be treated the same as - // the sourrounding text, not as escaped (in this case, UTF-8). - {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, - {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), - true}, - {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", - Component(0, 22), true}, - // Regular characters that are escaped should be unescaped - {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, - // Funny characters that are unescaped should be escaped - {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, - {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, - // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, - // Some characters should be passed through unchanged regardless of esc. - {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), - true}, - // Characters that are properly escaped should not have the case changed - // of hex letters. - {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), - true}, - // Funny characters that are unescaped should be escaped - {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, - // Backslashes should get converted to forward slashes - {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, - // Hashes found in paths (possibly only when the caller explicitly sets - // the path on an already-parsed URL) should be escaped. 
- {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, - // %7f should be allowed and %3D should not be unescaped (these were wrong - // in a previous version). - {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", - "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, - // @ should be passed through unchanged (escaped or unescaped). - {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, - // Nested escape sequences should result in escaping the leading '%' if - // unescaping would result in a new escape sequence. - {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, - {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, - {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, - // Make sure truncated "nested" escapes don't result in reading off the - // string end. - {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, - // Don't unescape the leading '%' if unescaping doesn't result in a valid - // new escape sequence. - {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, - {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, - // Don't erroneously downcast a UTF-16 charater in a way that makes it - // look like part of an escape sequence. - {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, + // ----- escaping tests ----- + {"/foo", L"/foo", "/foo", Component(0, 4), true}, + // Valid escape sequence + {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, + // Invalid escape sequence we should pass through unchanged. + {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, + {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, + // Invalid escape sequence: bad characters should be treated the same as + // the surrounding text, not as escaped (in this case, UTF-8). 
+ {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, + {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true}, + {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), + true}, + // Regular characters that are escaped should be unescaped + {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, + // Funny characters that are unescaped should be escaped + {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true}, + {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, + // Invalid characters that are escaped should cause a failure. + {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, + // Some characters should be passed through unchanged regardless of esc. + {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), + true}, + // Characters that are properly escaped should not have the case changed + // of hex letters. + {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), + true}, + // Funny characters that are unescaped should be escaped + {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, + // Backslashes should get converted to forward slashes + {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, + // Hashes found in paths (possibly only when the caller explicitly sets + // the path on an already-parsed URL) should be escaped. + {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, + // %7f should be allowed and %3D should not be unescaped (these were wrong + // in a previous version). + {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", + "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, + // @ should be passed through unchanged (escaped or unescaped). + {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, + // Nested escape sequences should result in escaping the leading '%' if + // unescaping would result in a new escape sequence. 
+ {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, + {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, + {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, + // Make sure truncated "nested" escapes don't result in reading off the + // string end. + {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, + // Don't unescape the leading '%' if unescaping doesn't result in a valid + // new escape sequence. + {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, + {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, + // Don't erroneously downcast a UTF-16 character in a way that makes it + // look like part of an escape sequence. + {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, - // ----- encoding tests ----- - // Basic conversions - {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", - L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", - Component(0, 37), true}, - // Invalid unicode characters should fail. We only do validation on - // UTF-16 input, so this doesn't happen on 8-bit. - {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, - {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, - }; + // ----- encoding tests ----- + // Basic conversions + {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", + L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + Component(0, 37), true}, + // Invalid unicode characters should fail. We only do validation on + // UTF-16 input, so this doesn't happen on 8-bit. 
+ {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true}, + {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, +}; - for (size_t i = 0; i < gurl_base::size(path_cases); i++) { +typedef bool (*CanonFunc8Bit)(const char*, + const Component&, + CanonOutput*, + Component*); +typedef bool (*CanonFunc16Bit)(const char16_t*, + const Component&, + CanonOutput*, + Component*); + +void DoPathTest(const DualComponentCase* path_cases, + size_t num_cases, + CanonFunc8Bit canon_func_8, + CanonFunc16Bit canon_func_16) { + for (size_t i = 0; i < num_cases; i++) { + testing::Message scope_message; + scope_message << path_cases[i].input8 << "," << path_cases[i].input16; + SCOPED_TRACE(scope_message); if (path_cases[i].input8) { int len = static_cast<int>(strlen(path_cases[i].input8)); Component in_comp(0, len); @@ -1172,7 +1185,7 @@ std::string out_str; StdStringCanonOutput output(&out_str); bool success = - CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp); + canon_func_8(path_cases[i].input8, in_comp, &output, &out_comp); output.Complete(); EXPECT_EQ(path_cases[i].expected_success, success); @@ -1182,7 +1195,7 @@ } if (path_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(path_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1191,7 +1204,7 @@ StdStringCanonOutput output(&out_str); bool success = - CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp); + canon_func_16(input16.c_str(), in_comp, &output, &out_comp); output.Complete(); EXPECT_EQ(path_cases[i].expected_success, success); @@ -1200,6 +1213,11 @@ EXPECT_EQ(path_cases[i].expected, out_str); } } +} + +TEST(URLCanonTest, Path) { + DoPathTest(kCommonPathCases, gurl_base::size(kCommonPathCases), CanonicalizePath, + CanonicalizePath); // Manual test: embedded NULLs should be escaped and the URL should be marked // as invalid. 
@@ -1215,6 +1233,18 @@ EXPECT_EQ("/ab%00c", out_str); } +TEST(URLCanonTest, PartialPath) { + DualComponentCase partial_path_cases[] = { + {".html", L".html", ".html", Component(0, 5), true}, + {"", L"", "", Component(0, 0), true}, + }; + + DoPathTest(kCommonPathCases, gurl_base::size(kCommonPathCases), + CanonicalizePartialPath, CanonicalizePartialPath); + DoPathTest(partial_path_cases, gurl_base::size(partial_path_cases), + CanonicalizePartialPath, CanonicalizePartialPath); +} + TEST(URLCanonTest, Query) { struct QueryCase { const char* input8; @@ -1258,7 +1288,7 @@ } if (query_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(query_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1332,7 +1362,7 @@ // 16-bit input if (ref_cases[i].input16) { - gurl_base::string16 input16( + std::u16string input16( test_utils::TruncateWStringToUTF16(ref_cases[i].input16)); int len = static_cast<int>(input16.length()); Component in_comp(0, len); @@ -1360,8 +1390,8 @@ output.Complete(); EXPECT_EQ(1, out_comp.begin); - EXPECT_EQ(3, out_comp.len); - EXPECT_EQ("#abz", out_str); + EXPECT_EQ(6, out_comp.len); + EXPECT_EQ("#ab%00z", out_str); } TEST(URLCanonTest, CanonicalizeStandardURL) { @@ -1821,20 +1851,28 @@ // Busted refs shouldn't make the whole thing fail. {"file:///C:/asdf#\xc2", "file:///C:/asdf#%EF%BF%BD", true, Component(), Component(7, 8)}, + {"file:///./s:", "file:///S:", true, Component(), Component(7, 3)}, #else // Unix-style paths - {"file:///home/me", "file:///home/me", true, Component(), Component(7, 8)}, + {"file:///home/me", "file:///home/me", true, Component(), + Component(7, 8)}, // Windowsy ones should get still treated as Unix-style. 
- {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), Component(7, 16)}, - {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, Component(), Component(7, 19)}, + {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), + Component(7, 16)}, + {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, + Component(), Component(7, 19)}, + {"file:///./s:", "file:///s:", true, Component(), Component(7, 3)}, // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html) - {"//", "file:///", true, Component(), Component(7, 1)}, - {"///", "file:///", true, Component(), Component(7, 1)}, - {"///test", "file:///test", true, Component(), Component(7, 5)}, - {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)}, - {"file://localhost", "file://localhost/", true, Component(7, 9), Component(16, 1)}, - {"file://localhost/", "file://localhost/", true, Component(7, 9), Component(16, 1)}, - {"file://localhost/test", "file://localhost/test", true, Component(7, 9), Component(16, 5)}, + {"//", "file:///", true, Component(), Component(7, 1)}, + {"///", "file:///", true, Component(), Component(7, 1)}, + {"///test", "file:///test", true, Component(), Component(7, 5)}, + {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)}, + {"file://localhost", "file://localhost/", true, Component(7, 9), + Component(16, 1)}, + {"file://localhost/", "file://localhost/", true, Component(7, 9), + Component(16, 1)}, + {"file://localhost/test", "file://localhost/test", true, Component(7, 9), + Component(16, 5)}, #endif // _WIN32 }; @@ -1952,6 +1990,53 @@ } } +TEST(URLCanonTest, CanonicalizePathURLPath) { + struct PathCase { + std::string input; + std::wstring input16; + std::string expected; + } path_cases[] = { + {"Foo", L"Foo", "Foo"}, + {"\":This /is interesting;?#", L"\":This /is interesting;?#", + "\":This /is interesting;?#"}, + {"\uFFFF", L"\uFFFF", "%EF%BF%BD"}, + }; + + for (size_t i = 0; i < 
gurl_base::size(path_cases); i++) { + // 8-bit string input + std::string out_str; + StdStringCanonOutput output(&out_str); + url::Component out_component; + CanonicalizePathURLPath(path_cases[i].input.data(), + Component(0, path_cases[i].input.size()), &output, + &out_component); + output.Complete(); + + EXPECT_EQ(path_cases[i].expected, out_str); + + EXPECT_EQ(0, out_component.begin); + EXPECT_EQ(path_cases[i].expected.size(), + static_cast<size_t>(out_component.len)); + + // 16-bit string input + std::string out_str16; + StdStringCanonOutput output16(&out_str16); + url::Component out_component16; + std::u16string input16( + test_utils::TruncateWStringToUTF16(path_cases[i].input16.data())); + CanonicalizePathURLPath(input16.c_str(), + Component(0, path_cases[i].input16.size()), + &output16, &out_component16); + output16.Complete(); + + EXPECT_EQ(path_cases[i].expected, out_str16); + + EXPECT_EQ(0, out_component16.begin); + EXPECT_EQ(path_cases[i].expected.size(), + static_cast<size_t>(out_component16.len)); + } +} + TEST(URLCanonTest, CanonicalizeMailtoURL) { struct URLCase { const char* input; @@ -2086,17 +2171,17 @@ // We fill the buffer with 0xff to ensure that it's getting properly // null-terminated. We also allocate one byte more than what we tell // _itoa_s about, and ensure that the extra byte is untouched. 
- gurl_base::char16 buf[6]; + char16_t buf[6]; const char fill_mem = 0xff; - const gurl_base::char16 fill_char = 0xffff; + const char16_t fill_char = 0xffff; memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("12"), gurl_base::string16(buf)); + EXPECT_EQ(u"12", std::u16string(buf)); EXPECT_EQ(fill_char, buf[3]); // Test the edge cases - exactly the buffer size and one over EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("1234"), gurl_base::string16(buf)); + EXPECT_EQ(u"1234", std::u16string(buf)); EXPECT_EQ(fill_char, buf[5]); memset(buf, fill_mem, sizeof(buf)); @@ -2106,13 +2191,12 @@ // Test the template overload (note that this will see the full buffer) memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12, buf, 10)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("12"), - gurl_base::string16(buf)); + EXPECT_EQ(u"12", std::u16string(buf)); EXPECT_EQ(fill_char, buf[3]); memset(buf, fill_mem, sizeof(buf)); EXPECT_EQ(0, _itow_s(12345, buf, 10)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("12345"), gurl_base::string16(buf)); + EXPECT_EQ(u"12345", std::u16string(buf)); EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10)); } @@ -2343,12 +2427,12 @@ // Override two components, the path with something short, and the query with // something long enough to trigger the bug. - Replacements<gurl_base::char16> repl; - gurl_base::string16 new_query; + Replacements<char16_t> repl; + std::u16string new_query; for (int i = 0; i < 4800; i++) new_query.push_back('a'); - gurl_base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo")); + std::u16string new_path(test_utils::TruncateWStringToUTF16(L"/foo")); repl.SetPath(new_path.c_str(), Component(0, 4)); repl.SetQuery(new_query.c_str(), Component(0, static_cast<int>(new_query.length()))); @@ -2398,41 +2482,41 @@ RawCanonOutputW<1024> output; // Basic ASCII test. 
- gurl_base::string16 str = gurl_base::UTF8ToUTF16("hello"); + std::u16string str = u"hello"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("hello"), gurl_base::string16(output.data())); + EXPECT_EQ(u"hello", std::u16string(output.data())); output.set_length(0); // Mixed ASCII/non-ASCII. - str = gurl_base::UTF8ToUTF16("hellö"); + str = u"hellö"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--hell-8qa"), gurl_base::string16(output.data())); + EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data())); output.set_length(0); // All non-ASCII. - str = gurl_base::UTF8ToUTF16("你好"); + str = u"你好"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--6qq79v"), gurl_base::string16(output.data())); + EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data())); output.set_length(0); // Characters that need mapping (the resulting Punycode is the encoding for // "1⁄4"). - str = gurl_base::UTF8ToUTF16("¼"); + str = u"¼"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--14-c6t"), gurl_base::string16(output.data())); + EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data())); output.set_length(0); // String to encode already starts with "xn--", and all ASCII. Should not // modify the string. - str = gurl_base::UTF8ToUTF16("xn--hell-8qa"); + str = u"xn--hell-8qa"; EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--hell-8qa"), gurl_base::string16(output.data())); + EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data())); output.set_length(0); // String to encode already starts with "xn--", and mixed ASCII/non-ASCII. // Should fail, due to a special case: if the label starts with "xn--", it // should be parsed as Punycode, which must be all ASCII. 
- str = gurl_base::UTF8ToUTF16("xn--hellö"); + str = u"xn--hellö"; EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); output.set_length(0); @@ -2440,9 +2524,120 @@ // This tests that there is still an error for the character '⁄' (U+2044), // which would be a valid ASCII character, U+0044, if the high byte were // ignored. - str = gurl_base::UTF8ToUTF16("xn--1⁄4"); + str = u"xn--1⁄4"; EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); output.set_length(0); } +TEST(URLCanonTest, URLSafetyStatus) { + const struct { + const char* host; + HostSafetyStatus expected_safety_status; + } kTestCases[] = { + // Empty components are ok. + {"", HostSafetyStatus::kOk}, + {".", HostSafetyStatus::kOk}, + {"..", HostSafetyStatus::kOk}, + + // Hostnames with purely non-numeric components are ok. + {"com", HostSafetyStatus::kOk}, + {"a.com", HostSafetyStatus::kOk}, + {"a.b.com", HostSafetyStatus::kOk}, + + // Hostnames with components with letters and numbers are ok. + {"1com", HostSafetyStatus::kOk}, + {"0a.0com", HostSafetyStatus::kOk}, + {"0xa.0xb.0xcom", HostSafetyStatus::kOk}, + {"com1", HostSafetyStatus::kOk}, + {"a1.com1", HostSafetyStatus::kOk}, + {"a1.b1.com1", HostSafetyStatus::kOk}, + + // Hostnames components that are numbers that are before a final + // non-numeric component are ok. + {"1.com", HostSafetyStatus::kOk}, + {"0.1.2com", HostSafetyStatus::kOk}, + + // Invalid hostnames are ok. + {"[", HostSafetyStatus::kOk}, + + // IPv6 hostnames are ok. + {"[::]", HostSafetyStatus::kOk}, + {"[2001:db8::1]", HostSafetyStatus::kOk}, + + // IPv4 hostnames are ok. + {"1.2.3.4", HostSafetyStatus::kOk}, + // IPv4 hostnames with creative representations are ok. 
+ {"01.02.03.04", HostSafetyStatus::kOk}, + {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk}, + {"1.2", HostSafetyStatus::kOk}, + {"1.2.3", HostSafetyStatus::kOk}, + {"0", HostSafetyStatus::kOk}, + {"0x0", HostSafetyStatus::kOk}, + {"07", HostSafetyStatus::kOk}, + + // Hostnames with a final problematic top level domain. + {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {".0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, + + // Hostnames with problematic two highest level domains. + {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, + }; + + for (const auto& test_case : kTestCases) { + // Test with ASCII. + SCOPED_TRACE(test_case.host); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(test_case.host, + Component(0, strlen(test_case.host)))); + + // Test with ASCII and terminal dot, which shouldn't affect results for + // anything that doesn't already end in a dot (or anything that only has + // dots). 
+ std::string host_with_dot = test_case.host; + host_with_dot += "."; + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(host_with_dot.c_str(), + Component(0, host_with_dot.size()))); + + // Test with ASCII and characters that are not part of the component. + std::string host_with_bonus_characters = test_case.host; + host_with_bonus_characters = "00" + host_with_bonus_characters + "00"; + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(host_with_bonus_characters.c_str(), + Component(2, strlen(test_case.host)))); + + // Test with UTF-16. + std::u16string utf16 = gurl_base::UTF8ToUTF16(test_case.host); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size()))); + + // Test with UTF-16 and terminal dot. + std::u16string utf16_with_dot = gurl_base::UTF8ToUTF16(host_with_dot); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16_with_dot.c_str(), + Component(0, utf16_with_dot.size()))); + + // Test with UTF-16 and characters that are not part of the component. + std::u16string utf16_with_bonus_characters = + gurl_base::UTF8ToUTF16(host_with_bonus_characters); + EXPECT_EQ(test_case.expected_safety_status, + CheckHostnameSafety(utf16_with_bonus_characters.c_str(), + Component(2, utf16.size()))); + } +} + } // namespace url
diff --git a/url/url_constants.cc b/url/url_constants.cc index 69399e4..9da6426 100644 --- a/url/url_constants.cc +++ b/url/url_constants.cc
@@ -28,6 +28,7 @@ // See also: https://www.iana.org/assignments/uri-schemes/prov/quic-transport const char kQuicTransportScheme[] = "quic-transport"; const char kTelScheme[] = "tel"; +const char kUrnScheme[] = "urn"; const char kWsScheme[] = "ws"; const char kWssScheme[] = "wss";
diff --git a/url/url_constants.h b/url/url_constants.h index 3c04d68..dcd7c90 100644 --- a/url/url_constants.h +++ b/url/url_constants.h
@@ -32,6 +32,7 @@ COMPONENT_EXPORT(URL) extern const char kMailToScheme[]; COMPONENT_EXPORT(URL) extern const char kQuicTransportScheme[]; COMPONENT_EXPORT(URL) extern const char kTelScheme[]; +COMPONENT_EXPORT(URL) extern const char kUrnScheme[]; COMPONENT_EXPORT(URL) extern const char kWsScheme[]; COMPONENT_EXPORT(URL) extern const char kWssScheme[];
diff --git a/url/url_file.h b/url/url_file.h index cfe047e..6ad79b4 100644 --- a/url/url_file.h +++ b/url/url_file.h
@@ -16,7 +16,7 @@ #ifdef WIN32 // We allow both "c:" and "c|" as drive identifiers. -inline bool IsWindowsDriveSeparator(gurl_base::char16 ch) { +inline bool IsWindowsDriveSeparator(char16_t ch) { return ch == ':' || ch == '|'; } @@ -34,23 +34,44 @@ #ifdef WIN32 +// DoesContainWindowsDriveSpecUntil returns the least number between +// start_offset and max_offset such that the spec has a valid drive +// specification starting at that offset. Otherwise it returns -1. This function +// gracefully handles, by returning -1, start_offset values that are equal to or +// larger than the spec_len, and caps max_offset appropriately to simplify +// callers. max_offset must be at least start_offset. +template <typename CHAR> +inline int DoesContainWindowsDriveSpecUntil(const CHAR* spec, + int start_offset, + int max_offset, + int spec_len) { + GURL_CHECK_LE(start_offset, max_offset); + if (start_offset > spec_len - 2) + return -1; // Not enough room. + if (max_offset > spec_len - 2) + max_offset = spec_len - 2; + for (int offset = start_offset; offset <= max_offset; ++offset) { + if (!gurl_base::IsAsciiAlpha(spec[offset])) + continue; // Doesn't contain a valid drive letter. + if (!IsWindowsDriveSeparator(spec[offset + 1])) + continue; // Isn't followed with a drive separator. + return offset; + } + return -1; +} + // Returns true if the start_offset in the given spec looks like it begins a // drive spec, for example "c:". This function explicitly handles start_offset // values that are equal to or larger than the spec_len to simplify callers. // // If this returns true, the spec is guaranteed to have a valid drive letter -// plus a colon starting at |start_offset|. -template<typename CHAR> -inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset, +// plus a drive letter separator (a colon or a pipe) starting at |start_offset|. 
+template <typename CHAR> +inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, + int start_offset, int spec_len) { - int remaining_len = spec_len - start_offset; - if (remaining_len < 2) - return false; // Not enough room. - if (!gurl_base::IsAsciiAlpha(spec[start_offset])) - return false; // Doesn't start with a valid drive letter. - if (!IsWindowsDriveSeparator(spec[start_offset + 1])) - return false; // Isn't followed with a drive separator. - return true; + return DoesContainWindowsDriveSpecUntil(spec, start_offset, start_offset, + spec_len) == start_offset; } // Returns true if the start_offset in the given text looks like it begins a
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc index d9256a2..4029d61 100644 --- a/url/url_idna_icu.cc +++ b/url/url_idna_icu.cc
@@ -11,7 +11,6 @@ #include <ostream> #include "polyfills/base/check_op.h" -#include "base/i18n/uchar.h" #include "base/no_destructor.h" #include <unicode/uidna.h> #include <unicode/utypes.h> @@ -22,7 +21,7 @@ namespace { -// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to +// A wrapper to use gurl_base::NoDestructor with ICU's UIDNA, a C pointer to // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). // // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned @@ -83,7 +82,7 @@ // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII // version with StringByteSink. That way, we can avoid C wrappers and additional // string conversion. -bool IDNToASCII(const gurl_base::char16* src, int src_len, CanonOutputW* output) { +bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) { GURL_DCHECK(output->length() == 0); // Output buffer is assumed empty. UIDNA* uidna = GetUIDNA(); @@ -91,17 +90,41 @@ while (true) { UErrorCode err = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; - int output_length = - uidna_nameToASCII(uidna, gurl_base::i18n::ToUCharPtr(src), src_len, - gurl_base::i18n::ToUCharPtr(output->data()), - output->capacity(), &info, &err); + int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(), + output->capacity(), &info, &err); + + // Ignore various errors for web compatibility. The options are specified + // by the WHATWG URL Standard. See + // - https://unicode.org/reports/tr46/ + // - https://url.spec.whatwg.org/#concept-domain-to-ascii + // (we set beStrict to false) + + // Disable the "CheckHyphens" option in UTS #46. See + // - https://crbug.com/804688 + // - https://github.com/whatwg/url/issues/267 + info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; + info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; + info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; + + // Disable the "VerifyDnsLength" option in UTS #46. 
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; + info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + if (U_SUCCESS(err) && info.errors == 0) { + // Per WHATWG URL, it is a failure if the ToASCII output is empty. + // + // ICU would usually return UIDNA_ERROR_EMPTY_LABEL in this case, but we + // want to continue allowing http://abc..def/ while forbidding http:///. + // + if (output_length == 0) { + return false; + } + output->set_length(output_length); return true; } - // TODO(jungshik): Look at info.errors to handle them case-by-case basis - // if necessary. if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0) return false; // Unknown error, give up.
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc index c1c878a..ceb75d8 100644 --- a/url/url_parse_file.cc +++ b/url/url_parse_file.cc
@@ -215,7 +215,7 @@ DoParseFileURL(url, url_len, parsed); } -void ParseFileURL(const gurl_base::char16* url, int url_len, Parsed* parsed) { +void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) { DoParseFileURL(url, url_len, parsed); }
diff --git a/url/url_parse_internal.h b/url/url_parse_internal.h index 6f86d86..4e2527a 100644 --- a/url/url_parse_internal.h +++ b/url/url_parse_internal.h
@@ -12,13 +12,13 @@ namespace url { // We treat slashes and backslashes the same for IE compatibility. -inline bool IsURLSlash(gurl_base::char16 ch) { +inline bool IsURLSlash(char16_t ch) { return ch == '/' || ch == '\\'; } // Returns true if we should trim this character from the URL because it is a // space or a control character. -inline bool ShouldTrimFromURL(gurl_base::char16 ch) { +inline bool ShouldTrimFromURL(char16_t ch) { return ch <= ' '; } @@ -67,13 +67,12 @@ Component* filepath, Component* query, Component* ref); -void ParsePathInternal(const gurl_base::char16* spec, +void ParsePathInternal(const char16_t* spec, const Component& path, Component* filepath, Component* query, Component* ref); - // Given a spec and a pointer to the character after the colon following the // scheme, this parses it and fills in the structure, Every item in the parsed // structure is filled EXCEPT for the scheme, which is untouched. @@ -81,7 +80,7 @@ int spec_len, int after_scheme, Parsed* parsed); -void ParseAfterScheme(const gurl_base::char16* spec, +void ParseAfterScheme(const char16_t* spec, int spec_len, int after_scheme, Parsed* parsed);
diff --git a/url/url_test_utils.h b/url/url_test_utils.h index f8d40e1..bb75c74 100644 --- a/url/url_test_utils.h +++ b/url/url_test_utils.h
@@ -10,7 +10,6 @@ #include <string> -#include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/url_canon_internal.h" @@ -24,11 +23,11 @@ // in base bacause it passes invalid UTF-16 characters which is important for // test purposes. As a result, this is not meant to handle true UTF-32 encoded // strings. -inline gurl_base::string16 TruncateWStringToUTF16(const wchar_t* src) { - gurl_base::string16 str; +inline std::u16string TruncateWStringToUTF16(const wchar_t* src) { + std::u16string str; int length = static_cast<int>(wcslen(src)); for (int i = 0; i < length; ++i) { - str.push_back(static_cast<gurl_base::char16>(src[i])); + str.push_back(static_cast<char16_t>(src[i])); } return str; }
diff --git a/url/url_util.cc b/url/url_util.cc index 13c30b3..0c35913 100644 --- a/url/url_util.cc +++ b/url/url_util.cc
@@ -11,7 +11,6 @@ #include "polyfills/base/check_op.h" #include "base/compiler_specific.h" #include "base/no_destructor.h" -#include "base/stl_util.h" #include "base/strings/string_util.h" #include "url/url_canon_internal.h" #include "url/url_constants.h" @@ -137,7 +136,8 @@ template<> struct CharToStringPiece<char> { typedef gurl_base::StringPiece Piece; }; -template<> struct CharToStringPiece<gurl_base::char16> { +template <> +struct CharToStringPiece<char16_t> { typedef gurl_base::StringPiece16 Piece; }; @@ -468,10 +468,10 @@ // the SchemeRegistry has been used. // // This normally means you're trying to set up a new scheme too late or using - // the SchemeRegistry too early in your application's init process. Make sure - // that you haven't added any static GURL initializers in tests. + // the SchemeRegistry too early in your application's init process. GURL_DCHECK(!g_scheme_registries_used.load()) - << "Trying to add a scheme after the lists have been used."; + << "Trying to add a scheme after the lists have been used. 
" + "Make sure that you haven't added any static GURL initializers in tests."; // If this assert triggers, it means you've called Add*Scheme after // LockSchemeRegistries has been called (see the header file for @@ -557,6 +557,15 @@ &GetSchemeRegistryWithoutLocking()->standard_schemes); } +std::vector<std::string> GetStandardSchemes() { + std::vector<std::string> result; + result.reserve(GetSchemeRegistry().standard_schemes.size()); + for (const auto& entry : GetSchemeRegistry().standard_schemes) { + result.push_back(entry.scheme); + } + return result; +} + void AddReferrerScheme(const char* new_scheme, SchemeType type) { DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistryWithoutLocking()->referrer_schemes); @@ -638,13 +647,13 @@ return DoIsStandard(spec, scheme, type); } -bool GetStandardSchemeType(const gurl_base::char16* spec, +bool GetStandardSchemeType(const char16_t* spec, const Component& scheme, SchemeType* type) { return DoIsStandard(spec, scheme, type); } -bool IsStandard(const gurl_base::char16* spec, const Component& scheme) { +bool IsStandard(const char16_t* spec, const Component& scheme) { SchemeType unused_scheme_type; return DoIsStandard(spec, scheme, &unused_scheme_type); } @@ -662,7 +671,7 @@ return DoFindAndCompareScheme(str, str_len, compare, found_scheme); } -bool FindAndCompareScheme(const gurl_base::char16* str, +bool FindAndCompareScheme(const char16_t* str, int str_len, const char* compare, Component* found_scheme) { @@ -723,7 +732,7 @@ charset_converter, output, output_parsed); } -bool Canonicalize(const gurl_base::char16* spec, +bool Canonicalize(const char16_t* spec, int spec_len, bool trim_path_end, CharsetConverter* charset_converter, @@ -749,7 +758,7 @@ bool ResolveRelative(const char* base_spec, int base_spec_len, const Parsed& base_parsed, - const gurl_base::char16* relative, + const char16_t* relative, int relative_length, CharsetConverter* charset_converter, CanonOutput* output, @@ -773,7 +782,7 @@ bool 
ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* out_parsed) { @@ -853,7 +862,7 @@ return DoCompareSchemeComponent(spec, component, compare_to); } -bool CompareSchemeComponent(const gurl_base::char16* spec, +bool CompareSchemeComponent(const char16_t* spec, const Component& component, const char* compare_to) { return DoCompareSchemeComponent(spec, component, compare_to);
diff --git a/url/url_util.h b/url/url_util.h index 1816637..00399c2 100644 --- a/url/url_util.h +++ b/url/url_util.h
@@ -10,7 +10,6 @@ #include <vector> #include "polyfills/base/component_export.h" -#include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -63,6 +62,14 @@ COMPONENT_EXPORT(URL) void AddStandardScheme(const char* new_scheme, SchemeType scheme_type); +// Returns the list of schemes registered for "standard" URLs. Note, this +// should not be used if you just need to check if your protocol is standard +// or not. Instead use the IsStandard() function above as its much more +// efficient. This function should only be used where you need to perform +// other operations against the standard scheme list. +COMPONENT_EXPORT(URL) +std::vector<std::string> GetStandardSchemes(); + // Adds an application-defined scheme to the internal list of schemes allowed // for referrers. COMPONENT_EXPORT(URL) @@ -134,7 +141,7 @@ const char* compare, Component* found_scheme); COMPONENT_EXPORT(URL) -bool FindAndCompareScheme(const gurl_base::char16* str, +bool FindAndCompareScheme(const char16_t* str, int str_len, const char* compare, Component* found_scheme); @@ -144,7 +151,7 @@ return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), compare, found_scheme); } -inline bool FindAndCompareScheme(const gurl_base::string16& str, +inline bool FindAndCompareScheme(const std::u16string& str, const char* compare, Component* found_scheme) { return FindAndCompareScheme(str.data(), static_cast<int>(str.size()), @@ -156,7 +163,7 @@ COMPONENT_EXPORT(URL) bool IsStandard(const char* spec, const Component& scheme); COMPONENT_EXPORT(URL) -bool IsStandard(const gurl_base::char16* spec, const Component& scheme); +bool IsStandard(const char16_t* spec, const Component& scheme); // Returns true if the given scheme identified by |scheme| within |spec| is in // the list of allowed schemes for referrers (see AddReferrerScheme). 
@@ -171,7 +178,7 @@ const Component& scheme, SchemeType* type); COMPONENT_EXPORT(URL) -bool GetStandardSchemeType(const gurl_base::char16* spec, +bool GetStandardSchemeType(const char16_t* spec, const Component& scheme, SchemeType* type); @@ -213,7 +220,7 @@ CanonOutput* output, Parsed* output_parsed); COMPONENT_EXPORT(URL) -bool Canonicalize(const gurl_base::char16* spec, +bool Canonicalize(const char16_t* spec, int spec_len, bool trim_path_end, CharsetConverter* charset_converter, @@ -243,7 +250,7 @@ bool ResolveRelative(const char* base_spec, int base_spec_len, const Parsed& base_parsed, - const gurl_base::char16* relative, + const char16_t* relative, int relative_length, CharsetConverter* charset_converter, CanonOutput* output, @@ -265,7 +272,7 @@ bool ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed, - const Replacements<gurl_base::char16>& replacements, + const Replacements<char16_t>& replacements, CharsetConverter* charset_converter, CanonOutput* output, Parsed* out_parsed);
diff --git a/url/url_util_internal.h b/url/url_util_internal.h index 08f8929..b2730b6 100644 --- a/url/url_util_internal.h +++ b/url/url_util_internal.h
@@ -5,9 +5,6 @@ #ifndef URL_URL_UTIL_INTERNAL_H_ #define URL_URL_UTIL_INTERNAL_H_ -#include <string> - -#include "base/strings/string16.h" #include "url/third_party/mozilla/url_parse.h" namespace url { @@ -17,7 +14,7 @@ bool CompareSchemeComponent(const char* spec, const Component& component, const char* compare_to); -bool CompareSchemeComponent(const gurl_base::char16* spec, +bool CompareSchemeComponent(const char16_t* spec, const Component& component, const char* compare_to);
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc index a63294f..3dcfa76 100644 --- a/url/url_util_unittest.cc +++ b/url/url_util_unittest.cc
@@ -136,6 +136,16 @@ &scheme_type)); } +TEST_F(URLUtilTest, GetStandardSchemes) { + std::vector<std::string> expected = { + kHttpsScheme, kHttpScheme, kFileScheme, + kFtpScheme, kWssScheme, kWsScheme, + kFileSystemScheme, kQuicTransportScheme, "foo", + }; + AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY); + EXPECT_EQ(expected, GetStandardSchemes()); +} + TEST_F(URLUtilTest, ReplaceComponents) { Parsed parsed; RawCanonOutputT<char> output; @@ -236,34 +246,33 @@ for (size_t i = 0; i < gurl_base::size(decode_cases); i++) { const char* input = decode_cases[i].input; - RawCanonOutputT<gurl_base::char16> output; + RawCanonOutputT<char16_t> output; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8OrIsomorphic, &output); - EXPECT_EQ(decode_cases[i].output, - gurl_base::UTF16ToUTF8(gurl_base::string16(output.data(), - output.length()))); + EXPECT_EQ(decode_cases[i].output, gurl_base::UTF16ToUTF8(std::u16string( + output.data(), output.length()))); - RawCanonOutputT<gurl_base::char16> output_utf8; + RawCanonOutputT<char16_t> output_utf8; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8, &output_utf8); EXPECT_EQ(decode_cases[i].output, gurl_base::UTF16ToUTF8( - gurl_base::string16(output_utf8.data(), output_utf8.length()))); + std::u16string(output_utf8.data(), output_utf8.length()))); } // Our decode should decode %00 const char zero_input[] = "%00"; - RawCanonOutputT<gurl_base::char16> zero_output; + RawCanonOutputT<char16_t> zero_output; DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8, &zero_output); - EXPECT_NE("%00", gurl_base::UTF16ToUTF8( - gurl_base::string16(zero_output.data(), zero_output.length()))); + EXPECT_NE("%00", gurl_base::UTF16ToUTF8(std::u16string(zero_output.data(), + zero_output.length()))); // Test the error behavior for invalid UTF-8. 
struct Utf8DecodeCase { const char* input; - std::vector<gurl_base::char16> expected_iso; - std::vector<gurl_base::char16> expected_utf8; + std::vector<char16_t> expected_iso; + std::vector<char16_t> expected_utf8; } utf8_decode_cases[] = { // %e5%a5%bd is a valid UTF-8 sequence. U+597D {"%e4%a0%e5%a5%bd", @@ -279,17 +288,17 @@ for (const auto& test : utf8_decode_cases) { const char* input = test.input; - RawCanonOutputT<gurl_base::char16> output_iso; + RawCanonOutputT<char16_t> output_iso; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8OrIsomorphic, &output_iso); - EXPECT_EQ(gurl_base::string16(test.expected_iso.data()), - gurl_base::string16(output_iso.data(), output_iso.length())); + EXPECT_EQ(std::u16string(test.expected_iso.data()), + std::u16string(output_iso.data(), output_iso.length())); - RawCanonOutputT<gurl_base::char16> output_utf8; + RawCanonOutputT<char16_t> output_utf8; DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8, &output_utf8); - EXPECT_EQ(gurl_base::string16(test.expected_utf8.data()), - gurl_base::string16(output_utf8.data(), output_utf8.length())); + EXPECT_EQ(std::u16string(test.expected_utf8.data()), + std::u16string(output_utf8.data(), output_utf8.length())); } }