Update to upstream revision d02279b46eb30b0f90a3ac18ec306b77a6d2e21e
This updates the googleurl standalone copy to the upstream version from
Mon Jun 7 15:26:19 2021 +0000.
diff --git a/AUTHORS b/AUTHORS
index ce38168..4bb6b20 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,8 @@
Adenilson Cavalcanti <a.cavalcanti@samsung.com>
Aditya Bhargava <heuristicist@gmail.com>
Adrian Belgun <adrian.belgun@intel.com>
+Adrian Ratiu <adrian.ratiu@collabora.corp-partner.google.com>
+Adrià Vilanova Martínez <me@avm99963.com>
Ahmet Emir Ercin <ahmetemiremir@gmail.com>
Ajay Berwal <a.berwal@samsung.com>
Ajay Berwal <ajay.berwal@samsung.com>
@@ -41,6 +43,7 @@
Aku Kotkavuo <a.kotkavuo@partner.samsung.com>
Aldo Culquicondor <alculquicondor@gmail.com>
Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com>
+Alex Chronopoulos <achronop@gmail.com>
Alex Gabriel <minilogo@gmail.com>
Alex Gartrell <agartrell@cmu.edu>
Alex Gaynor <alex.gaynor@gmail.com>
@@ -66,6 +69,7 @@
Ali Vathi <ali.akbar@gmail.com>
Allan Sandfeld Jensen <allan.jensen@qt.io>
Alper Çakan <alpercakan98@gmail.com>
+Alvaro Silva <alvaro.fagner@gmail.com>
Ambarish Rapte <ambarish.r@samsung.com>
Amey Jahagirdar <jahagird@amazon.com>
Amit Sarkar <amit.srkr@samsung.com>
@@ -125,6 +129,7 @@
Asish Singh <asish.singh@samsung.com>
Attila Dusnoki <dati91@gmail.com>
Avinaash Doreswamy <avi.nitk@samsung.com>
+Ayush Dubey <dubeyaayush07@gmail.com>
Ayush Khandelwal <k.ayush@samsung.com>
Azhar Shaikh <azhar.shaikh@intel.com>
Balazs Kelemen <b.kelemen@samsung.com>
@@ -150,6 +155,7 @@
Branden Archer <bma4@zips.uakron.edu>
Brendan Kirby <brendan.kirby@imgtec.com>
Brendan Long <self@brendanlong.com>
+Brendon Tiszka <btiszka@gmail.com>
Brian Clifton <clifton@brave.com>
Brian G. Merrell <bgmerrell@gmail.com>
Brian Konzman, SJ <b.g.konzman@gmail.com>
@@ -171,6 +177,7 @@
Caitlin Potter <caitpotter88@gmail.com>
Calvin Mei <calvimei@amazon.com>
Cameron Gutman <aicommander@gmail.com>
+Camille Viot <viot.camille@outlook.com>
Carlos Santa <carlos.santa@intel.com>
Catalin Badea <badea@adobe.com>
Cathie Chen <cathiechen@tencent.com>
@@ -184,6 +191,7 @@
Chang Shu <c.shu@samsung.com>
Changbin Shao <changbin.shao@intel.com>
Changjun Yang <changjun.yang@intel.com>
+ChangSeok Lee <charlie.lee921@gmail.com>
ChangSeok Oh <shivamidow@gmail.com>
Changwan Hong <changwan.hong@navercorp.com>
Changyeon Kim <cyzero.kim@samsung.com>
@@ -235,6 +243,7 @@
Dániel Bátyai <dbatyai@inf.u-szeged.hu>
Dániel Vince <vinced@inf.u-szeged.hu>
Daoming Qiu <daoming.qiu@intel.com>
+Darik Harter <darik.harter@gmail.com>
Darshini KN <kn.darshini@samsung.com>
Dave Vandyke <kzar@kzar.co.uk>
David Benjamin <davidben@mit.edu>
@@ -263,6 +272,7 @@
Derek Halman <d.halman@gmail.com>
Devlin Cronin <rdevlin.cronin@gmail.com>
Dhi Aurrahman <dio@rockybars.com>
+Di Wu <meetwudi@gmail.com>
Diana Suvorova <diana.suvorova@gmail.com>
Diego Ferreiro Val <elfogris@gmail.com>
Dillon Sellars <dill.sellars@gmail.com>
@@ -458,6 +468,7 @@
Jay Soffian <jaysoffian@gmail.com>
Jeado Ko <haibane84@gmail.com>
Jeffrey C <jeffreyca16@gmail.com>
+Jeffrey Yeung <jeffrey.yeung@poly.com>
Jeongeun Kim <je_julie.kim@samsung.com>
Jeongmin Kim <kimwjdalsl@gmail.com>
Jeongwoo Park <jwoo.park@navercorp.com>
@@ -508,7 +519,9 @@
Joe Thomas <mhx348@motorola.com>
Joel Stanley <joel@jms.id.au>
Joey Jiao <joeyjiao0810@gmail.com>
+Joey Mou <joeymou@amazon.com>
Johannes Rudolph <johannes.rudolph@googlemail.com>
+John Ingve Olsen <johningveolsen@gmail.com>
John Kleinschmidt <kleinschmidtorama@gmail.com>
John Yani <vanuan@gmail.com>
John Yoo <nearbyh13@gmail.com>
@@ -562,6 +575,8 @@
Kangil Han <kangil.han@samsung.com>
Kangyuan Shu <kangyuan.shu@intel.com>
Karan Thakkar <karanjthakkar@gmail.com>
+Karel Král <kralkareliv@gmail.com>
+Karl <karlpolicechromium@gmail.com>
Kartikey Bhatt <kartikey@amazon.com>
Kaspar Brand <googlecontrib@velox.ch>
Kaushalendra Mishra <k.mishra@samsung.com>
@@ -629,6 +644,7 @@
Li Yin <li.yin@intel.com>
Lidwine Genevet <lgenevet@cisco.com>
Lin Sun <lin.sun@intel.com>
+Lin Peng <penglin22@huawei.com>
Lingqi Chi <someway.bit@gmail.com>
Lingyun Cai <lingyun.cai@intel.com>
Lionel Landwerlin <lionel.g.landwerlin@intel.com>
@@ -646,6 +662,7 @@
Luke Seunghoe Gu <gulukesh@gmail.com>
Luke Zarko <lukezarko@gmail.com>
Luoxi Pan <l.panpax@gmail.com>
+Lu Yahan <yahan@iscas.ac.cn>
Maarten Lankhorst <m.b.lankhorst@gmail.com>
Maciej Pawlowski <m.pawlowski@eyeo.com>
Magnus Danielsson <fuzzac@gmail.com>
@@ -672,6 +689,7 @@
Mark Seaborn <mrs@mythic-beasts.com>
Mark Winter <wintermarkedward@gmail.com>
Martijn Croonen <martijn@martijnc.be>
+Martin Aberer <mail@martin-aberer.at>
Martin Bednorz <m.s.bednorz@gmail.com>
Martin Persson <mnpn03@gmail.com>
Martin Rogalla <martin@martinrogalla.com>
@@ -695,13 +713,14 @@
Matthew Willis <appamatto@gmail.com>
Matthias Reitinger <reimarvin@gmail.com>
Matthieu Rigolot <matthieu.rigolot@gmail.com>
+Matthieu Vlad Hauglustaine <matt.hauglustaine@gmail.com>
Max Perepelitsyn <pph34r@gmail.com>
Max Vujovic <mvujovic@adobe.com>
Mayank Gupta <mayank.g1@samsung.com>
Mayur Kankanwadi <mayurk.vk@samsung.com>
Md Abdullah Al Alamin <a.alamin.cse@gmail.com>
Md. Hasanur Rashid <hasanur.r@samsung.com>
-Md Jobed Hossain <jrony15@gmail.com>
+Md Jobed Hossain <jobed.h@samsung.com>
Md Sami Uddin <md.sami@samsung.com>
Michael Cirone <mikecirone@gmail.com>
Michael Constant <mconst@gmail.com>
@@ -712,6 +731,7 @@
Michael Morrison <codebythepound@gmail.com>
Michael Müller <michael@fds-team.de>
Michael Schechter <mike.schechter@gmail.com>
+Michael Smith <sideshowbarker@gmail.com>
Michaël Zasso <mic.besace@gmail.com>
Michael Zugelder <michael@zugelder.org>
Michel Promonet <michel.promonet.1@gmail.com>
@@ -747,6 +767,7 @@
Myeongjin Cho <myeongjin.cho@navercorp.com>
Myles C. Maxfield <mymax@amazon.com>
Myung-jong Kim <mjkim610@gmail.com>
+Myunghoon Kim <asdvfrqwe@gmail.com>
Nagarajan Narayanan <nagarajan.n@samsung.com>
Nagarjuna Atluri <nagarjuna.a@samsung.com>
Naiem Shaik <naiem.shaik@gmail.com>
@@ -757,10 +778,12 @@
Naveen Kumar Devaraj <devarajn@amazon.com>
Naveen Kumar S G <naveensg@samsung.com>
Nayan Kumar K <qtc746@motorola.com>
+Nayeem Hasan <nayeemhasan.nh01@gmail.com>
Neal Gompa <ngompa13@gmail.com>
Ned Williamson <nedwilliamson@gmail.com>
Nedeljko Babic <nedeljko.babic@imgtec.com>
Nidhi Jaju <nidhijaju127@gmail.com>
+Niek van der Maas <mail@niekvandermaas.nl>
Nikhil Bansal <n.bansal@samsung.com>
Nikhil Sahni <nikhil.sahni@samsung.com>
Nikita Ofitserov <himikof@gmail.com>
@@ -779,6 +802,7 @@
Olli Raula (Old name Olli Syrjälä) <olli.raula@intel.com>
Omar Sandoval <osandov@osandov.com>
Owen Yuwono <owenyuwono@gmail.com>
+Palash Verma <palashverma47@gmail.com>
Pan Deng <pan.deng@intel.com>
Parag Radke <nrqv63@motorola.com>
Paritosh Kumar <paritosh.in@samsung.com>
@@ -807,6 +831,7 @@
Peng Hu <penghu@tencent.com>
Peng Jiang <leiyi.jp@gmail.com>
Peng Xinchao <pxinchao@gmail.com>
+Peng-Yu Chen <pengyu@libstarrify.so>
Peter Bright <drpizza@quiscalusmexicanus.org>
Peter Brophy <pbrophy@adobe.com>
Peter Collingbourne <peter@pcc.me.uk>
@@ -825,6 +850,7 @@
Pierre Neter <pierreneter@gmail.com>
Pierre-Antoine LaFayette <pierre.lafayette@gmail.com>
Po-Chun Chang <pochang0403@gmail.com>
+Prakhar Shrivastav <p.shri@samsung.com>
Pramod Begur Srinath <pramod.bs@samsung.com>
Pranay Kumar <pranay.kumar@samsung.com>
Pranjal Jumde <pranjal@brave.com>
@@ -899,6 +925,7 @@
Ryan Norton <rnorton10@gmail.com>
Ryan Sleevi <ryan-chromium-dev@sleevi.com>
Ryan Yoakum <ryoakum@skobalt.com>
+Ryo Ogawa <negibokken@gmail.com>
Ryuan Choi <ryuan.choi@samsung.com>
Saikrishna Arcot <saiarcot895@gmail.com>
Sajal Khandelwal <skhandelwa22@bloomberg.net>
@@ -942,6 +969,7 @@
Sergey Shekyan <shekyan@gmail.com>
Sergey Talantov <sergey.talantov@gmail.com>
Sergio Carlos Morales Angeles <carloschilazo@gmail.com>
+Sergio Garcia Murillo <sergio.garcia.murillo@gmail.com>
Sergiy Belozorov <rryk.ua@gmail.com>
Seshadri Mahalingam <seshadri.mahalingam@gmail.com>
Seungkyu Lee <zx6658@gmail.com>
@@ -1001,11 +1029,13 @@
Steven Pennington <spenn@engr.uvic.ca>
Steven Roussey <sroussey@gmail.com>
Subrahmanya Praveen Munukutla <sataya.m@samsung.com>
+Sucheta Saraf <suchetasaraf95@gmail.com>
Suchit Agrawal <a.suchit@samsung.com>
Sudarsana Babu Nagineni <sudarsana.nagineni@intel.com>
Sudarshan Parthasarathy <sudarshan.p@samsung.com>
Sujae Jo <sujae33.jo@gmail.com>
Sujith S S <sujiths.s@samsung.com>
+Sumaid Syed <sumaidsyed@gmail.com>
Sunchang Li <johnstonli@tencent.com>
Sundoo Kim <nerdooit@gmail.com>
Suneel Kota <suneel.kota@samsung.com>
@@ -1014,6 +1044,8 @@
Sungmann Cho <sungmann.cho@navercorp.com>
Sunil Ratnu <sunil.ratnu@samsung.com>
Sunitha Srivatsa <srivats@amazon.com>
+Sunwoo Nam <jegalzz88@gmail.com>
+Surya K M <suryagowda590@gmail.com>
Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Suvanjan Mukherjee <suvanjanmukherjee@gmail.com>
Suyambulingam R M <suyambu.rm@samsung.com>
@@ -1059,9 +1091,12 @@
Tomas Popela <tomas.popela@gmail.com>
Torsten Kurbad <google@tk-webart.de>
Toshihito Kikuchi <leamovret@gmail.com>
+Toshiaki Tanaka <zokutyou2@gmail.com>
Trent Willis <trentmwillis@gmail.com>
Trevor Perrin <unsafe@trevp.net>
Tripta Gupta <tripta.g@samsung.com>
+Tristan Fraipont <tristan.fraipont@gmail.com>
+Tudor Brindus <me@tbrindus.ca>
Tuukka Toivonen <tuukka.toivonen@intel.com>
U. Artie Eoff <ullysses.a.eoff@intel.com>
Umar Hansa <umar.hansa@gmail.com>
@@ -1158,13 +1193,16 @@
Yunchao He <yunchao.he@intel.com>
Yupei Lin <yplam@yplam.com>
Yupei Wang <perryuwang@tencent.com>
+Yuqing Cao <caoyuqing@huawei.com>
Yura Yaroshevich <yura.yaroshevich@gmail.com>
Yuri Gorobets <yuri.gorobets@gmail.com>
Yuriy Taraday <yorik.sar@gmail.com>
Yuta Kasai <kasai.yuta0810@gmail.com>
Yuvanesh Natarajan <yuvanesh.n1@samsung.com>
+Zach Bjornson <zbbjornson@gmail.com>
Zeno Albisser <zeno.albisser@digia.com>
Zeqin Chen <talonchen@tencent.com>
+Zhang Hao <15686357310a@gmail.com>
Zhaoze Zhou <zhaoze.zhou@partner.samsung.com>
Zheda Chen <zheda.chen@intel.com>
Zheng Chuang <zhengchuangscu@gmail.com>
@@ -1201,6 +1239,7 @@
Dell Technologies Inc. <*@dell.corp-partner.google.com>
Duck Duck Go, Inc. <*@duckduckgo.com>
Endless Mobile, Inc. <*@endlessm.com>
+EngFlow, Inc. <*@engflow.com>
Estimote, Inc. <*@estimote.com>
Facebook, Inc. <*@fb.com>
Facebook, Inc. <*@oculus.com>
@@ -1246,6 +1285,7 @@
Venture 3 Systems LLC <*@venture3systems.com>
Vewd Software AS <*@vewd.com>
Vivaldi Technologies AS <*@vivaldi.com>
+Wacom <*@wacom.com>
Yandex LLC <*@yandex-team.ru>
# Please DO NOT APPEND here. See comments at the top of the file.
# END organizations section.
diff --git a/WORKSPACE b/WORKSPACE
index 2823b98..809e6b9 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -3,3 +3,11 @@
# found in the LICENSE file.
workspace(name = "com_google_googleurl")
+
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
+
+git_repository(
+ name = "com_google_absl",
+ commit = "17c954d90d5661e27db8fc5f086085690a8372d9",
+ remote = "https://github.com/abseil/abseil-cpp.git",
+)
diff --git a/base/BUILD b/base/BUILD
index 63787b7..c933421 100644
--- a/base/BUILD
+++ b/base/BUILD
@@ -13,14 +13,13 @@
"containers/contiguous_iterator.h",
"containers/span.h",
"containers/util.h",
+ "cxx17_backports.h",
"debug/leak_annotations.h",
"functional/identity.h",
"functional/invoke.h",
"functional/not_fn.h",
- "i18n/uchar.h",
"macros.h",
"no_destructor.h",
- "optional.h",
"ranges/algorithm.h",
"ranges/functional.h",
"ranges/ranges.h",
@@ -32,5 +31,6 @@
deps = [
"//build:build_config",
"//polyfills",
+ "@com_google_absl//absl/types:optional",
],
)
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index fa961b0..3faca72 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -24,6 +24,13 @@
#define HAS_CPP_ATTRIBUTE(x) 0
#endif
+// A wrapper around `__has_builtin`, similar to HAS_CPP_ATTRIBUTE.
+#if defined(__has_builtin)
+#define HAS_BUILTIN(x) __has_builtin(x)
+#else
+#define HAS_BUILTIN(x) 0
+#endif
+
// Annotate a variable indicating it's ok if the variable is not used.
// (Typically used to silence a compiler warning when the assignment
// is important for some other reason.)
@@ -332,4 +339,59 @@
#endif // defined(__clang_analyzer__)
+// Use the nomerge attribute to prevent merging of multiple identical calls.
+#if defined(__clang__) && __has_attribute(nomerge)
+#define NOMERGE [[clang::nomerge]]
+#else
+#define NOMERGE
+#endif
+
+// Marks a type as being eligible for the "trivial" ABI despite having a
+// non-trivial destructor or copy/move constructor. Such types can be relocated
+// after construction by simply copying their memory, which makes them eligible
+// to be passed in registers. The canonical example is std::unique_ptr.
+//
+// Use with caution; this has some subtle effects on constructor/destructor
+// ordering and will be very incorrect if the type relies on its address
+// remaining constant. When used as a function argument (by value), the value
+// may be constructed in the caller's stack frame, passed in a register, and
+// then used and destructed in the callee's stack frame. A similar thing can
+// occur when values are returned.
+//
+// TRIVIAL_ABI is not needed for types which have a trivial destructor and
+// copy/move constructors, such as gurl_base::TimeTicks and other POD.
+//
+// It is also not likely to be effective on types too large to be passed in one
+// or two registers on typical target ABIs.
+//
+// See also:
+// https://clang.llvm.org/docs/AttributeReference.html#trivial-abi
+// https://libcxx.llvm.org/docs/DesignDocs/UniquePtrTrivialAbi.html
+#if defined(__clang__) && __has_attribute(trivial_abi)
+#define TRIVIAL_ABI [[clang::trivial_abi]]
+#else
+#define TRIVIAL_ABI
+#endif
+
+// Marks a member function as reinitializing a moved-from variable.
+// See also
+// https://clang.llvm.org/extra/clang-tidy/checks/bugprone-use-after-move.html#reinitialization
+#if defined(__clang__) && __has_attribute(reinitializes)
+#define REINITIALIZES_AFTER_MOVE [[clang::reinitializes]]
+#else
+#define REINITIALIZES_AFTER_MOVE
+#endif
+
+// Requires constant initialization. See constinit in C++20. Guarantees that a
+// variable is initialized before execution and does not require a global
+// constructor.
+#if defined(__has_attribute)
+#if __has_attribute(require_constant_initialization)
+#define CONSTINIT __attribute__((require_constant_initialization))
+#endif
+#endif
+#if !defined(CONSTINIT)
+#define CONSTINIT
+#endif
+
#endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/containers/checked_iterators.h b/base/containers/checked_iterators.h
index 30c35bd..b5fe925 100644
--- a/base/containers/checked_iterators.h
+++ b/base/containers/checked_iterators.h
@@ -11,6 +11,7 @@
#include "polyfills/base/check_op.h"
#include "base/containers/util.h"
+#include "build/build_config.h"
namespace gurl_base {
@@ -27,77 +28,24 @@
template <typename U>
friend class CheckedContiguousIterator;
- constexpr CheckedContiguousIterator() = default;
-
-#if defined(_LIBCPP_VERSION)
- // The following using declaration, single argument implicit constructor and
- // friended `__unwrap_iter` overload are required to use an optimized code
- // path when using a CheckedContiguousIterator with libc++ algorithms such as
- // std::copy(first, last, result), std::copy_backward(first, last, result),
- // std::move(first, last, result) and std::move_backward(first, last, result).
- //
- // Each of these algorithms dispatches to a std::memmove if this is safe to do
- // so, i.e. when all of `first`, `last` and `result` are iterators over
- // contiguous storage of the same type modulo const qualifiers.
- //
- // libc++ implements this for its contiguous iterators by invoking the
- // unqualified __unwrap_iter, which returns the underlying pointer for
- // iterators over std::vector and std::string, and returns the original
- // iterator otherwise.
- //
- // Thus in order to opt into this optimization for CCI, we need to provide our
- // own __unwrap_iter, returning the underlying raw pointer if it is safe to do
- // so.
- //
- // Furthermore, considering that std::copy is implemented as follows, the
- // return type of __unwrap_iter(CCI) needs to be convertible to CCI, which is
- // why an appropriate implicit single argument constructor is provided for the
- // optimized case:
- //
- // template <class InIter, class OutIter>
- // OutIter copy(InIter first, InIter last, OutIter result) {
- // return __copy(__unwrap_iter(first), __unwrap_iter(last),
- // __unwrap_iter(result));
- // }
- //
- // Unoptimized __copy() signature:
- // template <class InIter, class OutIter>
- // OutIter __copy(InIter first, InIter last, OutIter result);
- //
- // Optimized __copy() signature:
- // template <class T, class U>
- // U* __copy(T* first, T* last, U* result);
- //
- // Finally, this single argument constructor sets all internal fields to the
- // passed in pointer. This allows the resulting CCI to be used in other
- // optimized calls to std::copy (or std::move, std::copy_backward,
- // std::move_backward). However, it should not be used otherwise, since
- // invoking any of its public API will result in a GURL_CHECK failure. This also
- // means that callers should never use the single argument constructor
- // directly.
- template <typename U>
- using PtrIfSafeToMemmove = std::enable_if_t<
- std::is_trivially_copy_assignable<std::remove_const_t<U>>::value,
- U*>;
-
- template <int&... ExplicitArgumentBarrier, typename U = T>
- constexpr CheckedContiguousIterator(PtrIfSafeToMemmove<U> ptr)
- : start_(ptr), current_(ptr), end_(ptr) {}
-
- template <int&... ExplicitArgumentBarrier, typename U = T>
- friend constexpr PtrIfSafeToMemmove<U> __unwrap_iter(
- CheckedContiguousIterator iter) {
- return iter.current_;
- }
+ // Required for certain libc++ algorithm optimizations that are not available
+ // for NaCl.
+#if defined(_LIBCPP_VERSION) && !defined(OS_NACL)
+ template <typename Ptr>
+ friend struct std::pointer_traits;
#endif
+ constexpr CheckedContiguousIterator() = default;
+
constexpr CheckedContiguousIterator(T* start, const T* end)
: CheckedContiguousIterator(start, start, end) {}
+
constexpr CheckedContiguousIterator(const T* start, T* current, const T* end)
: start_(start), current_(current), end_(end) {
GURL_CHECK_LE(start, current);
GURL_CHECK_LE(current, end);
}
+
constexpr CheckedContiguousIterator(const CheckedContiguousIterator& other) =
default;
@@ -269,4 +217,49 @@
} // namespace base
+#if defined(_LIBCPP_VERSION) && !defined(OS_NACL)
+// Specialize both std::__is_cpp17_contiguous_iterator and std::pointer_traits
+// for CCI in case we compile with libc++ outside of NaCl. The former is
+// required to enable certain algorithm optimizations (e.g. std::copy can be a
+// simple std::memmove under certain circumstances), and is a precursor to
+// C++20's std::contiguous_iterator concept [1]. Once we actually use C++20 it
+// will be enough to add `using iterator_concept = std::contiguous_iterator_tag`
+// to the iterator class [2], and we can get rid of this non-standard
+// specialization.
+//
+// The latter is required to obtain the underlying raw pointer without resulting
+// in GURL_CHECK failures. The important bit is the `to_address(pointer)` overload,
+// which is the standard blessed way to customize `std::to_address(pointer)` in
+// C++20 [3].
+//
+// [1] https://wg21.link/iterator.concept.contiguous
+// [2] https://wg21.link/std.iterator.tags
+// [3] https://wg21.link/pointer.traits.optmem
+namespace std {
+
+template <typename T>
+struct __is_cpp17_contiguous_iterator<::gurl_base::CheckedContiguousIterator<T>>
+ : true_type {};
+
+template <typename T>
+struct pointer_traits<::gurl_base::CheckedContiguousIterator<T>> {
+ using pointer = ::gurl_base::CheckedContiguousIterator<T>;
+ using element_type = T;
+ using difference_type = ptrdiff_t;
+
+ template <typename U>
+ using rebind = ::gurl_base::CheckedContiguousIterator<U>;
+
+ static constexpr pointer pointer_to(element_type& r) noexcept {
+ return pointer(&r, &r);
+ }
+
+ static constexpr element_type* to_address(pointer p) noexcept {
+ return p.current_;
+ }
+};
+
+} // namespace std
+#endif
+
#endif // BASE_CONTAINERS_CHECKED_ITERATORS_H_
diff --git a/base/containers/contains.h b/base/containers/contains.h
new file mode 100644
index 0000000..55b1fb5
--- /dev/null
+++ b/base/containers/contains.h
@@ -0,0 +1,98 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CONTAINERS_CONTAINS_H_
+#define BASE_CONTAINERS_CONTAINS_H_
+
+#include <type_traits>
+#include <utility>
+
+#include "base/ranges/algorithm.h"
+#include "base/ranges/ranges.h"
+#include "base/template_util.h"
+
+namespace gurl_base {
+
+namespace internal {
+
+// Small helper to detect whether a given type has a nested `key_type` typedef.
+// Used below to catch misuses of the API for associative containers.
+template <typename T, typename SFINAE = void>
+struct HasKeyType : std::false_type {};
+
+template <typename T>
+struct HasKeyType<T, void_t<typename T::key_type>> : std::true_type {};
+
+// Probe whether a `contains` member function exists and return the result of
+// `container.contains(value)` if this is a valid expression. This is the
+// highest priority option.
+template <typename Container, typename Value>
+constexpr auto ContainsImpl(const Container& container,
+ const Value& value,
+ priority_tag<2>)
+ -> decltype(container.contains(value)) {
+ return container.contains(value);
+}
+
+// Probe whether a `find` member function exists and whether its return value
+// can be compared with `container.end()`. Intended for STL style maps and sets
+// that lack a `contains` member function.
+template <typename Container, typename Value>
+constexpr auto ContainsImpl(const Container& container,
+ const Value& value,
+ priority_tag<1>)
+ -> decltype(container.find(value) != container.end()) {
+ return container.find(value) != container.end();
+}
+
+// Probe whether a `find` member function exists and whether its return value
+// can be compared with `Container::npos`. Intended for STL style strings that
+// lack a `contains` member function.
+template <typename Container, typename Value>
+constexpr auto ContainsImpl(const Container& container,
+ const Value& value,
+ priority_tag<1>)
+ -> decltype(container.find(value) != Container::npos) {
+ return container.find(value) != Container::npos;
+}
+
+// Generic fallback option, using a linear search over `container` to find
+// `value`. Has the lowest priority. This will not compile for associative
+// containers, as this likely is a performance bug.
+template <typename Container, typename Value>
+constexpr bool ContainsImpl(const Container& container,
+ const Value& value,
+ priority_tag<0>) {
+ static_assert(
+ !HasKeyType<Container>::value,
+ "Error: About to perform linear search on an associative container. "
+ "Either use a more generic comparator (e.g. std::less<>) or, if a linear "
+ "search is desired, provide an explicit projection parameter.");
+ return ranges::find(container, value) != ranges::end(container);
+}
+
+} // namespace internal
+
+// A general purpose utility to check whether `container` contains `value`. This
+// will probe whether a `contains` or `find` member function on `container`
+// exists, and fall back to a generic linear search over `container`.
+template <typename Container, typename Value>
+constexpr bool Contains(const Container& container, const Value& value) {
+ return internal::ContainsImpl(container, value, internal::priority_tag<2>());
+}
+
+// Overload that accepts an additional projection invocable. This
+// projection will be applied to every element in `container` before comparing
+// it with `value`. This will always perform a linear search.
+template <typename Container, typename Value, typename Proj>
+constexpr bool Contains(const Container& container,
+ const Value& value,
+ Proj proj) {
+ return ranges::find(container, value, std::move(proj)) !=
+ ranges::end(container);
+}
+
+} // namespace gurl_base
+
+#endif // BASE_CONTAINERS_CONTAINS_H_
diff --git a/base/containers/contiguous_iterator.h b/base/containers/contiguous_iterator.h
index 48b2755..ca8e7b3 100644
--- a/base/containers/contiguous_iterator.h
+++ b/base/containers/contiguous_iterator.h
@@ -95,8 +95,7 @@
// - https://wg21.link/n4284
template <typename T>
struct IsContiguousIterator
- : internal::IsContiguousIteratorImpl<
- std::remove_cv_t<std::remove_reference_t<T>>> {};
+ : internal::IsContiguousIteratorImpl<remove_cvref_t<T>> {};
} // namespace base
diff --git a/base/containers/span.h b/base/containers/span.h
index 53b6965..1a54de1 100644
--- a/base/containers/span.h
+++ b/base/containers/span.h
@@ -47,7 +47,7 @@
struct ExtentImpl<gurl_base::span<T, N>> : size_constant<N> {};
template <typename T>
-using Extent = ExtentImpl<std::remove_cv_t<std::remove_reference_t<T>>>;
+using Extent = ExtentImpl<remove_cvref_t<T>>;
template <typename T>
struct IsSpanImpl : std::false_type {};
@@ -263,7 +263,27 @@
template <typename It,
typename = internal::EnableIfCompatibleContiguousIterator<It, T>>
constexpr span(It first, size_t count) noexcept
- : ExtentStorage(count), data_(gurl_base::to_address(first)) {
+ : ExtentStorage(count),
+ // The use of to_address() here is to handle the case where the iterator
+ // `first` is pointing to the container's `end()`. In that case we can
+ // not use the address returned from the iterator, or dereference it
+ // through the iterator's `operator*`, but we can store it. We must assume
+ // in this case that `count` is 0, since the iterator does not point to
+ // valid data. Future hardening of iterators may disallow pulling the
+ // address from `end()`, as demonstrated by asserts() in libstdc++:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93960.
+ //
+ // The span API dictates that the `data()` is accessible when size is 0,
+ // since the pointer may be valid, so we cannot prevent storing and
+ // giving out an invalid pointer here without breaking API compatibility
+ // and our unit tests. Thus protecting against this can likely only be
+ // successful from inside iterators themselves, where the context about
+ // the pointer is known.
+ //
+ // We can not protect here generally against an invalid iterator/count
+ // being passed in, since we have no context to determine if the
+ // iterator or count are valid.
+ data_(gurl_base::to_address(first)) {
GURL_CHECK(Extent == dynamic_extent || Extent == count);
}
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h
new file mode 100644
index 0000000..6378a78
--- /dev/null
+++ b/base/cxx17_backports.h
@@ -0,0 +1,94 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CXX17_BACKPORTS_H_
+#define BASE_CXX17_BACKPORTS_H_
+
+#include <array>
+#include <initializer_list>
+#include <memory>
+#include <string>
+
+namespace gurl_base {
+
+// C++14 implementation of C++17's std::size():
+// http://en.cppreference.com/w/cpp/iterator/size
+template <typename Container>
+constexpr auto size(const Container& c) -> decltype(c.size()) {
+ return c.size();
+}
+
+template <typename T, size_t N>
+constexpr size_t size(const T (&array)[N]) noexcept {
+ return N;
+}
+
+// C++14 implementation of C++17's std::empty():
+// http://en.cppreference.com/w/cpp/iterator/empty
+template <typename Container>
+constexpr auto empty(const Container& c) -> decltype(c.empty()) {
+ return c.empty();
+}
+
+template <typename T, size_t N>
+constexpr bool empty(const T (&array)[N]) noexcept {
+ return false;
+}
+
+template <typename T>
+constexpr bool empty(std::initializer_list<T> il) noexcept {
+ return il.size() == 0;
+}
+
+// C++14 implementation of C++17's std::data():
+// http://en.cppreference.com/w/cpp/iterator/data
+template <typename Container>
+constexpr auto data(Container& c) -> decltype(c.data()) {
+ return c.data();
+}
+
+// std::basic_string::data() had no mutable overload prior to C++17 [1].
+// Hence this overload is provided.
+// Note: str[0] is safe even for empty strings, as they are guaranteed to be
+// null-terminated [2].
+//
+// [1] http://en.cppreference.com/w/cpp/string/basic_string/data
+// [2] http://en.cppreference.com/w/cpp/string/basic_string/operator_at
+template <typename CharT, typename Traits, typename Allocator>
+CharT* data(std::basic_string<CharT, Traits, Allocator>& str) {
+ return std::addressof(str[0]);
+}
+
+template <typename Container>
+constexpr auto data(const Container& c) -> decltype(c.data()) {
+ return c.data();
+}
+
+template <typename T, size_t N>
+constexpr T* data(T (&array)[N]) noexcept {
+ return array;
+}
+
+template <typename T>
+constexpr const T* data(std::initializer_list<T> il) noexcept {
+ return il.begin();
+}
+
+// std::array::data() was not constexpr prior to C++17 [1].
+// Hence these overloads are provided.
+//
+// [1] https://en.cppreference.com/w/cpp/container/array/data
+template <typename T, size_t N>
+constexpr T* data(std::array<T, N>& array) noexcept {
+ return !array.empty() ? &array[0] : nullptr;
+}
+
+template <typename T, size_t N>
+constexpr const T* data(const std::array<T, N>& array) noexcept {
+ return !array.empty() ? &array[0] : nullptr;
+}
+
+} // namespace gurl_base
+
+#endif // BASE_CXX17_BACKPORTS_H_
diff --git a/base/optional.h b/base/optional.h
deleted file mode 100644
index c946364..0000000
--- a/base/optional.h
+++ /dev/null
@@ -1,953 +0,0 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef BASE_OPTIONAL_H_
-#define BASE_OPTIONAL_H_
-
-#include <functional>
-#include <type_traits>
-#include <utility>
-
-#include "polyfills/base/check.h"
-#include "base/template_util.h"
-
-namespace gurl_base {
-
-// Specification:
-// http://en.cppreference.com/w/cpp/utility/optional/nullopt_t
-struct nullopt_t {
- constexpr explicit nullopt_t(int) {}
-};
-
-// Specification:
-// http://en.cppreference.com/w/cpp/utility/optional/nullopt
-constexpr nullopt_t nullopt(0);
-
-// Forward declaration, which is refered by following helpers.
-template <typename T>
-class Optional;
-
-namespace internal {
-
-struct DummyUnionMember {};
-
-template <typename T, bool = std::is_trivially_destructible<T>::value>
-struct OptionalStorageBase {
- // Provide non-defaulted default ctor to make sure it's not deleted by
- // non-trivial T::T() in the union.
- constexpr OptionalStorageBase() : dummy_() {}
-
- template <class... Args>
- constexpr explicit OptionalStorageBase(in_place_t, Args&&... args)
- : is_populated_(true), value_(std::forward<Args>(args)...) {}
-
- // When T is not trivially destructible we must call its
- // destructor before deallocating its memory.
- // Note that this hides the (implicitly declared) move constructor, which
- // would be used for constexpr move constructor in OptionalStorage<T>.
- // It is needed iff T is trivially move constructible. However, the current
- // is_trivially_{copy,move}_constructible implementation requires
- // is_trivially_destructible (which looks a bug, cf:
- // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51452 and
- // http://cplusplus.github.io/LWG/lwg-active.html#2116), so it is not
- // necessary for this case at the moment. Please see also the destructor
- // comment in "is_trivially_destructible = true" specialization below.
- ~OptionalStorageBase() {
- if (is_populated_)
- value_.~T();
- }
-
- template <class... Args>
- void Init(Args&&... args) {
- GURL_DCHECK(!is_populated_);
- ::new (std::addressof(value_)) T(std::forward<Args>(args)...);
- is_populated_ = true;
- }
-
- bool is_populated_ = false;
- union {
- // |dummy_| exists so that the union will always be initialized, even when
- // it doesn't contain a value. Union members must be initialized for the
- // constructor to be 'constexpr'. Having a special trivial class for it is
- // better than e.g. using char, because the latter will have to be
- // zero-initialized, and the compiler can't optimize this write away, since
- // it assumes this might be a programmer's invariant. This can also cause
- // problems for conservative GC in Oilpan. Compiler is free to split shared
- // and non-shared parts of the union in separate memory locations (or
- // registers). If conservative GC is triggered at this moment, the stack
- // scanning routine won't find the correct object pointed from
- // Optional<HeapObject*>. This dummy valueless struct lets the compiler know
- // that we don't care about the value of this union member.
- DummyUnionMember dummy_;
- T value_;
- };
-};
-
-template <typename T>
-struct OptionalStorageBase<T, true /* trivially destructible */> {
- // Provide non-defaulted default ctor to make sure it's not deleted by
- // non-trivial T::T() in the union.
- constexpr OptionalStorageBase() : dummy_() {}
-
- template <class... Args>
- constexpr explicit OptionalStorageBase(in_place_t, Args&&... args)
- : is_populated_(true), value_(std::forward<Args>(args)...) {}
-
- // When T is trivially destructible (i.e. its destructor does nothing) there
- // is no need to call it. Implicitly defined destructor is trivial, because
- // both members (bool and union containing only variants which are trivially
- // destructible) are trivially destructible.
- // Explicitly-defaulted destructor is also trivial, but do not use it here,
- // because it hides the implicit move constructor. It is needed to implement
- // constexpr move constructor in OptionalStorage iff T is trivially move
- // constructible. Note that, if T is trivially move constructible, the move
- // constructor of OptionalStorageBase<T> is also implicitly defined and it is
- // trivially move constructor. If T is not trivially move constructible,
- // "not declaring move constructor without destructor declaration" here means
- // "delete move constructor", which works because any move constructor of
- // OptionalStorage will not refer to it in that case.
-
- template <class... Args>
- void Init(Args&&... args) {
- GURL_DCHECK(!is_populated_);
- ::new (std::addressof(value_)) T(std::forward<Args>(args)...);
- is_populated_ = true;
- }
-
- bool is_populated_ = false;
- union {
- // |dummy_| exists so that the union will always be initialized, even when
- // it doesn't contain a value. Union members must be initialized for the
- // constructor to be 'constexpr'. Having a special trivial class for it is
- // better than e.g. using char, because the latter will have to be
- // zero-initialized, and the compiler can't optimize this write away, since
- // it assumes this might be a programmer's invariant. This can also cause
- // problems for conservative GC in Oilpan. Compiler is free to split shared
- // and non-shared parts of the union in separate memory locations (or
- // registers). If conservative GC is triggered at this moment, the stack
- // scanning routine won't find the correct object pointed from
- // Optional<HeapObject*>. This dummy valueless struct lets the compiler know
- // that we don't care about the value of this union member.
- DummyUnionMember dummy_;
- T value_;
- };
-};
-
-// Implement conditional constexpr copy and move constructors. These are
-// constexpr if is_trivially_{copy,move}_constructible<T>::value is true
-// respectively. If each is true, the corresponding constructor is defined as
-// "= default;", which generates a constexpr constructor (In this case,
-// the condition of constexpr-ness is satisfied because the base class also has
-// compiler generated constexpr {copy,move} constructors). Note that
-// placement-new is prohibited in constexpr.
-template <typename T,
- bool = is_trivially_copy_constructible<T>::value,
- bool = std::is_trivially_move_constructible<T>::value>
-struct OptionalStorage : OptionalStorageBase<T> {
- // This is no trivially {copy,move} constructible case. Other cases are
- // defined below as specializations.
-
- // Accessing the members of template base class requires explicit
- // declaration.
- using OptionalStorageBase<T>::is_populated_;
- using OptionalStorageBase<T>::value_;
- using OptionalStorageBase<T>::Init;
-
- // Inherit constructors (specifically, the in_place constructor).
- using OptionalStorageBase<T>::OptionalStorageBase;
-
- // User defined constructor deletes the default constructor.
- // Define it explicitly.
- OptionalStorage() = default;
-
- OptionalStorage(const OptionalStorage& other) {
- if (other.is_populated_)
- Init(other.value_);
- }
-
- OptionalStorage(OptionalStorage&& other) noexcept(
- std::is_nothrow_move_constructible<T>::value) {
- if (other.is_populated_)
- Init(std::move(other.value_));
- }
-};
-
-template <typename T>
-struct OptionalStorage<T,
- true /* trivially copy constructible */,
- false /* trivially move constructible */>
- : OptionalStorageBase<T> {
- using OptionalStorageBase<T>::is_populated_;
- using OptionalStorageBase<T>::value_;
- using OptionalStorageBase<T>::Init;
- using OptionalStorageBase<T>::OptionalStorageBase;
-
- OptionalStorage() = default;
- OptionalStorage(const OptionalStorage& other) = default;
-
- OptionalStorage(OptionalStorage&& other) noexcept(
- std::is_nothrow_move_constructible<T>::value) {
- if (other.is_populated_)
- Init(std::move(other.value_));
- }
-};
-
-template <typename T>
-struct OptionalStorage<T,
- false /* trivially copy constructible */,
- true /* trivially move constructible */>
- : OptionalStorageBase<T> {
- using OptionalStorageBase<T>::is_populated_;
- using OptionalStorageBase<T>::value_;
- using OptionalStorageBase<T>::Init;
- using OptionalStorageBase<T>::OptionalStorageBase;
-
- OptionalStorage() = default;
- OptionalStorage(OptionalStorage&& other) = default;
-
- OptionalStorage(const OptionalStorage& other) {
- if (other.is_populated_)
- Init(other.value_);
- }
-};
-
-template <typename T>
-struct OptionalStorage<T,
- true /* trivially copy constructible */,
- true /* trivially move constructible */>
- : OptionalStorageBase<T> {
- // If both trivially {copy,move} constructible are true, it is not necessary
- // to use user-defined constructors. So, just inheriting constructors
- // from the base class works.
- using OptionalStorageBase<T>::OptionalStorageBase;
-};
-
-// Base class to support conditionally usable copy-/move- constructors
-// and assign operators.
-template <typename T>
-class OptionalBase {
- // This class provides implementation rather than public API, so everything
- // should be hidden. Often we use composition, but we cannot in this case
- // because of C++ language restriction.
- protected:
- constexpr OptionalBase() = default;
- constexpr OptionalBase(const OptionalBase& other) = default;
- constexpr OptionalBase(OptionalBase&& other) = default;
-
- template <class... Args>
- constexpr explicit OptionalBase(in_place_t, Args&&... args)
- : storage_(in_place, std::forward<Args>(args)...) {}
-
- // Implementation of converting constructors.
- template <typename U>
- explicit OptionalBase(const OptionalBase<U>& other) {
- if (other.storage_.is_populated_)
- storage_.Init(other.storage_.value_);
- }
-
- template <typename U>
- explicit OptionalBase(OptionalBase<U>&& other) {
- if (other.storage_.is_populated_)
- storage_.Init(std::move(other.storage_.value_));
- }
-
- ~OptionalBase() = default;
-
- OptionalBase& operator=(const OptionalBase& other) {
- CopyAssign(other);
- return *this;
- }
-
- OptionalBase& operator=(OptionalBase&& other) noexcept(
- std::is_nothrow_move_assignable<T>::value&&
- std::is_nothrow_move_constructible<T>::value) {
- MoveAssign(std::move(other));
- return *this;
- }
-
- template <typename U>
- void CopyAssign(const OptionalBase<U>& other) {
- if (other.storage_.is_populated_)
- InitOrAssign(other.storage_.value_);
- else
- FreeIfNeeded();
- }
-
- template <typename U>
- void MoveAssign(OptionalBase<U>&& other) {
- if (other.storage_.is_populated_)
- InitOrAssign(std::move(other.storage_.value_));
- else
- FreeIfNeeded();
- }
-
- template <typename U>
- void InitOrAssign(U&& value) {
- if (storage_.is_populated_)
- storage_.value_ = std::forward<U>(value);
- else
- storage_.Init(std::forward<U>(value));
- }
-
- void FreeIfNeeded() {
- if (!storage_.is_populated_)
- return;
- storage_.value_.~T();
- storage_.is_populated_ = false;
- }
-
- // For implementing conversion, allow access to other typed OptionalBase
- // class.
- template <typename U>
- friend class OptionalBase;
-
- OptionalStorage<T> storage_;
-};
-
-// The following {Copy,Move}{Constructible,Assignable} structs are helpers to
-// implement constructor/assign-operator overloading. Specifically, if T is
-// is not movable but copyable, Optional<T>'s move constructor should not
-// participate in overload resolution. This inheritance trick implements that.
-template <bool is_copy_constructible>
-struct CopyConstructible {};
-
-template <>
-struct CopyConstructible<false> {
- constexpr CopyConstructible() = default;
- constexpr CopyConstructible(const CopyConstructible&) = delete;
- constexpr CopyConstructible(CopyConstructible&&) = default;
- CopyConstructible& operator=(const CopyConstructible&) = default;
- CopyConstructible& operator=(CopyConstructible&&) = default;
-};
-
-template <bool is_move_constructible>
-struct MoveConstructible {};
-
-template <>
-struct MoveConstructible<false> {
- constexpr MoveConstructible() = default;
- constexpr MoveConstructible(const MoveConstructible&) = default;
- constexpr MoveConstructible(MoveConstructible&&) = delete;
- MoveConstructible& operator=(const MoveConstructible&) = default;
- MoveConstructible& operator=(MoveConstructible&&) = default;
-};
-
-template <bool is_copy_assignable>
-struct CopyAssignable {};
-
-template <>
-struct CopyAssignable<false> {
- constexpr CopyAssignable() = default;
- constexpr CopyAssignable(const CopyAssignable&) = default;
- constexpr CopyAssignable(CopyAssignable&&) = default;
- CopyAssignable& operator=(const CopyAssignable&) = delete;
- CopyAssignable& operator=(CopyAssignable&&) = default;
-};
-
-template <bool is_move_assignable>
-struct MoveAssignable {};
-
-template <>
-struct MoveAssignable<false> {
- constexpr MoveAssignable() = default;
- constexpr MoveAssignable(const MoveAssignable&) = default;
- constexpr MoveAssignable(MoveAssignable&&) = default;
- MoveAssignable& operator=(const MoveAssignable&) = default;
- MoveAssignable& operator=(MoveAssignable&&) = delete;
-};
-
-// Helper to conditionally enable converting constructors and assign operators.
-template <typename T, typename U>
-using IsConvertibleFromOptional =
- disjunction<std::is_constructible<T, Optional<U>&>,
- std::is_constructible<T, const Optional<U>&>,
- std::is_constructible<T, Optional<U>&&>,
- std::is_constructible<T, const Optional<U>&&>,
- std::is_convertible<Optional<U>&, T>,
- std::is_convertible<const Optional<U>&, T>,
- std::is_convertible<Optional<U>&&, T>,
- std::is_convertible<const Optional<U>&&, T>>;
-
-template <typename T, typename U>
-using IsAssignableFromOptional =
- disjunction<IsConvertibleFromOptional<T, U>,
- std::is_assignable<T&, Optional<U>&>,
- std::is_assignable<T&, const Optional<U>&>,
- std::is_assignable<T&, Optional<U>&&>,
- std::is_assignable<T&, const Optional<U>&&>>;
-
-// Forward compatibility for C++17.
-// Introduce one more deeper nested namespace to avoid leaking using std::swap.
-namespace swappable_impl {
-using std::swap;
-
-struct IsSwappableImpl {
- // Tests if swap can be called. Check<T&>(0) returns true_type iff swap
- // is available for T. Otherwise, Check's overload resolution falls back
- // to Check(...) declared below thanks to SFINAE, so returns false_type.
- template <typename T>
- static auto Check(int)
- -> decltype(swap(std::declval<T>(), std::declval<T>()), std::true_type());
-
- template <typename T>
- static std::false_type Check(...);
-};
-} // namespace swappable_impl
-
-template <typename T>
-struct IsSwappable : decltype(swappable_impl::IsSwappableImpl::Check<T&>(0)) {};
-
-// Forward compatibility for C++20.
-template <typename T>
-using RemoveCvRefT = std::remove_cv_t<std::remove_reference_t<T>>;
-
-} // namespace internal
-
-// On Windows, by default, empty-base class optimization does not work,
-// which means even if the base class is empty struct, it still consumes one
-// byte for its body. __declspec(empty_bases) enables the optimization.
-// cf)
-// https://blogs.msdn.microsoft.com/vcblog/2016/03/30/optimizing-the-layout-of-empty-base-classes-in-vs2015-update-2-3/
-#ifdef OS_WIN
-#define OPTIONAL_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
-#else
-#define OPTIONAL_DECLSPEC_EMPTY_BASES
-#endif
-
-// gurl_base::Optional is a Chromium version of the C++17 optional class:
-// std::optional documentation:
-// http://en.cppreference.com/w/cpp/utility/optional
-// Chromium documentation:
-// https://chromium.googlesource.com/chromium/src/+/master/docs/optional.md
-//
-// These are the differences between the specification and the implementation:
-// - Constructors do not use 'constexpr' as it is a C++14 extension.
-// - 'constexpr' might be missing in some places for reasons specified locally.
-// - No exceptions are thrown, because they are banned from Chromium.
-// Marked noexcept for only move constructor and move assign operators.
-// - All the non-members are in the 'base' namespace instead of 'std'.
-//
-// Note that T cannot have a constructor T(Optional<T>) etc. Optional<T> checks
-// T's constructor (specifically via IsConvertibleFromOptional), and in the
-// check whether T can be constructible from Optional<T>, which is recursive
-// so it does not work. As of Feb 2018, std::optional C++17 implementation in
-// both clang and gcc has same limitation. MSVC SFINAE looks to have different
-// behavior, but anyway it reports an error, too.
-template <typename T>
-class OPTIONAL_DECLSPEC_EMPTY_BASES Optional
- : public internal::OptionalBase<T>,
- public internal::CopyConstructible<std::is_copy_constructible<T>::value>,
- public internal::MoveConstructible<std::is_move_constructible<T>::value>,
- public internal::CopyAssignable<std::is_copy_constructible<T>::value &&
- std::is_copy_assignable<T>::value>,
- public internal::MoveAssignable<std::is_move_constructible<T>::value &&
- std::is_move_assignable<T>::value> {
- private:
- // Disable some versions of T that are ill-formed.
- // See: https://timsong-cpp.github.io/cppwp/n4659/optional#syn-1
- static_assert(
- !std::is_same<internal::RemoveCvRefT<T>, in_place_t>::value,
- "instantiation of gurl_base::Optional with in_place_t is ill-formed");
- static_assert(!std::is_same<internal::RemoveCvRefT<T>, nullopt_t>::value,
- "instantiation of gurl_base::Optional with nullopt_t is ill-formed");
- static_assert(
- !std::is_reference<T>::value,
- "instantiation of gurl_base::Optional with a reference type is ill-formed");
- // See: https://timsong-cpp.github.io/cppwp/n4659/optional#optional-3
- static_assert(std::is_destructible<T>::value,
- "instantiation of gurl_base::Optional with a non-destructible type "
- "is ill-formed");
- // Arrays are explicitly disallowed because for arrays of known bound
- // is_destructible is of undefined value.
- // See: https://en.cppreference.com/w/cpp/types/is_destructible
- static_assert(
- !std::is_array<T>::value,
- "instantiation of gurl_base::Optional with an array type is ill-formed");
-
- public:
-#undef OPTIONAL_DECLSPEC_EMPTY_BASES
- using value_type = T;
-
- // Defer default/copy/move constructor implementation to OptionalBase.
- constexpr Optional() = default;
- constexpr Optional(const Optional& other) = default;
- constexpr Optional(Optional&& other) noexcept(
- std::is_nothrow_move_constructible<T>::value) = default;
-
- constexpr Optional(nullopt_t) {} // NOLINT(runtime/explicit)
-
- // Converting copy constructor. "explicit" only if
- // std::is_convertible<const U&, T>::value is false. It is implemented by
- // declaring two almost same constructors, but that condition in enable_if_t
- // is different, so that either one is chosen, thanks to SFINAE.
- template <
- typename U,
- std::enable_if_t<std::is_constructible<T, const U&>::value &&
- !internal::IsConvertibleFromOptional<T, U>::value &&
- std::is_convertible<const U&, T>::value,
- bool> = false>
- Optional(const Optional<U>& other) : internal::OptionalBase<T>(other) {}
-
- template <
- typename U,
- std::enable_if_t<std::is_constructible<T, const U&>::value &&
- !internal::IsConvertibleFromOptional<T, U>::value &&
- !std::is_convertible<const U&, T>::value,
- bool> = false>
- explicit Optional(const Optional<U>& other)
- : internal::OptionalBase<T>(other) {}
-
- // Converting move constructor. Similar to converting copy constructor,
- // declaring two (explicit and non-explicit) constructors.
- template <
- typename U,
- std::enable_if_t<std::is_constructible<T, U&&>::value &&
- !internal::IsConvertibleFromOptional<T, U>::value &&
- std::is_convertible<U&&, T>::value,
- bool> = false>
- Optional(Optional<U>&& other) : internal::OptionalBase<T>(std::move(other)) {}
-
- template <
- typename U,
- std::enable_if_t<std::is_constructible<T, U&&>::value &&
- !internal::IsConvertibleFromOptional<T, U>::value &&
- !std::is_convertible<U&&, T>::value,
- bool> = false>
- explicit Optional(Optional<U>&& other)
- : internal::OptionalBase<T>(std::move(other)) {}
-
- template <class... Args>
- constexpr explicit Optional(in_place_t, Args&&... args)
- : internal::OptionalBase<T>(in_place, std::forward<Args>(args)...) {}
-
- template <
- class U,
- class... Args,
- class = std::enable_if_t<std::is_constructible<value_type,
- std::initializer_list<U>&,
- Args...>::value>>
- constexpr explicit Optional(in_place_t,
- std::initializer_list<U> il,
- Args&&... args)
- : internal::OptionalBase<T>(in_place, il, std::forward<Args>(args)...) {}
-
- // Forward value constructor. Similar to converting constructors,
- // conditionally explicit.
- template <
- typename U = value_type,
- std::enable_if_t<
- std::is_constructible<T, U&&>::value &&
- !std::is_same<internal::RemoveCvRefT<U>, in_place_t>::value &&
- !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value &&
- std::is_convertible<U&&, T>::value,
- bool> = false>
- constexpr Optional(U&& value)
- : internal::OptionalBase<T>(in_place, std::forward<U>(value)) {}
-
- template <
- typename U = value_type,
- std::enable_if_t<
- std::is_constructible<T, U&&>::value &&
- !std::is_same<internal::RemoveCvRefT<U>, in_place_t>::value &&
- !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value &&
- !std::is_convertible<U&&, T>::value,
- bool> = false>
- constexpr explicit Optional(U&& value)
- : internal::OptionalBase<T>(in_place, std::forward<U>(value)) {}
-
- ~Optional() = default;
-
- // Defer copy-/move- assign operator implementation to OptionalBase.
- Optional& operator=(const Optional& other) = default;
- Optional& operator=(Optional&& other) noexcept(
- std::is_nothrow_move_assignable<T>::value&&
- std::is_nothrow_move_constructible<T>::value) = default;
-
- Optional& operator=(nullopt_t) {
- FreeIfNeeded();
- return *this;
- }
-
- // Perfect-forwarded assignment.
- template <typename U>
- std::enable_if_t<
- !std::is_same<internal::RemoveCvRefT<U>, Optional<T>>::value &&
- std::is_constructible<T, U>::value &&
- std::is_assignable<T&, U>::value &&
- (!std::is_scalar<T>::value ||
- !std::is_same<std::decay_t<U>, T>::value),
- Optional&>
- operator=(U&& value) {
- InitOrAssign(std::forward<U>(value));
- return *this;
- }
-
- // Copy assign the state of other.
- template <typename U>
- std::enable_if_t<!internal::IsAssignableFromOptional<T, U>::value &&
- std::is_constructible<T, const U&>::value &&
- std::is_assignable<T&, const U&>::value,
- Optional&>
- operator=(const Optional<U>& other) {
- CopyAssign(other);
- return *this;
- }
-
- // Move assign the state of other.
- template <typename U>
- std::enable_if_t<!internal::IsAssignableFromOptional<T, U>::value &&
- std::is_constructible<T, U>::value &&
- std::is_assignable<T&, U>::value,
- Optional&>
- operator=(Optional<U>&& other) {
- MoveAssign(std::move(other));
- return *this;
- }
-
- constexpr const T* operator->() const {
- GURL_CHECK(storage_.is_populated_);
- return std::addressof(storage_.value_);
- }
-
- constexpr T* operator->() {
- GURL_CHECK(storage_.is_populated_);
- return std::addressof(storage_.value_);
- }
-
- constexpr const T& operator*() const & {
- GURL_CHECK(storage_.is_populated_);
- return storage_.value_;
- }
-
- constexpr T& operator*() & {
- GURL_CHECK(storage_.is_populated_);
- return storage_.value_;
- }
-
- constexpr const T&& operator*() const && {
- GURL_CHECK(storage_.is_populated_);
- return std::move(storage_.value_);
- }
-
- constexpr T&& operator*() && {
- GURL_CHECK(storage_.is_populated_);
- return std::move(storage_.value_);
- }
-
- constexpr explicit operator bool() const { return storage_.is_populated_; }
-
- constexpr bool has_value() const { return storage_.is_populated_; }
-
- constexpr T& value() & {
- GURL_CHECK(storage_.is_populated_);
- return storage_.value_;
- }
-
- constexpr const T& value() const & {
- GURL_CHECK(storage_.is_populated_);
- return storage_.value_;
- }
-
- constexpr T&& value() && {
- GURL_CHECK(storage_.is_populated_);
- return std::move(storage_.value_);
- }
-
- constexpr const T&& value() const && {
- GURL_CHECK(storage_.is_populated_);
- return std::move(storage_.value_);
- }
-
- template <class U>
- constexpr T value_or(U&& default_value) const& {
- // TODO(mlamouri): add the following assert when possible:
- // static_assert(std::is_copy_constructible<T>::value,
- // "T must be copy constructible");
- static_assert(std::is_convertible<U, T>::value,
- "U must be convertible to T");
- return storage_.is_populated_
- ? storage_.value_
- : static_cast<T>(std::forward<U>(default_value));
- }
-
- template <class U>
- constexpr T value_or(U&& default_value) && {
- // TODO(mlamouri): add the following assert when possible:
- // static_assert(std::is_move_constructible<T>::value,
- // "T must be move constructible");
- static_assert(std::is_convertible<U, T>::value,
- "U must be convertible to T");
- return storage_.is_populated_
- ? std::move(storage_.value_)
- : static_cast<T>(std::forward<U>(default_value));
- }
-
- void swap(Optional& other) {
- if (!storage_.is_populated_ && !other.storage_.is_populated_)
- return;
-
- if (storage_.is_populated_ != other.storage_.is_populated_) {
- if (storage_.is_populated_) {
- other.storage_.Init(std::move(storage_.value_));
- FreeIfNeeded();
- } else {
- storage_.Init(std::move(other.storage_.value_));
- other.FreeIfNeeded();
- }
- return;
- }
-
- GURL_DCHECK(storage_.is_populated_ && other.storage_.is_populated_);
- using std::swap;
- swap(**this, *other);
- }
-
- void reset() { FreeIfNeeded(); }
-
- template <class... Args>
- T& emplace(Args&&... args) {
- FreeIfNeeded();
- storage_.Init(std::forward<Args>(args)...);
- return storage_.value_;
- }
-
- template <class U, class... Args>
- std::enable_if_t<
- std::is_constructible<T, std::initializer_list<U>&, Args&&...>::value,
- T&>
- emplace(std::initializer_list<U> il, Args&&... args) {
- FreeIfNeeded();
- storage_.Init(il, std::forward<Args>(args)...);
- return storage_.value_;
- }
-
- private:
- // Accessing template base class's protected member needs explicit
- // declaration to do so.
- using internal::OptionalBase<T>::CopyAssign;
- using internal::OptionalBase<T>::FreeIfNeeded;
- using internal::OptionalBase<T>::InitOrAssign;
- using internal::OptionalBase<T>::MoveAssign;
- using internal::OptionalBase<T>::storage_;
-};
-
-// Here after defines comparation operators. The definition follows
-// http://en.cppreference.com/w/cpp/utility/optional/operator_cmp
-// while bool() casting is replaced by has_value() to meet the chromium
-// style guide.
-template <class T, class U>
-constexpr bool operator==(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (lhs.has_value() != rhs.has_value())
- return false;
- if (!lhs.has_value())
- return true;
- return *lhs == *rhs;
-}
-
-template <class T, class U>
-constexpr bool operator!=(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (lhs.has_value() != rhs.has_value())
- return true;
- if (!lhs.has_value())
- return false;
- return *lhs != *rhs;
-}
-
-template <class T, class U>
-constexpr bool operator<(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (!rhs.has_value())
- return false;
- if (!lhs.has_value())
- return true;
- return *lhs < *rhs;
-}
-
-template <class T, class U>
-constexpr bool operator<=(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (!lhs.has_value())
- return true;
- if (!rhs.has_value())
- return false;
- return *lhs <= *rhs;
-}
-
-template <class T, class U>
-constexpr bool operator>(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (!lhs.has_value())
- return false;
- if (!rhs.has_value())
- return true;
- return *lhs > *rhs;
-}
-
-template <class T, class U>
-constexpr bool operator>=(const Optional<T>& lhs, const Optional<U>& rhs) {
- if (!rhs.has_value())
- return true;
- if (!lhs.has_value())
- return false;
- return *lhs >= *rhs;
-}
-
-template <class T>
-constexpr bool operator==(const Optional<T>& opt, nullopt_t) {
- return !opt;
-}
-
-template <class T>
-constexpr bool operator==(nullopt_t, const Optional<T>& opt) {
- return !opt;
-}
-
-template <class T>
-constexpr bool operator!=(const Optional<T>& opt, nullopt_t) {
- return opt.has_value();
-}
-
-template <class T>
-constexpr bool operator!=(nullopt_t, const Optional<T>& opt) {
- return opt.has_value();
-}
-
-template <class T>
-constexpr bool operator<(const Optional<T>& opt, nullopt_t) {
- return false;
-}
-
-template <class T>
-constexpr bool operator<(nullopt_t, const Optional<T>& opt) {
- return opt.has_value();
-}
-
-template <class T>
-constexpr bool operator<=(const Optional<T>& opt, nullopt_t) {
- return !opt;
-}
-
-template <class T>
-constexpr bool operator<=(nullopt_t, const Optional<T>& opt) {
- return true;
-}
-
-template <class T>
-constexpr bool operator>(const Optional<T>& opt, nullopt_t) {
- return opt.has_value();
-}
-
-template <class T>
-constexpr bool operator>(nullopt_t, const Optional<T>& opt) {
- return false;
-}
-
-template <class T>
-constexpr bool operator>=(const Optional<T>& opt, nullopt_t) {
- return true;
-}
-
-template <class T>
-constexpr bool operator>=(nullopt_t, const Optional<T>& opt) {
- return !opt;
-}
-
-template <class T, class U>
-constexpr bool operator==(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt == value : false;
-}
-
-template <class T, class U>
-constexpr bool operator==(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value == *opt : false;
-}
-
-template <class T, class U>
-constexpr bool operator!=(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt != value : true;
-}
-
-template <class T, class U>
-constexpr bool operator!=(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value != *opt : true;
-}
-
-template <class T, class U>
-constexpr bool operator<(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt < value : true;
-}
-
-template <class T, class U>
-constexpr bool operator<(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value < *opt : false;
-}
-
-template <class T, class U>
-constexpr bool operator<=(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt <= value : true;
-}
-
-template <class T, class U>
-constexpr bool operator<=(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value <= *opt : false;
-}
-
-template <class T, class U>
-constexpr bool operator>(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt > value : false;
-}
-
-template <class T, class U>
-constexpr bool operator>(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value > *opt : true;
-}
-
-template <class T, class U>
-constexpr bool operator>=(const Optional<T>& opt, const U& value) {
- return opt.has_value() ? *opt >= value : false;
-}
-
-template <class T, class U>
-constexpr bool operator>=(const U& value, const Optional<T>& opt) {
- return opt.has_value() ? value >= *opt : true;
-}
-
-template <class T>
-constexpr Optional<std::decay_t<T>> make_optional(T&& value) {
- return Optional<std::decay_t<T>>(std::forward<T>(value));
-}
-
-template <class T, class... Args>
-constexpr Optional<T> make_optional(Args&&... args) {
- return Optional<T>(in_place, std::forward<Args>(args)...);
-}
-
-template <class T, class U, class... Args>
-constexpr Optional<T> make_optional(std::initializer_list<U> il,
- Args&&... args) {
- return Optional<T>(in_place, il, std::forward<Args>(args)...);
-}
-
-// Partial specialization for a function template is not allowed. Also, it is
-// not allowed to add overload function to std namespace, while it is allowed
-// to specialize the template in std. Thus, swap() (kind of) overloading is
-// defined in base namespace, instead.
-template <class T>
-std::enable_if_t<std::is_move_constructible<T>::value &&
- internal::IsSwappable<T>::value>
-swap(Optional<T>& lhs, Optional<T>& rhs) {
- lhs.swap(rhs);
-}
-
-} // namespace base
-
-namespace std {
-
-template <class T>
-struct hash<gurl_base::Optional<T>> {
- size_t operator()(const gurl_base::Optional<T>& opt) const {
- return opt == gurl_base::nullopt ? 0 : std::hash<T>()(*opt);
- }
-};
-
-} // namespace std
-
-#endif // BASE_OPTIONAL_H_
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h
index e6432f8..b405d2f 100644
--- a/base/ranges/algorithm.h
+++ b/base/ranges/algorithm.h
@@ -924,6 +924,17 @@
Pred pred = {},
Proj1 proj1 = {},
Proj2 proj2 = {}) {
+ if (gurl_base::is_constant_evaluated()) {
+ for (; first1 != last1 && first2 != last2; ++first1, ++first2) {
+ if (!gurl_base::invoke(pred, gurl_base::invoke(proj1, *first1),
+ gurl_base::invoke(proj2, *first2))) {
+ return false;
+ }
+ }
+
+ return first1 == last1 && first2 == last2;
+ }
+
return std::equal(first1, last1, first2, last2,
internal::ProjectedBinaryPredicate(pred, proj1, proj2));
}
diff --git a/base/stl_util.h b/base/stl_util.h
index 29f200e..609b71a 100644
--- a/base/stl_util.h
+++ b/base/stl_util.h
@@ -10,13 +10,12 @@
#include <algorithm>
#include <deque>
#include <forward_list>
-#include <functional>
-#include <initializer_list>
#include <iterator>
#include <list>
#include <map>
#include <set>
#include <string>
+#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@@ -24,10 +23,9 @@
#include <vector>
#include "polyfills/base/check.h"
-#include "base/containers/contains.h"
-#include "base/optional.h"
+#include "base/cxx17_backports.h"
#include "base/ranges/algorithm.h"
-#include "base/template_util.h"
+#include "absl/types/optional.h"
namespace gurl_base {
@@ -54,93 +52,6 @@
} // namespace internal
-// C++14 implementation of C++17's std::size():
-// http://en.cppreference.com/w/cpp/iterator/size
-template <typename Container>
-constexpr auto size(const Container& c) -> decltype(c.size()) {
- return c.size();
-}
-
-template <typename T, size_t N>
-constexpr size_t size(const T (&array)[N]) noexcept {
- return N;
-}
-
-// C++14 implementation of C++17's std::empty():
-// http://en.cppreference.com/w/cpp/iterator/empty
-template <typename Container>
-constexpr auto empty(const Container& c) -> decltype(c.empty()) {
- return c.empty();
-}
-
-template <typename T, size_t N>
-constexpr bool empty(const T (&array)[N]) noexcept {
- return false;
-}
-
-template <typename T>
-constexpr bool empty(std::initializer_list<T> il) noexcept {
- return il.size() == 0;
-}
-
-// C++14 implementation of C++17's std::data():
-// http://en.cppreference.com/w/cpp/iterator/data
-template <typename Container>
-constexpr auto data(Container& c) -> decltype(c.data()) {
- return c.data();
-}
-
-// std::basic_string::data() had no mutable overload prior to C++17 [1].
-// Hence this overload is provided.
-// Note: str[0] is safe even for empty strings, as they are guaranteed to be
-// null-terminated [2].
-//
-// [1] http://en.cppreference.com/w/cpp/string/basic_string/data
-// [2] http://en.cppreference.com/w/cpp/string/basic_string/operator_at
-template <typename CharT, typename Traits, typename Allocator>
-CharT* data(std::basic_string<CharT, Traits, Allocator>& str) {
- return std::addressof(str[0]);
-}
-
-template <typename Container>
-constexpr auto data(const Container& c) -> decltype(c.data()) {
- return c.data();
-}
-
-template <typename T, size_t N>
-constexpr T* data(T (&array)[N]) noexcept {
- return array;
-}
-
-template <typename T>
-constexpr const T* data(std::initializer_list<T> il) noexcept {
- return il.begin();
-}
-
-// std::array::data() was not constexpr prior to C++17 [1].
-// Hence these overloads are provided.
-//
-// [1] https://en.cppreference.com/w/cpp/container/array/data
-template <typename T, size_t N>
-constexpr T* data(std::array<T, N>& array) noexcept {
- return !array.empty() ? &array[0] : nullptr;
-}
-
-template <typename T, size_t N>
-constexpr const T* data(const std::array<T, N>& array) noexcept {
- return !array.empty() ? &array[0] : nullptr;
-}
-
-// C++14 implementation of C++17's std::as_const():
-// https://en.cppreference.com/w/cpp/utility/as_const
-template <typename T>
-constexpr std::add_const_t<T>& as_const(T& t) noexcept {
- return t;
-}
-
-template <typename T>
-void as_const(const T&& t) = delete;
-
// Simplified C++14 implementation of C++20's std::to_address.
// Note: This does not consider specializations of pointer_traits<>::to_address,
// since that member function may only be present in C++20 and later.
@@ -158,6 +69,17 @@
return to_address(p.operator->());
}
+// Implementation of C++23's std::to_underlying.
+//
+// Note: This has an additional `std::is_enum<EnumT>` requirement to be SFINAE
+// friendly prior to C++20.
+//
+// Reference: https://en.cppreference.com/w/cpp/utility/to_underlying
+template <typename EnumT, typename = std::enable_if_t<std::is_enum<EnumT>{}>>
+constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) noexcept {
+ return static_cast<std::underlying_type_t<EnumT>>(e);
+}
+
// Returns a const reference to the underlying container of a container adapter.
// Works for std::priority_queue, std::queue, and std::stack.
template <class A>
@@ -618,21 +540,21 @@
// Helper for returning the optional value's address, or nullptr.
template <class T>
-T* OptionalOrNullptr(gurl_base::Optional<T>& optional) {
+T* OptionalOrNullptr(absl::optional<T>& optional) {
return optional.has_value() ? &optional.value() : nullptr;
}
template <class T>
-const T* OptionalOrNullptr(const gurl_base::Optional<T>& optional) {
+const T* OptionalOrNullptr(const absl::optional<T>& optional) {
return optional.has_value() ? &optional.value() : nullptr;
}
-// Helper for creating an Optional<T> from a potentially nullptr T*.
+// Helper for creating an optional<T> from a potentially nullptr T*.
template <class T>
-gurl_base::Optional<T> OptionalFromPtr(const T* value) {
+absl::optional<T> OptionalFromPtr(const T* value) {
if (value)
- return gurl_base::Optional<T>(*value);
- return gurl_base::nullopt;
+ return absl::optional<T>(*value);
+ return absl::nullopt;
}
} // namespace base
diff --git a/base/strings/BUILD b/base/strings/BUILD
index a1346bd..257f8f9 100644
--- a/base/strings/BUILD
+++ b/base/strings/BUILD
@@ -15,7 +15,6 @@
],
hdrs = [
"char_traits.h",
- "string16.h",
"string_piece_forward.h",
"string_piece.h",
"string_util.h",
diff --git a/base/strings/abseil_string_conversions.cc b/base/strings/abseil_string_conversions.cc
index e7c746e..5915b53 100644
--- a/base/strings/abseil_string_conversions.cc
+++ b/base/strings/abseil_string_conversions.cc
@@ -9,7 +9,7 @@
#include "base/containers/span.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_piece.h"
-#include "third_party/abseil-cpp/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace gurl_base {
diff --git a/base/strings/abseil_string_conversions.h b/base/strings/abseil_string_conversions.h
index c821d93..1cee63d 100644
--- a/base/strings/abseil_string_conversions.h
+++ b/base/strings/abseil_string_conversions.h
@@ -10,7 +10,7 @@
#include "polyfills/base/base_export.h"
#include "base/containers/span.h"
#include "base/strings/string_piece.h"
-#include "third_party/abseil-cpp/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace gurl_base {
diff --git a/base/strings/abseil_string_conversions_unittest.cc b/base/strings/abseil_string_conversions_unittest.cc
index e5f70f0..6c8816d 100644
--- a/base/strings/abseil_string_conversions_unittest.cc
+++ b/base/strings/abseil_string_conversions_unittest.cc
@@ -10,7 +10,7 @@
#include "base/strings/string_piece.h"
#include "base/strings/string_piece_forward.h"
#include "testing/gtest/include/gtest/gtest.h"
-#include "third_party/abseil-cpp/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
namespace gurl_base {
diff --git a/base/strings/char_traits.h b/base/strings/char_traits.h
index 13f5833..fe01c53 100644
--- a/base/strings/char_traits.h
+++ b/base/strings/char_traits.h
@@ -29,6 +29,10 @@
// Returns the length of |s|, assuming null termination (and not including the
// terminating null).
static constexpr size_t length(const T* s) noexcept;
+
+ // Searches for character |c| within the first |n| characters of the sequence
+ // pointed to by |s|.
+ static constexpr const T* find(const T* s, size_t n, T c);
};
template <typename T>
@@ -56,6 +60,15 @@
return i;
}
+template <typename T>
+constexpr const T* CharTraits<T>::find(const T* s, size_t n, T c) {
+ for (; n; --n, ++s) {
+ if (std::char_traits<T>::eq(*s, c))
+ return s;
+ }
+ return nullptr;
+}
+
// char and wchar_t specialization of CharTraits that can use clang's constexpr
// instrinsics, where available.
#if HAS_FEATURE(cxx_constexpr_string_builtins)
@@ -70,6 +83,10 @@
static constexpr size_t length(const char* s) noexcept {
return __builtin_strlen(s);
}
+
+ static constexpr const char* find(const char* s, size_t n, char c) {
+ return __builtin_char_memchr(s, c, n);
+ }
};
template <>
@@ -83,6 +100,10 @@
static constexpr size_t length(const wchar_t* s) noexcept {
return __builtin_wcslen(s);
}
+
+ static constexpr const wchar_t* find(const wchar_t* s, size_t n, wchar_t c) {
+ return __builtin_wmemchr(s, c, n);
+ }
};
#endif
diff --git a/base/strings/char_traits_unittest.cc b/base/strings/char_traits_unittest.cc
index d0fdc07..d735d4a 100644
--- a/base/strings/char_traits_unittest.cc
+++ b/base/strings/char_traits_unittest.cc
@@ -3,7 +3,6 @@
// found in the LICENSE file.
#include "base/strings/char_traits.h"
-#include "base/strings/string16.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace gurl_base {
diff --git a/base/strings/escape.cc b/base/strings/escape.cc
index cf46fca..ff6f6d8 100644
--- a/base/strings/escape.cc
+++ b/base/strings/escape.cc
@@ -4,6 +4,7 @@
#include "base/strings/escape.h"
+#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/third_party/icu/icu_utf.h"
@@ -262,7 +263,7 @@
adjustments->clear();
// Do not unescape anything, return the |escaped_text| text.
if (rules == UnescapeRule::NONE)
- return escaped_text.as_string();
+ return std::string(escaped_text);
// The output of the unescaping is always smaller than the input, so we can
// reserve the input size to make sure we have enough buffer and don't have
@@ -335,11 +336,11 @@
return UnescapeURLWithAdjustmentsImpl(escaped_text, rules, nullptr);
}
-string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
+std::u16string UnescapeAndDecodeUTF8URLComponentWithAdjustments(
StringPiece text,
UnescapeRule::Type rules,
OffsetAdjuster::Adjustments* adjustments) {
- string16 result;
+ std::u16string result;
OffsetAdjuster::Adjustments unescape_adjustments;
std::string unescaped_url(
UnescapeURLWithAdjustmentsImpl(text, rules, &unescape_adjustments));
diff --git a/base/strings/escape.h b/base/strings/escape.h
index 0bb6aea..96ce110 100644
--- a/base/strings/escape.h
+++ b/base/strings/escape.h
@@ -11,7 +11,6 @@
#include <string>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_offset_string_conversions.h"
@@ -78,10 +77,10 @@
// Unescapes the given substring as a URL, and then tries to interpret the
// result as being encoded as UTF-8. If the result is convertible into UTF-8, it
// will be returned as converted. If it is not, the original escaped string will
-// be converted into a string16 and returned. |adjustments| provides
+// be converted into a std::u16string and returned. |adjustments| provides
// information on how the original string was adjusted to get the string
// returned.
-BASE_EXPORT string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
+BASE_EXPORT std::u16string UnescapeAndDecodeUTF8URLComponentWithAdjustments(
StringPiece text,
UnescapeRule::Type rules,
OffsetAdjuster::Adjustments* adjustments);
diff --git a/base/strings/escape_unittest.cc b/base/strings/escape_unittest.cc
index e6c0b1a..b8a5fd6 100644
--- a/base/strings/escape_unittest.cc
+++ b/base/strings/escape_unittest.cc
@@ -8,7 +8,6 @@
#include "base/strings/escape.h"
#include "base/strings/string_util.h"
-#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -250,7 +249,7 @@
// The adjustments argument is covered by the next test.
//
// TODO: Need to test unescape_spaces and unescape_percent.
- string16 decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments(
+ std::u16string decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments(
unescape_case.input, UnescapeRule::NORMAL, nullptr);
EXPECT_EQ(WideToUTF16(unescape_case.decoded), decoded);
}
diff --git a/base/strings/latin1_string_conversions.cc b/base/strings/latin1_string_conversions.cc
index 5569015..e4b4020 100644
--- a/base/strings/latin1_string_conversions.cc
+++ b/base/strings/latin1_string_conversions.cc
@@ -6,14 +6,14 @@
namespace gurl_base {
-string16 Latin1OrUTF16ToUTF16(size_t length,
- const Latin1Char* latin1,
- const char16* utf16) {
+std::u16string Latin1OrUTF16ToUTF16(size_t length,
+ const Latin1Char* latin1,
+ const char16_t* utf16) {
if (!length)
- return string16();
+ return std::u16string();
if (latin1)
- return string16(latin1, latin1 + length);
- return string16(utf16, utf16 + length);
+ return std::u16string(latin1, latin1 + length);
+ return std::u16string(utf16, utf16 + length);
}
} // namespace base
diff --git a/base/strings/latin1_string_conversions.h b/base/strings/latin1_string_conversions.h
index 3d60980..7b67073 100644
--- a/base/strings/latin1_string_conversions.h
+++ b/base/strings/latin1_string_conversions.h
@@ -10,7 +10,6 @@
#include <string>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
namespace gurl_base {
@@ -20,14 +19,15 @@
typedef unsigned char Latin1Char;
// This somewhat odd function is designed to help us convert from Blink Strings
-// to string16. A Blink string is either backed by an array of Latin-1
+// to std::u16string. A Blink string is either backed by an array of Latin-1
// characters or an array of UTF-16 characters. This function is called by
-// WebString::operator string16() to convert one or the other character array
-// to string16. This function is defined here rather than in WebString.h to
-// avoid binary bloat in all the callers of the conversion operator.
-BASE_EXPORT string16 Latin1OrUTF16ToUTF16(size_t length,
- const Latin1Char* latin1,
- const char16* utf16);
+// WebString::operator std::u16string() to convert one or the other character
+// array to std::u16string. This function is defined here rather than in
+// WebString.h to avoid binary bloat in all the callers of the conversion
+// operator.
+BASE_EXPORT std::u16string Latin1OrUTF16ToUTF16(size_t length,
+ const Latin1Char* latin1,
+ const char16_t* utf16);
} // namespace base
diff --git a/base/strings/nullable_string16.cc b/base/strings/nullable_string16.cc
deleted file mode 100644
index 618800d..0000000
--- a/base/strings/nullable_string16.cc
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright (c) 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/strings/nullable_string16.h"
-
-#include <ostream>
-#include <utility>
-
-namespace gurl_base {
-NullableString16::NullableString16() = default;
-NullableString16::NullableString16(const NullableString16& other) = default;
-NullableString16::NullableString16(NullableString16&& other) = default;
-
-NullableString16::NullableString16(const string16& string, bool is_null) {
- if (!is_null)
- string_.emplace(string);
-}
-
-NullableString16::NullableString16(Optional<string16> optional_string16)
- : string_(std::move(optional_string16)) {}
-
-NullableString16::~NullableString16() = default;
-NullableString16& NullableString16::operator=(const NullableString16& other) =
- default;
-NullableString16& NullableString16::operator=(NullableString16&& other) =
- default;
-
-std::ostream& operator<<(std::ostream& out, const NullableString16& value) {
- return value.is_null() ? out << "(null)" : out << value.string();
-}
-
-} // namespace base
diff --git a/base/strings/nullable_string16.h b/base/strings/nullable_string16.h
deleted file mode 100644
index f2ca7bd..0000000
--- a/base/strings/nullable_string16.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef BASE_STRINGS_NULLABLE_STRING16_H_
-#define BASE_STRINGS_NULLABLE_STRING16_H_
-
-#include <iosfwd>
-
-#include "polyfills/base/base_export.h"
-#include "base/optional.h"
-#include "base/strings/string16.h"
-#include "base/strings/string_util.h"
-
-namespace gurl_base {
-
-// This class is a simple wrapper for string16 which also contains a null
-// state. This should be used only where the difference between null and
-// empty is meaningful.
-class BASE_EXPORT NullableString16 {
- public:
- NullableString16();
- NullableString16(const NullableString16& other);
- NullableString16(NullableString16&& other);
- NullableString16(const string16& string, bool is_null);
- explicit NullableString16(Optional<string16> optional_string16);
- ~NullableString16();
-
- NullableString16& operator=(const NullableString16& other);
- NullableString16& operator=(NullableString16&& other);
-
- const string16& string() const {
- return string_ ? *string_ : EmptyString16();
- }
- bool is_null() const { return !string_; }
- const Optional<string16>& as_optional_string16() const { return string_; }
-
- private:
- Optional<string16> string_;
-};
-
-inline bool operator==(const NullableString16& a, const NullableString16& b) {
- return a.as_optional_string16() == b.as_optional_string16();
-}
-
-inline bool operator!=(const NullableString16& a, const NullableString16& b) {
- return !(a == b);
-}
-
-BASE_EXPORT std::ostream& operator<<(std::ostream& out,
- const NullableString16& value);
-
-} // namespace base
-
-#endif // BASE_STRINGS_NULLABLE_STRING16_H_
diff --git a/base/strings/nullable_string16_unittest.cc b/base/strings/nullable_string16_unittest.cc
deleted file mode 100644
index e3d063f..0000000
--- a/base/strings/nullable_string16_unittest.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/strings/nullable_string16.h"
-#include "base/strings/utf_string_conversions.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-namespace gurl_base {
-
-TEST(NullableString16Test, DefaultConstructor) {
- NullableString16 s;
- EXPECT_TRUE(s.is_null());
- EXPECT_EQ(string16(), s.string());
-}
-
-TEST(NullableString16Test, Equals) {
- NullableString16 a(ASCIIToUTF16("hello"), false);
- NullableString16 b(ASCIIToUTF16("hello"), false);
- EXPECT_EQ(a, b);
-}
-
-TEST(NullableString16Test, NotEquals) {
- NullableString16 a(ASCIIToUTF16("hello"), false);
- NullableString16 b(ASCIIToUTF16("world"), false);
- EXPECT_NE(a, b);
-}
-
-TEST(NullableString16Test, NotEqualsNull) {
- NullableString16 a(ASCIIToUTF16("hello"), false);
- NullableString16 b;
- EXPECT_NE(a, b);
-}
-
-} // namespace base
diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc
index 65ec075..d7c9a47 100644
--- a/base/strings/pattern.cc
+++ b/base/strings/pattern.cc
@@ -131,7 +131,7 @@
};
struct NextCharUTF16 {
- base_icu::UChar32 operator()(const char16** p, const char16* end) {
+ base_icu::UChar32 operator()(const char16_t** p, const char16_t* end) {
base_icu::UChar32 c;
int offset = 0;
CBU16_NEXT(*p, offset, end - *p, c);
diff --git a/base/strings/pattern_unittest.cc b/base/strings/pattern_unittest.cc
index 540f784..20383e8 100644
--- a/base/strings/pattern_unittest.cc
+++ b/base/strings/pattern_unittest.cc
@@ -37,10 +37,8 @@
EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
// Test UTF16 character matching.
- EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
- UTF8ToUTF16("*.com")));
- EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
- UTF8ToUTF16("He??o\\*1*")));
+ EXPECT_TRUE(MatchPattern(u"www.google.com", u"*.com"));
+ EXPECT_TRUE(MatchPattern(u"Hello*1234", u"He??o\\*1*"));
// Some test cases that might cause naive implementations to exhibit
// exponential run time or fail.
diff --git a/base/strings/strcat.cc b/base/strings/strcat.cc
index 6b007c7..c6b8faf 100644
--- a/base/strings/strcat.cc
+++ b/base/strings/strcat.cc
@@ -14,7 +14,7 @@
return internal::StrCatT(pieces);
}
-string16 StrCat(span<const StringPiece16> pieces) {
+std::u16string StrCat(span<const StringPiece16> pieces) {
return internal::StrCatT(pieces);
}
@@ -22,24 +22,24 @@
return internal::StrCatT(pieces);
}
-string16 StrCat(span<const string16> pieces) {
+std::u16string StrCat(span<const std::u16string> pieces) {
return internal::StrCatT(pieces);
}
void StrAppend(std::string* dest, span<const StringPiece> pieces) {
- internal::StrAppendT(dest, pieces);
+ internal::StrAppendT(*dest, pieces);
}
-void StrAppend(string16* dest, span<const StringPiece16> pieces) {
- internal::StrAppendT(dest, pieces);
+void StrAppend(std::u16string* dest, span<const StringPiece16> pieces) {
+ internal::StrAppendT(*dest, pieces);
}
void StrAppend(std::string* dest, span<const std::string> pieces) {
- internal::StrAppendT(dest, pieces);
+ internal::StrAppendT(*dest, pieces);
}
-void StrAppend(string16* dest, span<const string16> pieces) {
- internal::StrAppendT(dest, pieces);
+void StrAppend(std::u16string* dest, span<const std::u16string> pieces) {
+ internal::StrAppendT(*dest, pieces);
}
} // namespace base
diff --git a/base/strings/strcat.h b/base/strings/strcat.h
index 2d85304..fe35447 100644
--- a/base/strings/strcat.h
+++ b/base/strings/strcat.h
@@ -61,18 +61,19 @@
BASE_EXPORT std::string StrCat(span<const StringPiece> pieces)
WARN_UNUSED_RESULT;
-BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces)
+BASE_EXPORT std::u16string StrCat(span<const StringPiece16> pieces)
WARN_UNUSED_RESULT;
BASE_EXPORT std::string StrCat(span<const std::string> pieces)
WARN_UNUSED_RESULT;
-BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT;
+BASE_EXPORT std::u16string StrCat(span<const std::u16string> pieces)
+ WARN_UNUSED_RESULT;
// Initializer list forwards to the array version.
inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
return StrCat(make_span(pieces));
}
-inline string16 StrCat(std::initializer_list<StringPiece16> pieces) {
+inline std::u16string StrCat(std::initializer_list<StringPiece16> pieces) {
return StrCat(make_span(pieces));
}
@@ -85,9 +86,11 @@
// because it avoids a temporary string allocation and copy.
BASE_EXPORT void StrAppend(std::string* dest, span<const StringPiece> pieces);
-BASE_EXPORT void StrAppend(string16* dest, span<const StringPiece16> pieces);
+BASE_EXPORT void StrAppend(std::u16string* dest,
+ span<const StringPiece16> pieces);
BASE_EXPORT void StrAppend(std::string* dest, span<const std::string> pieces);
-BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces);
+BASE_EXPORT void StrAppend(std::u16string* dest,
+ span<const std::u16string> pieces);
// Initializer list forwards to the array version.
inline void StrAppend(std::string* dest,
@@ -95,7 +98,7 @@
StrAppend(dest, make_span(pieces));
}
-inline void StrAppend(string16* dest,
+inline void StrAppend(std::u16string* dest,
std::initializer_list<StringPiece16> pieces) {
StrAppend(dest, make_span(pieces));
}
diff --git a/base/strings/strcat_internal.h b/base/strings/strcat_internal.h
index 24387d6..8011946 100644
--- a/base/strings/strcat_internal.h
+++ b/base/strings/strcat_internal.h
@@ -8,48 +8,64 @@
#include <string>
#include "base/containers/span.h"
+#include "base/template_util.h"
namespace gurl_base {
namespace internal {
-// Reserves an additional amount of capacity in the given string, growing by at
-// least 2x if necessary. Used by StrAppendT().
-//
-// The "at least 2x" growing rule duplicates the exponential growth of
-// std::string. The problem is that most implementations of reserve() will grow
-// exactly to the requested amount instead of exponentially growing like would
-// happen when appending normally. If we didn't do this, an append after the
-// call to StrAppend() would definitely cause a reallocation, and loops with
-// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
-// StrAppend() to have the same semantics as std::string::append().
-template <typename String>
-void ReserveAdditionalIfNeeded(String* str,
- typename String::size_type additional) {
- const size_t required = str->size() + additional;
- // Check whether we need to reserve additional capacity at all.
- if (required <= str->capacity())
- return;
-
- str->reserve(std::max(required, str->capacity() * 2));
+// Optimized version of `std::basic_string::resize()` that skips zero
+// initialization of appended characters. Reading from the newly allocated
+// characters results in undefined behavior if they are not explicitly
+// initialized afterwards. Currently proposed for standardization as
+// std::basic_string::resize_and_overwrite: https://wg21.link/P1072R6
+template <typename CharT>
+auto Resize(std::basic_string<CharT>& str, size_t total_size, priority_tag<1>)
+ -> decltype(str.__resize_default_init(total_size)) {
+ str.__resize_default_init(total_size);
}
-template <typename DestString, typename InputString>
-void StrAppendT(DestString* dest, span<const InputString> pieces) {
- size_t additional_size = 0;
- for (const auto& cur : pieces)
- additional_size += cur.size();
- ReserveAdditionalIfNeeded(dest, additional_size);
+// Fallback to regular std::basic_string::resize() if invoking
+// __resize_default_init is ill-formed.
+template <typename CharT>
+void Resize(std::basic_string<CharT>& str, size_t total_size, priority_tag<0>) {
+ str.resize(total_size);
+}
+// Appends `pieces` to `dest`. Instead of simply calling `dest.append()`
+// `pieces.size()` times, this method first resizes `dest` to be of the desired
+// size, and then appends each piece via `std::char_traits::copy`. This achieves
+// two goals:
+// 1) Allocating the desired size all at once avoids other allocations that
+// could happen if intermediate allocations did not reserve enough capacity.
+// 2) Invoking std::char_traits::copy instead of std::basic_string::append
+// avoids having to write the terminating '\0' character n times.
+template <typename CharT, typename StringT>
+void StrAppendT(std::basic_string<CharT>& dest, span<const StringT> pieces) {
+ const size_t initial_size = dest.size();
+ size_t total_size = initial_size;
for (const auto& cur : pieces)
- dest->append(cur.data(), cur.size());
+ total_size += cur.size();
+
+ // Note: As opposed to `reserve()` calling `resize()` with an argument smaller
+ // than the current `capacity()` does not result in the string releasing spare
+ // capacity. Furthermore, common std::string implementations apply a geometric
+ // growth strategy if the current capacity is not sufficient for the newly
+ // added characters. Since this codepath is also triggered by `resize()`, we
+ // don't have to manage the std::string's capacity ourselves here to avoid
+ // performance hits in case `StrAppend()` gets called in a loop.
+ Resize(dest, total_size, priority_tag<1>());
+ CharT* dest_char = &dest[initial_size];
+ for (const auto& cur : pieces) {
+ std::char_traits<CharT>::copy(dest_char, cur.data(), cur.size());
+ dest_char += cur.size();
+ }
}
template <typename StringT>
auto StrCatT(span<const StringT> pieces) {
- std::basic_string<typename StringT::value_type, typename StringT::traits_type>
- result;
- StrAppendT(&result, pieces);
+ std::basic_string<typename StringT::value_type> result;
+ StrAppendT(result, pieces);
return result;
}
diff --git a/base/strings/strcat_unittest.cc b/base/strings/strcat_unittest.cc
index 9374c39..d6a68d3 100644
--- a/base/strings/strcat_unittest.cc
+++ b/base/strings/strcat_unittest.cc
@@ -18,14 +18,14 @@
}
TEST(StrCat, 16Bit) {
- string16 arg1 = ASCIIToUTF16("1");
- string16 arg2 = ASCIIToUTF16("22");
- string16 arg3 = ASCIIToUTF16("333");
+ std::u16string arg1 = u"1";
+ std::u16string arg2 = u"22";
+ std::u16string arg3 = u"333";
- EXPECT_EQ(ASCIIToUTF16(""), StrCat({string16()}));
- EXPECT_EQ(ASCIIToUTF16("1"), StrCat({arg1}));
- EXPECT_EQ(ASCIIToUTF16("122"), StrCat({arg1, arg2}));
- EXPECT_EQ(ASCIIToUTF16("122333"), StrCat({arg1, arg2, arg3}));
+ EXPECT_EQ(u"", StrCat({std::u16string()}));
+ EXPECT_EQ(u"1", StrCat({arg1}));
+ EXPECT_EQ(u"122", StrCat({arg1, arg2}));
+ EXPECT_EQ(u"122333", StrCat({arg1, arg2, arg3}));
}
TEST(StrAppend, 8Bit) {
@@ -45,23 +45,23 @@
}
TEST(StrAppend, 16Bit) {
- string16 arg1 = ASCIIToUTF16("1");
- string16 arg2 = ASCIIToUTF16("22");
- string16 arg3 = ASCIIToUTF16("333");
+ std::u16string arg1 = u"1";
+ std::u16string arg2 = u"22";
+ std::u16string arg3 = u"333";
- string16 result;
+ std::u16string result;
- result = ASCIIToUTF16("foo");
- StrAppend(&result, {string16()});
- EXPECT_EQ(ASCIIToUTF16("foo"), result);
+ result = u"foo";
+ StrAppend(&result, {std::u16string()});
+ EXPECT_EQ(u"foo", result);
- result = ASCIIToUTF16("foo");
+ result = u"foo";
StrAppend(&result, {arg1});
- EXPECT_EQ(ASCIIToUTF16("foo1"), result);
+ EXPECT_EQ(u"foo1", result);
- result = ASCIIToUTF16("foo");
+ result = u"foo";
StrAppend(&result, {arg1, arg2, arg3});
- EXPECT_EQ(ASCIIToUTF16("foo122333"), result);
+ EXPECT_EQ(u"foo122333", result);
}
TEST(StrAppendT, ReserveAdditionalIfNeeded) {
diff --git a/base/strings/strcat_win.cc b/base/strings/strcat_win.cc
index 60b22bc..affc99e 100644
--- a/base/strings/strcat_win.cc
+++ b/base/strings/strcat_win.cc
@@ -12,8 +12,6 @@
namespace gurl_base {
-#if defined(BASE_STRING16_IS_STD_U16STRING)
-
std::wstring StrCat(span<const WStringPiece> pieces) {
return internal::StrCatT(pieces);
}
@@ -23,13 +21,11 @@
}
void StrAppend(std::wstring* dest, span<const WStringPiece> pieces) {
- internal::StrAppendT(dest, pieces);
+ internal::StrAppendT(*dest, pieces);
}
void StrAppend(std::wstring* dest, span<const std::wstring> pieces) {
- internal::StrAppendT(dest, pieces);
+ internal::StrAppendT(*dest, pieces);
}
-#endif
-
} // namespace base
diff --git a/base/strings/strcat_win.h b/base/strings/strcat_win.h
index 68f8a58..70926bc 100644
--- a/base/strings/strcat_win.h
+++ b/base/strings/strcat_win.h
@@ -16,12 +16,7 @@
namespace gurl_base {
// The following section contains overloads of the cross-platform APIs for
-// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring
-// and gurl_base::string16 are distinct types, as otherwise this would result in an
-// ODR violation.
-// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is
-// std::u16string.
-#if defined(BASE_STRING16_IS_STD_U16STRING)
+// std::wstring and gurl_base::WStringPiece.
BASE_EXPORT void StrAppend(std::wstring* dest, span<const WStringPiece> pieces);
BASE_EXPORT void StrAppend(std::wstring* dest, span<const std::wstring> pieces);
@@ -38,7 +33,6 @@
inline std::wstring StrCat(std::initializer_list<WStringPiece> pieces) {
return StrCat(make_span(pieces));
}
-#endif // defined(BASE_STRING16_IS_STD_U16STRING)
} // namespace base
diff --git a/base/strings/string16.h b/base/strings/string16.h
deleted file mode 100644
index dc3ddc7..0000000
--- a/base/strings/string16.h
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef BASE_STRINGS_STRING16_H_
-#define BASE_STRINGS_STRING16_H_
-
-// WHAT:
-// Type aliases for string and character types supporting UTF-16 data. Prior to
-// C++11 there was no standard library solution for this, which is why wstring
-// was used where possible (i.e. where wchar_t holds UTF-16 encoded data).
-//
-// In C++11 we gained std::u16string, which is a cross-platform solution for
-// UTF-16 strings. This is now the string16 type where ever wchar_t does not
-// hold UTF16 data (i.e. commonly non-Windows platforms). Eventually this should
-// be used everywhere, at which point this type alias and this file should be
-// removed. https://crbug.com/911896 tracks the migration effort.
-
-#include <string>
-
-#include "build/build_config.h"
-
-#if defined(WCHAR_T_IS_UTF16)
-
-// Define a macro for wrapping construction of char16 arrays and string16s from
-// a literal string. This indirection allows for an easier migration of
-// gurl_base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one
-// character change to the macro will be necessary.
-// TODO(https://crbug.com/911896): Remove this macro once gurl_base::char16 is
-// char16_t on all platforms.
-#define STRING16_LITERAL(x) L##x
-
-namespace gurl_base {
-using char16 = wchar_t;
-using string16 = std::wstring;
-} // namespace base
-
-#else
-
-#define STRING16_LITERAL(x) u##x
-
-namespace gurl_base {
-using char16 = char16_t;
-using string16 = std::u16string;
-} // namespace base
-
-#endif // WCHAR_T_IS_UTF16
-
-#endif // BASE_STRINGS_STRING16_H_
diff --git a/base/strings/string16_unittest.cc b/base/strings/string16_unittest.cc
deleted file mode 100644
index 9cdb075..0000000
--- a/base/strings/string16_unittest.cc
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/strings/string16.h"
-
-#include "testing/gtest/include/gtest/gtest.h"
-
-namespace gurl_base {
-
-// Ensure that STRING16_LITERAL can be used to instantiate constants of type
-// char16 and char16[], respectively.
-TEST(String16Test, String16Literal) {
- static constexpr char16 kHelloChars[] = {
- STRING16_LITERAL('H'), STRING16_LITERAL('e'), STRING16_LITERAL('l'),
- STRING16_LITERAL('l'), STRING16_LITERAL('o'), STRING16_LITERAL('\0'),
- };
-
- static constexpr char16 kHelloStr[] = STRING16_LITERAL("Hello");
- EXPECT_EQ(std::char_traits<char16>::compare(kHelloChars, kHelloStr, 6), 0);
-}
-
-} // namespace base
diff --git a/base/strings/string_number_conversions.cc b/base/strings/string_number_conversions.cc
index d1886b1..b0eeafd 100644
--- a/base/strings/string_number_conversions.cc
+++ b/base/strings/string_number_conversions.cc
@@ -9,7 +9,6 @@
#include "base/containers/span.h"
#include "polyfills/base/logging.h"
-#include "base/strings/string16.h"
#include "base/strings/string_number_conversions_internal.h"
#include "base/strings/string_piece.h"
@@ -19,56 +18,56 @@
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(int value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(int value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(unsigned value) {
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(unsigned value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(unsigned value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(long value) {
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(long value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(long value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(unsigned long value) {
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(unsigned long value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(unsigned long value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(long long value) {
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(long long value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(long long value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(unsigned long long value) {
return internal::IntToStringT<std::string>(value);
}
-string16 NumberToString16(unsigned long long value) {
- return internal::IntToStringT<string16>(value);
+std::u16string NumberToString16(unsigned long long value) {
+ return internal::IntToStringT<std::u16string>(value);
}
std::string NumberToString(double value) {
return internal::DoubleToStringT<std::string>(value);
}
-string16 NumberToString16(double value) {
- return internal::DoubleToStringT<string16>(value);
+std::u16string NumberToString16(double value) {
+ return internal::DoubleToStringT<std::u16string>(value);
}
bool StringToInt(StringPiece input, int* output) {
diff --git a/base/strings/string_number_conversions.h b/base/strings/string_number_conversions.h
index 7bb51b6..d2f8af3 100644
--- a/base/strings/string_number_conversions.h
+++ b/base/strings/string_number_conversions.h
@@ -13,7 +13,6 @@
#include "polyfills/base/base_export.h"
#include "base/containers/span.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
@@ -39,19 +38,19 @@
// Ignores locale! see warning above.
BASE_EXPORT std::string NumberToString(int value);
-BASE_EXPORT string16 NumberToString16(int value);
+BASE_EXPORT std::u16string NumberToString16(int value);
BASE_EXPORT std::string NumberToString(unsigned int value);
-BASE_EXPORT string16 NumberToString16(unsigned int value);
+BASE_EXPORT std::u16string NumberToString16(unsigned int value);
BASE_EXPORT std::string NumberToString(long value);
-BASE_EXPORT string16 NumberToString16(long value);
+BASE_EXPORT std::u16string NumberToString16(long value);
BASE_EXPORT std::string NumberToString(unsigned long value);
-BASE_EXPORT string16 NumberToString16(unsigned long value);
+BASE_EXPORT std::u16string NumberToString16(unsigned long value);
BASE_EXPORT std::string NumberToString(long long value);
-BASE_EXPORT string16 NumberToString16(long long value);
+BASE_EXPORT std::u16string NumberToString16(long long value);
BASE_EXPORT std::string NumberToString(unsigned long long value);
-BASE_EXPORT string16 NumberToString16(unsigned long long value);
+BASE_EXPORT std::u16string NumberToString16(unsigned long long value);
BASE_EXPORT std::string NumberToString(double value);
-BASE_EXPORT string16 NumberToString16(double value);
+BASE_EXPORT std::u16string NumberToString16(double value);
// String -> number conversions ------------------------------------------------
diff --git a/base/strings/string_number_conversions_fuzzer.cc b/base/strings/string_number_conversions_fuzzer.cc
index 012887a..e6e5c6f 100644
--- a/base/strings/string_number_conversions_fuzzer.cc
+++ b/base/strings/string_number_conversions_fuzzer.cc
@@ -44,7 +44,7 @@
const size_t size,
bool (*string_to_num)(gurl_base::StringPiece16,
NumberType*)) {
- return CheckRoundtripsT<NumberType, gurl_base::StringPiece16, gurl_base::string16>(
+ return CheckRoundtripsT<NumberType, gurl_base::StringPiece16, std::u16string>(
data, size, &gurl_base::NumberToString16, string_to_num);
}
@@ -81,7 +81,7 @@
// Test for StringPiece16 if size is even.
if (size % 2 == 0) {
gurl_base::StringPiece16 string_piece_input16(
- reinterpret_cast<const gurl_base::char16*>(data), size / 2);
+ reinterpret_cast<const char16_t*>(data), size / 2);
gurl_base::StringToInt(string_piece_input16, &out_int);
gurl_base::StringToUint(string_piece_input16, &out_uint);
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h
index 4f917f8..8223b59 100644
--- a/base/strings/string_number_conversions_internal.h
+++ b/base/strings/string_number_conversions_internal.h
@@ -18,6 +18,7 @@
#include "base/numerics/safe_math.h"
#include "base/strings/string_util.h"
#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
+#include "absl/types/optional.h"
namespace gurl_base {
@@ -58,7 +59,7 @@
// Utility to convert a character to a digit in a given base
template <int BASE, typename CHAR>
-Optional<uint8_t> CharToDigit(CHAR c) {
+absl::optional<uint8_t> CharToDigit(CHAR c) {
static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]");
if (c >= '0' && c < '0' + std::min(BASE, 10))
return c - '0';
@@ -69,7 +70,7 @@
if (c >= 'A' && c < 'A' + BASE - 10)
return c - 'A' + 10;
- return gurl_base::nullopt;
+ return absl::nullopt;
}
// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
@@ -88,9 +89,9 @@
};
template <>
-class WhitespaceHelper<char16> {
+class WhitespaceHelper<char16_t> {
public:
- static bool Invoke(char16 c) { return 0 != iswspace(c); }
+ static bool Invoke(char16_t c) { return 0 != iswspace(c); }
};
template <typename CHAR>
@@ -133,7 +134,7 @@
}
for (Iter current = begin; current != end; ++current) {
- Optional<uint8_t> new_digit = CharToDigit<kBase>(*current);
+ absl::optional<uint8_t> new_digit = CharToDigit<kBase>(*current);
if (!new_digit) {
return {value, false};
@@ -179,8 +180,8 @@
};
};
-template <typename Number, int kBase, typename Str>
-auto StringToNumber(BasicStringPiece<Str> input) {
+template <typename Number, int kBase, typename CharT>
+auto StringToNumber(BasicStringPiece<CharT> input) {
using Parser = StringToNumberParser<Number, kBase>;
using Result = typename Parser::Result;
@@ -212,16 +213,16 @@
return result;
}
-template <typename STR, typename VALUE>
-bool StringToIntImpl(BasicStringPiece<STR> input, VALUE& output) {
- auto result = StringToNumber<VALUE, 10>(input);
+template <typename T, typename VALUE, typename CharT = typename T::value_type>
+bool StringToIntImpl(T input, VALUE& output) {
+ auto result = StringToNumber<VALUE, 10, CharT>(input);
output = result.value;
return result.valid;
}
-template <typename STR, typename VALUE>
-bool HexStringToIntImpl(BasicStringPiece<STR> input, VALUE& output) {
- auto result = StringToNumber<VALUE, 16>(input);
+template <typename T, typename VALUE, typename CharT = typename T::value_type>
+bool HexStringToIntImpl(T input, VALUE& output) {
+ auto result = StringToNumber<VALUE, 16, CharT>(input);
output = result.value;
return result.valid;
}
@@ -285,9 +286,9 @@
return false;
for (uintptr_t i = 0; i < count / 2; ++i) {
// most significant 4 bits
- Optional<uint8_t> msb = CharToDigit<16>(input[i * 2]);
+ absl::optional<uint8_t> msb = CharToDigit<16>(input[i * 2]);
// least significant 4 bits
- Optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]);
+ absl::optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]);
if (!msb || !lsb) {
return false;
}
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc
index 62a31f6..f836316 100644
--- a/base/strings/string_number_conversions_unittest.cc
+++ b/base/strings/string_number_conversions_unittest.cc
@@ -145,7 +145,7 @@
EXPECT_EQ(i.success, StringToInt(i.input, &output));
EXPECT_EQ(i.output, output);
- string16 utf16_input = UTF8ToUTF16(i.input);
+ std::u16string utf16_input = UTF8ToUTF16(i.input);
output = i.output ^ 1; // Ensure StringToInt wrote something.
EXPECT_EQ(i.success, StringToInt(utf16_input, &output));
EXPECT_EQ(i.output, output);
@@ -160,14 +160,14 @@
EXPECT_FALSE(StringToInt(input_string, &output));
EXPECT_EQ(6, output);
- string16 utf16_input = UTF8ToUTF16(input_string);
+ std::u16string utf16_input = UTF8ToUTF16(input_string);
output = 0;
EXPECT_FALSE(StringToInt(utf16_input, &output));
EXPECT_EQ(6, output);
output = 0;
- const char16 negative_wide_input[] = {0xFF4D, '4', '2', 0};
- EXPECT_FALSE(StringToInt(string16(negative_wide_input), &output));
+ const char16_t negative_wide_input[] = {0xFF4D, '4', '2', 0};
+ EXPECT_FALSE(StringToInt(std::u16string(negative_wide_input), &output));
EXPECT_EQ(0, output);
}
@@ -211,7 +211,7 @@
EXPECT_EQ(i.success, StringToUint(i.input, &output));
EXPECT_EQ(i.output, output);
- string16 utf16_input = UTF8ToUTF16(i.input);
+ std::u16string utf16_input = UTF8ToUTF16(i.input);
output = i.output ^ 1; // Ensure StringToUint wrote something.
EXPECT_EQ(i.success, StringToUint(utf16_input, &output));
EXPECT_EQ(i.output, output);
@@ -226,14 +226,14 @@
EXPECT_FALSE(StringToUint(input_string, &output));
EXPECT_EQ(6U, output);
- string16 utf16_input = UTF8ToUTF16(input_string);
+ std::u16string utf16_input = UTF8ToUTF16(input_string);
output = 0;
EXPECT_FALSE(StringToUint(utf16_input, &output));
EXPECT_EQ(6U, output);
output = 0;
- const char16 negative_wide_input[] = {0xFF4D, '4', '2', 0};
- EXPECT_FALSE(StringToUint(string16(negative_wide_input), &output));
+ const char16_t negative_wide_input[] = {0xFF4D, '4', '2', 0};
+ EXPECT_FALSE(StringToUint(std::u16string(negative_wide_input), &output));
EXPECT_EQ(0U, output);
}
@@ -281,7 +281,7 @@
EXPECT_EQ(i.success, StringToInt64(i.input, &output));
EXPECT_EQ(i.output, output);
- string16 utf16_input = UTF8ToUTF16(i.input);
+ std::u16string utf16_input = UTF8ToUTF16(i.input);
output = 0;
EXPECT_EQ(i.success, StringToInt64(utf16_input, &output));
EXPECT_EQ(i.output, output);
@@ -296,7 +296,7 @@
EXPECT_FALSE(StringToInt64(input_string, &output));
EXPECT_EQ(6, output);
- string16 utf16_input = UTF8ToUTF16(input_string);
+ std::u16string utf16_input = UTF8ToUTF16(input_string);
output = 0;
EXPECT_FALSE(StringToInt64(utf16_input, &output));
EXPECT_EQ(6, output);
@@ -348,7 +348,7 @@
EXPECT_EQ(i.success, StringToUint64(i.input, &output));
EXPECT_EQ(i.output, output);
- string16 utf16_input = UTF8ToUTF16(i.input);
+ std::u16string utf16_input = UTF8ToUTF16(i.input);
output = 0;
EXPECT_EQ(i.success, StringToUint64(utf16_input, &output));
EXPECT_EQ(i.output, output);
@@ -363,7 +363,7 @@
EXPECT_FALSE(StringToUint64(input_string, &output));
EXPECT_EQ(6U, output);
- string16 utf16_input = UTF8ToUTF16(input_string);
+ std::u16string utf16_input = UTF8ToUTF16(input_string);
output = 0;
EXPECT_FALSE(StringToUint64(utf16_input, &output));
EXPECT_EQ(6U, output);
@@ -417,7 +417,7 @@
EXPECT_EQ(i.success, StringToSizeT(i.input, &output));
EXPECT_EQ(i.output, output);
- string16 utf16_input = UTF8ToUTF16(i.input);
+ std::u16string utf16_input = UTF8ToUTF16(i.input);
output = 0;
EXPECT_EQ(i.success, StringToSizeT(utf16_input, &output));
EXPECT_EQ(i.output, output);
@@ -432,7 +432,7 @@
EXPECT_FALSE(StringToSizeT(input_string, &output));
EXPECT_EQ(6U, output);
- string16 utf16_input = UTF8ToUTF16(input_string);
+ std::u16string utf16_input = UTF8ToUTF16(input_string);
output = 0;
EXPECT_FALSE(StringToSizeT(utf16_input, &output));
EXPECT_EQ(6U, output);
diff --git a/base/strings/string_number_conversions_win.cc b/base/strings/string_number_conversions_win.cc
index abae437..381f99d 100644
--- a/base/strings/string_number_conversions_win.cc
+++ b/base/strings/string_number_conversions_win.cc
@@ -39,7 +39,6 @@
return internal::DoubleToStringT<std::wstring>(value);
}
-#if defined(BASE_STRING16_IS_STD_U16STRING)
namespace internal {
template <>
@@ -74,6 +73,5 @@
return internal::StringToDoubleImpl(
input, reinterpret_cast<const uint16_t*>(input.data()), *output);
}
-#endif // defined(BASE_STRING16_IS_STD_U16STRING)
} // namespace base
diff --git a/base/strings/string_number_conversions_win.h b/base/strings/string_number_conversions_win.h
index f8b645f..d63d2d7 100644
--- a/base/strings/string_number_conversions_win.h
+++ b/base/strings/string_number_conversions_win.h
@@ -21,19 +21,13 @@
BASE_EXPORT std::wstring NumberToWString(double value);
// The following section contains overloads of the cross-platform APIs for
-// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring
-// and gurl_base::string16 are distinct types, as otherwise this would result in an
-// ODR violation.
-// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is
-// std::u16string.
-#if defined(BASE_STRING16_IS_STD_U16STRING)
+// std::wstring and gurl_base::WStringPiece.
BASE_EXPORT bool StringToInt(WStringPiece input, int* output);
BASE_EXPORT bool StringToUint(WStringPiece input, unsigned* output);
BASE_EXPORT bool StringToInt64(WStringPiece input, int64_t* output);
BASE_EXPORT bool StringToUint64(WStringPiece input, uint64_t* output);
BASE_EXPORT bool StringToSizeT(WStringPiece input, size_t* output);
BASE_EXPORT bool StringToDouble(WStringPiece input, double* output);
-#endif // defined(BASE_STRING16_IS_STD_U16STRING)
} // namespace base
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
index fe9be5f..f9b9422 100644
--- a/base/strings/string_piece.cc
+++ b/base/strings/string_piece.cc
@@ -1,15 +1,14 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-// Copied from strings/stringpiece.cc with modifications
#include "base/strings/string_piece.h"
-#include <limits.h>
-#include <string.h>
-
#include <algorithm>
+#include <climits>
+#include <limits>
#include <ostream>
+#include <string>
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
@@ -37,8 +36,9 @@
// MSVC doesn't like complex extern templates and DLLs.
#if !defined(COMPILER_MSVC)
-template class BasicStringPiece<std::string>;
-template class BasicStringPiece<string16>;
+template class BasicStringPiece<char>;
+template class BasicStringPiece<char16_t>;
+template class BasicStringPiece<wchar_t>;
#endif
std::ostream& operator<<(std::ostream& o, StringPiece piece) {
@@ -50,24 +50,22 @@
return o << UTF16ToUTF8(piece);
}
-#if !defined(WCHAR_T_IS_UTF16)
std::ostream& operator<<(std::ostream& o, WStringPiece piece) {
return o << WideToUTF8(piece);
}
-#endif
namespace internal {
-template <typename STR>
-size_t findT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) {
+template <typename T, typename CharT = typename T::value_type>
+size_t findT(T self, T s, size_t pos) {
if (pos > self.size())
- return BasicStringPiece<STR>::npos;
+ return BasicStringPiece<CharT>::npos;
- typename BasicStringPiece<STR>::const_iterator result =
+ typename BasicStringPiece<CharT>::const_iterator result =
std::search(self.begin() + pos, self.end(), s.begin(), s.end());
const size_t xpos =
static_cast<size_t>(result - self.begin());
- return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos;
+ return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<CharT>::npos;
}
size_t find(StringPiece self, StringPiece s, size_t pos) {
@@ -78,41 +76,20 @@
return findT(self, s, pos);
}
-template <typename STR>
-size_t findT(BasicStringPiece<STR> self,
- typename STR::value_type c,
- size_t pos) {
- if (pos >= self.size())
- return BasicStringPiece<STR>::npos;
-
- typename BasicStringPiece<STR>::const_iterator result =
- std::find(self.begin() + pos, self.end(), c);
- return result != self.end() ?
- static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
-}
-
-size_t find(StringPiece self, char c, size_t pos) {
- return findT(self, c, pos);
-}
-
-size_t find(StringPiece16 self, char16 c, size_t pos) {
- return findT(self, c, pos);
-}
-
-template <typename STR>
-size_t rfindT(BasicStringPiece<STR> self, BasicStringPiece<STR> s, size_t pos) {
+template <typename T, typename CharT = typename T::value_type>
+size_t rfindT(T self, T s, size_t pos) {
if (self.size() < s.size())
- return BasicStringPiece<STR>::npos;
+ return BasicStringPiece<CharT>::npos;
if (s.empty())
return std::min(self.size(), pos);
- typename BasicStringPiece<STR>::const_iterator last =
+ typename BasicStringPiece<CharT>::const_iterator last =
self.begin() + std::min(self.size() - s.size(), pos) + s.size();
- typename BasicStringPiece<STR>::const_iterator result =
+ typename BasicStringPiece<CharT>::const_iterator result =
std::find_end(self.begin(), last, s.begin(), s.end());
- return result != last ?
- static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
+ return result != last ? static_cast<size_t>(result - self.begin())
+ : BasicStringPiece<CharT>::npos;
}
size_t rfind(StringPiece self, StringPiece s, size_t pos) {
@@ -123,31 +100,6 @@
return rfindT(self, s, pos);
}
-template <typename STR>
-size_t rfindT(BasicStringPiece<STR> self,
- typename STR::value_type c,
- size_t pos) {
- if (self.size() == 0)
- return BasicStringPiece<STR>::npos;
-
- for (size_t i = std::min(pos, self.size() - 1); ;
- --i) {
- if (self.data()[i] == c)
- return i;
- if (i == 0)
- break;
- }
- return BasicStringPiece<STR>::npos;
-}
-
-size_t rfind(StringPiece self, char c, size_t pos) {
- return rfindT(self, c, pos);
-}
-
-size_t rfind(StringPiece16 self, char16 c, size_t pos) {
- return rfindT(self, c, pos);
-}
-
// 8-bit version using lookup table.
size_t find_first_of(StringPiece self, StringPiece s, size_t pos) {
if (self.size() == 0 || s.size() == 0)
@@ -155,7 +107,7 @@
// Avoid the cost of BuildLookupTable() for a single-character search.
if (s.size() == 1)
- return find(self, s.data()[0], pos);
+ return self.find(s.data()[0], pos);
bool lookup[UCHAR_MAX + 1] = { false };
BuildLookupTable(s, lookup);
@@ -167,29 +119,34 @@
return StringPiece::npos;
}
-// 16-bit brute force version.
-size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+// Generic brute force version.
+template <typename T, typename CharT = typename T::value_type>
+size_t find_first_ofT(T self, T s, size_t pos) {
// Use the faster std::find() if searching for a single character.
- StringPiece16::const_iterator found =
+ typename BasicStringPiece<CharT>::const_iterator found =
s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
: std::find_first_of(self.begin() + pos, self.end(),
s.begin(), s.end());
if (found == self.end())
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
return found - self.begin();
}
+size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+ return find_first_ofT(self, s, pos);
+}
+
// 8-bit version using lookup table.
size_t find_first_not_of(StringPiece self, StringPiece s, size_t pos) {
- if (self.size() == 0)
+ if (pos >= self.size())
return StringPiece::npos;
if (s.size() == 0)
- return 0;
+ return pos;
// Avoid the cost of BuildLookupTable() for a single-character search.
if (s.size() == 1)
- return find_first_not_of(self, s.data()[0], pos);
+ return self.find_first_not_of(s.data()[0], pos);
bool lookup[UCHAR_MAX + 1] = { false };
BuildLookupTable(s, lookup);
@@ -201,12 +158,11 @@
return StringPiece::npos;
}
-// 16-bit brute-force version.
-BASE_EXPORT size_t find_first_not_of(StringPiece16 self,
- StringPiece16 s,
- size_t pos) {
+// Generic brute-force version.
+template <typename T, typename CharT = typename T::value_type>
+size_t find_first_not_ofT(T self, T s, size_t pos) {
if (self.size() == 0)
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
for (size_t self_i = pos; self_i < self.size(); ++self_i) {
bool found = false;
@@ -219,30 +175,11 @@
if (!found)
return self_i;
}
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
}
-template <typename STR>
-size_t find_first_not_ofT(BasicStringPiece<STR> self,
- typename STR::value_type c,
- size_t pos) {
- if (self.size() == 0)
- return BasicStringPiece<STR>::npos;
-
- for (; pos < self.size(); ++pos) {
- if (self.data()[pos] != c) {
- return pos;
- }
- }
- return BasicStringPiece<STR>::npos;
-}
-
-size_t find_first_not_of(StringPiece self, char c, size_t pos) {
- return find_first_not_ofT(self, c, pos);
-}
-
-size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos) {
- return find_first_not_ofT(self, c, pos);
+size_t find_first_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+ return find_first_not_ofT(self, s, pos);
}
// 8-bit version using lookup table.
@@ -252,7 +189,7 @@
// Avoid the cost of BuildLookupTable() for a single-character search.
if (s.size() == 1)
- return rfind(self, s.data()[0], pos);
+ return self.rfind(s.data()[0], pos);
bool lookup[UCHAR_MAX + 1] = { false };
BuildLookupTable(s, lookup);
@@ -265,10 +202,11 @@
return StringPiece::npos;
}
-// 16-bit brute-force version.
-size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+// Generic brute-force version.
+template <typename T, typename CharT = typename T::value_type>
+size_t find_last_ofT(T self, T s, size_t pos) {
if (self.size() == 0)
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
for (size_t self_i = std::min(pos, self.size() - 1); ;
--self_i) {
@@ -279,7 +217,11 @@
if (self_i == 0)
break;
}
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
+}
+
+size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+ return find_last_ofT(self, s, pos);
}
// 8-bit version using lookup table.
@@ -293,7 +235,7 @@
// Avoid the cost of BuildLookupTable() for a single-character search.
if (s.size() == 1)
- return find_last_not_of(self, s.data()[0], pos);
+ return self.find_last_not_of(s.data()[0], pos);
bool lookup[UCHAR_MAX + 1] = { false };
BuildLookupTable(s, lookup);
@@ -306,8 +248,9 @@
return StringPiece::npos;
}
-// 16-bit brute-force version.
-size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+// Generic brute-force version.
+template <typename T, typename CharT = typename T::value_type>
+size_t find_last_not_ofT(T self, T s, size_t pos) {
if (self.size() == 0)
return StringPiece::npos;
@@ -324,32 +267,35 @@
if (self_i == 0)
break;
}
- return StringPiece16::npos;
+ return BasicStringPiece<CharT>::npos;
}
-template <typename STR>
-size_t find_last_not_ofT(BasicStringPiece<STR> self,
- typename STR::value_type c,
- size_t pos) {
- if (self.size() == 0)
- return BasicStringPiece<STR>::npos;
-
- for (size_t i = std::min(pos, self.size() - 1); ; --i) {
- if (self.data()[i] != c)
- return i;
- if (i == 0)
- break;
- }
- return BasicStringPiece<STR>::npos;
+size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
+ return find_last_not_ofT(self, s, pos);
}
-size_t find_last_not_of(StringPiece self, char c, size_t pos) {
- return find_last_not_ofT(self, c, pos);
+size_t find(WStringPiece self, WStringPiece s, size_t pos) {
+ return findT(self, s, pos);
}
-size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos) {
- return find_last_not_ofT(self, c, pos);
+size_t rfind(WStringPiece self, WStringPiece s, size_t pos) {
+ return rfindT(self, s, pos);
}
+size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos) {
+ return find_first_ofT(self, s, pos);
+}
+
+size_t find_first_not_of(WStringPiece self, WStringPiece s, size_t pos) {
+ return find_first_not_ofT(self, s, pos);
+}
+
+size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos) {
+ return find_last_ofT(self, s, pos);
+}
+
+size_t find_last_not_of(WStringPiece self, WStringPiece s, size_t pos) {
+ return find_last_not_ofT(self, s, pos);
+}
} // namespace internal
} // namespace base
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index dea85e3..03f5103 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -1,7 +1,6 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-// Copied from strings/stringpiece.h with modifications
//
// A string-like object that points to a sized piece of memory.
//
@@ -25,6 +24,7 @@
#include <stddef.h>
#include <iosfwd>
+#include <limits>
#include <ostream>
#include <string>
#include <type_traits>
@@ -32,7 +32,6 @@
#include "polyfills/base/base_export.h"
#include "polyfills/base/check_op.h"
#include "base/strings/char_traits.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece_forward.h"
#include "build/build_config.h"
@@ -51,13 +50,9 @@
BASE_EXPORT size_t find(StringPiece self, StringPiece s, size_t pos);
BASE_EXPORT size_t find(StringPiece16 self, StringPiece16 s, size_t pos);
-BASE_EXPORT size_t find(StringPiece self, char c, size_t pos);
-BASE_EXPORT size_t find(StringPiece16 self, char16 c, size_t pos);
BASE_EXPORT size_t rfind(StringPiece self, StringPiece s, size_t pos);
BASE_EXPORT size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos);
-BASE_EXPORT size_t rfind(StringPiece self, char c, size_t pos);
-BASE_EXPORT size_t rfind(StringPiece16 self, char16 c, size_t pos);
BASE_EXPORT size_t find_first_of(StringPiece self, StringPiece s, size_t pos);
BASE_EXPORT size_t find_first_of(StringPiece16 self,
@@ -70,15 +65,11 @@
BASE_EXPORT size_t find_first_not_of(StringPiece16 self,
StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_first_not_of(StringPiece self, char c, size_t pos);
-BASE_EXPORT size_t find_first_not_of(StringPiece16 self, char16 c, size_t pos);
BASE_EXPORT size_t find_last_of(StringPiece self, StringPiece s, size_t pos);
BASE_EXPORT size_t find_last_of(StringPiece16 self,
StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_last_of(StringPiece self, char c, size_t pos);
-BASE_EXPORT size_t find_last_of(StringPiece16 self, char16 c, size_t pos);
BASE_EXPORT size_t find_last_not_of(StringPiece self,
StringPiece s,
@@ -86,466 +77,539 @@
BASE_EXPORT size_t find_last_not_of(StringPiece16 self,
StringPiece16 s,
size_t pos);
-BASE_EXPORT size_t find_last_not_of(StringPiece16 self, char16 c, size_t pos);
-BASE_EXPORT size_t find_last_not_of(StringPiece self, char c, size_t pos);
+
+BASE_EXPORT size_t find(WStringPiece self, WStringPiece s, size_t pos);
+BASE_EXPORT size_t rfind(WStringPiece self, WStringPiece s, size_t pos);
+BASE_EXPORT size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos);
+BASE_EXPORT size_t find_first_not_of(WStringPiece self,
+ WStringPiece s,
+ size_t pos);
+BASE_EXPORT size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos);
+BASE_EXPORT size_t find_last_not_of(WStringPiece self,
+ WStringPiece s,
+ size_t pos);
} // namespace internal
// BasicStringPiece ------------------------------------------------------------
-// Defines the types, methods, operators, and data members common to both
-// StringPiece and StringPiece16.
-//
-// This is templatized by string class type rather than character type, so
-// BasicStringPiece<std::string> or BasicStringPiece<gurl_base::string16>.
-template <typename STRING_TYPE> class BasicStringPiece {
+// Mirrors the C++17 version of std::basic_string_view<> as closely as possible,
+// except where noted below.
+template <typename CharT, typename Traits>
+class BasicStringPiece {
public:
- // Standard STL container boilerplate.
- typedef size_t size_type;
- typedef typename STRING_TYPE::traits_type traits_type;
- typedef typename STRING_TYPE::value_type value_type;
- typedef const value_type* pointer;
- typedef const value_type& reference;
- typedef const value_type& const_reference;
- typedef ptrdiff_t difference_type;
- typedef const value_type* const_iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ using traits_type = Traits;
+ using value_type = CharT;
+ using pointer = CharT*;
+ using const_pointer = const CharT*;
+ using reference = CharT&;
+ using const_reference = const CharT&;
+ using const_iterator = const CharT*;
+ using iterator = const_iterator;
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+ using reverse_iterator = const_reverse_iterator;
+ using size_type = size_t;
+ using difference_type = ptrdiff_t;
- static const size_type npos;
-
- public:
- // We provide non-explicit singleton constructors so users can pass
- // in a "const char*" or a "string" wherever a "StringPiece" is
- // expected (likewise for char16, string16, StringPiece16).
- constexpr BasicStringPiece() : ptr_(nullptr), length_(0) {}
- // TODO(crbug.com/1049498): Construction from nullptr is not allowed for
- // std::basic_string_view, so remove the special handling for it.
- // Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
+ constexpr BasicStringPiece() noexcept : ptr_(nullptr), length_(0) {}
+ constexpr BasicStringPiece(const BasicStringPiece& other) noexcept = default;
+ constexpr BasicStringPiece& operator=(const BasicStringPiece& view) noexcept =
+ default;
+ constexpr BasicStringPiece(const CharT* s, size_type count)
+ : ptr_(s), length_(count) {}
+ // Note: This doesn't just use traits_type::length(), since that
// isn't constexpr until C++17.
- constexpr BasicStringPiece(const value_type* str)
- : ptr_(str), length_(!str ? 0 : CharTraits<value_type>::length(str)) {}
+ constexpr BasicStringPiece(const CharT* s)
+ : ptr_(s), length_(s ? CharTraits<CharT>::length(s) : 0) {
+ // Intentional STL deviation: Null-check instead of UB.
+ GURL_CHECK(s);
+ }
// Explicitly disallow construction from nullptr. Note that this does not
// catch construction from runtime strings that might be null.
// Note: The following is just a more elaborate way of spelling
// `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is
// not supported by the PNaCl toolchain.
- // TODO(crbug.com/1049498): Remove once we GURL_CHECK(str) in the constructor
- // above.
template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>>
BasicStringPiece(T) {
static_assert(sizeof(T) == 0, // Always false.
"StringPiece does not support construction from nullptr, use "
"the default constructor instead.");
}
- BasicStringPiece(const STRING_TYPE& str)
+
+ // These are necessary because std::basic_string provides construction from
+ // (an object convertible to) a std::basic_string_view, as well as an explicit
+ // cast operator to a std::basic_string_view, but (obviously) not from/to a
+ // BasicStringPiece.
+ BasicStringPiece(const std::basic_string<CharT>& str)
: ptr_(str.data()), length_(str.size()) {}
- constexpr BasicStringPiece(const value_type* offset, size_type len)
- : ptr_(offset), length_(len) {}
-
- // data() may return a pointer to a buffer with embedded NULs, and the
- // returned buffer may or may not be null terminated. Therefore it is
- // typically a mistake to pass data() to a routine that expects a NUL
- // terminated string.
- constexpr const value_type* data() const { return ptr_; }
- constexpr size_type size() const noexcept { return length_; }
- constexpr size_type length() const noexcept { return length_; }
- constexpr bool empty() const noexcept { return length_ == 0; }
-
- constexpr value_type operator[](size_type i) const {
- GURL_CHECK(i < length_);
- return ptr_[i];
- }
-
- constexpr value_type front() const {
- GURL_CHECK_NE(0UL, length_);
- return ptr_[0];
- }
-
- constexpr value_type back() const {
- GURL_CHECK_NE(0UL, length_);
- return ptr_[length_ - 1];
- }
-
- constexpr void remove_prefix(size_type n) {
- GURL_CHECK(n <= length_);
- ptr_ += n;
- length_ -= n;
- }
-
- constexpr void remove_suffix(size_type n) {
- GURL_CHECK(n <= length_);
- length_ -= n;
- }
-
- // This is the style of conversion preferred by std::string_view in C++17.
- explicit operator STRING_TYPE() const {
- return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
- }
-
- // Deprecated, use operator STRING_TYPE() instead.
- // TODO(crbug.com/1049498): Remove for all STRING_TYPEs.
- template <typename StrT = STRING_TYPE,
- typename = std::enable_if_t<std::is_same<StrT, std::string>::value>>
- STRING_TYPE as_string() const {
- return STRING_TYPE(*this);
+ explicit operator std::basic_string<CharT>() const {
+ return std::basic_string<CharT>(data(), size());
}
constexpr const_iterator begin() const noexcept { return ptr_; }
+ constexpr const_iterator cbegin() const noexcept { return ptr_; }
constexpr const_iterator end() const noexcept { return ptr_ + length_; }
+ constexpr const_iterator cend() const noexcept { return ptr_ + length_; }
constexpr const_reverse_iterator rbegin() const noexcept {
return const_reverse_iterator(ptr_ + length_);
}
+ constexpr const_reverse_iterator crbegin() const noexcept {
+ return const_reverse_iterator(ptr_ + length_);
+ }
constexpr const_reverse_iterator rend() const noexcept {
return const_reverse_iterator(ptr_);
}
+ constexpr const_reverse_iterator crend() const noexcept {
+ return const_reverse_iterator(ptr_);
+ }
- size_type max_size() const { return length_; }
- size_type capacity() const { return length_; }
+ constexpr const_reference operator[](size_type pos) const {
+ // Intentional STL deviation: Bounds-check instead of UB.
+ return at(pos);
+ }
+ constexpr const_reference at(size_type pos) const {
+ GURL_CHECK_LT(pos, size());
+ return data()[pos];
+ }
- // String operations, see https://wg21.link/string.view.ops.
- constexpr size_type copy(value_type* s,
- size_type n,
+ constexpr const_reference front() const { return operator[](0); }
+
+ constexpr const_reference back() const { return operator[](size() - 1); }
+
+ constexpr const_pointer data() const noexcept { return ptr_; }
+
+ constexpr size_type size() const noexcept { return length_; }
+ constexpr size_type length() const noexcept { return length_; }
+
+ constexpr size_type max_size() const {
+ return std::numeric_limits<size_type>::max() / sizeof(CharT);
+ }
+
+ constexpr bool empty() const noexcept WARN_UNUSED_RESULT {
+ return size() == 0;
+ }
+
+ constexpr void remove_prefix(size_type n) {
+ // Intentional STL deviation: Bounds-check instead of UB.
+ GURL_CHECK_LE(n, size());
+ ptr_ += n;
+ length_ -= n;
+ }
+
+ constexpr void remove_suffix(size_type n) {
+ // Intentional STL deviation: Bounds-check instead of UB.
+ GURL_CHECK_LE(n, size());
+ length_ -= n;
+ }
+
+ constexpr void swap(BasicStringPiece& v) noexcept {
+ // Note: Cannot use std::swap() since it is not constexpr until C++20.
+ const const_pointer ptr = ptr_;
+ ptr_ = v.ptr_;
+ v.ptr_ = ptr;
+ const size_type length = length_;
+ length_ = v.length_;
+ v.length_ = length;
+ }
+
+ constexpr size_type copy(CharT* dest,
+ size_type count,
size_type pos = 0) const {
GURL_CHECK_LE(pos, size());
- size_type rlen = std::min(n, size() - pos);
- traits_type::copy(s, data() + pos, rlen);
- return rlen;
+ const size_type rcount = std::min(count, size() - pos);
+ traits_type::copy(dest, data() + pos, rcount);
+ return rcount;
}
constexpr BasicStringPiece substr(size_type pos = 0,
- size_type n = npos) const {
+ size_type count = npos) const {
GURL_CHECK_LE(pos, size());
- return {data() + pos, std::min(n, size() - pos)};
+ const size_type rcount = std::min(count, size() - pos);
+ return {data() + pos, rcount};
}
- constexpr int compare(BasicStringPiece str) const noexcept {
- size_type rlen = std::min(size(), str.size());
- int result = CharTraits<value_type>::compare(data(), str.data(), rlen);
- if (result == 0)
- result = size() == str.size() ? 0 : (size() < str.size() ? -1 : 1);
- return result;
+ constexpr int compare(BasicStringPiece v) const noexcept {
+ const size_type rlen = std::min(size(), v.size());
+ const int result = CharTraits<CharT>::compare(data(), v.data(), rlen);
+ if (result != 0)
+ return result;
+ if (size() == v.size())
+ return 0;
+ return size() < v.size() ? -1 : 1;
}
-
- constexpr int compare(size_type pos,
- size_type n,
- BasicStringPiece str) const {
- return substr(pos, n).compare(str);
- }
-
constexpr int compare(size_type pos1,
- size_type n1,
- BasicStringPiece str,
- size_type pos2,
- size_type n2) const {
- return substr(pos1, n1).compare(str.substr(pos2, n2));
+ size_type count1,
+ BasicStringPiece v) const {
+ return substr(pos1, count1).compare(v);
}
-
- constexpr int compare(const value_type* s) const {
+ constexpr int compare(size_type pos1,
+ size_type count1,
+ BasicStringPiece v,
+ size_type pos2,
+ size_type count2) const {
+ return substr(pos1, count1).compare(v.substr(pos2, count2));
+ }
+ constexpr int compare(const CharT* s) const {
return compare(BasicStringPiece(s));
}
-
- constexpr int compare(size_type pos, size_type n, const value_type* s) const {
- return substr(pos, n).compare(BasicStringPiece(s));
+ constexpr int compare(size_type pos1,
+ size_type count1,
+ const CharT* s) const {
+ return substr(pos1, count1).compare(BasicStringPiece(s));
+ }
+ constexpr int compare(size_type pos1,
+ size_type count1,
+ const CharT* s,
+ size_type count2) const {
+ return substr(pos1, count1).compare(BasicStringPiece(s, count2));
}
- constexpr int compare(size_type pos,
- size_type n1,
- const value_type* s,
- size_type n2) const {
- return substr(pos, n1).compare(BasicStringPiece(s, n2));
- }
-
- // Searching, see https://wg21.link/string.view.find.
-
- // find: Search for a character or substring at a given offset.
- constexpr size_type find(BasicStringPiece s,
+ constexpr size_type find(BasicStringPiece v,
size_type pos = 0) const noexcept {
- return internal::find(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (v.size() > size())
+ return npos;
+ for (size_type p = pos; p <= size() - v.size(); ++p) {
+ if (!compare(p, v.size(), v))
+ return p;
+ }
+ return npos;
+ }
- constexpr size_type find(value_type c, size_type pos = 0) const noexcept {
- return internal::find(*this, c, pos);
+ return internal::find(*this, v, pos);
}
+ constexpr size_type find(CharT ch, size_type pos = 0) const noexcept {
+ if (pos >= size())
+ return npos;
- constexpr size_type find(const value_type* s,
+ const const_pointer result =
+ gurl_base::CharTraits<CharT>::find(data() + pos, size() - pos, ch);
+ return result ? static_cast<size_type>(result - data()) : npos;
+ }
+ constexpr size_type find(const CharT* s,
size_type pos,
- size_type n) const {
- return find(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return find(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type find(const value_type* s, size_type pos = 0) const {
+ constexpr size_type find(const CharT* s, size_type pos = 0) const {
return find(BasicStringPiece(s), pos);
}
- // rfind: Reverse find.
- constexpr size_type rfind(BasicStringPiece s,
+ constexpr size_type rfind(BasicStringPiece v,
size_type pos = npos) const noexcept {
- return internal::rfind(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (v.size() > size())
+ return npos;
+ for (size_type p = std::min(size() - v.size(), pos);; --p) {
+ if (!compare(p, v.size(), v))
+ return p;
+ if (!p)
+ break;
+ }
+ return npos;
+ }
- constexpr size_type rfind(value_type c, size_type pos = npos) const noexcept {
- return internal::rfind(*this, c, pos);
+ return internal::rfind(*this, v, pos);
}
+ constexpr size_type rfind(CharT c, size_type pos = npos) const noexcept {
+ if (empty())
+ return npos;
- constexpr size_type rfind(const value_type* s,
+ for (size_t i = std::min(pos, size() - 1);; --i) {
+ if (data()[i] == c)
+ return i;
+
+ if (i == 0)
+ break;
+ }
+ return npos;
+ }
+ constexpr size_type rfind(const CharT* s,
size_type pos,
- size_type n) const {
- return rfind(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return rfind(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type rfind(const value_type* s, size_type pos = npos) const {
+ constexpr size_type rfind(const CharT* s, size_type pos = npos) const {
return rfind(BasicStringPiece(s), pos);
}
- // find_first_of: Find the first occurrence of one of a set of characters.
- constexpr size_type find_first_of(BasicStringPiece s,
+ constexpr size_type find_first_of(BasicStringPiece v,
size_type pos = 0) const noexcept {
- return internal::find_first_of(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (empty() || v.empty())
+ return npos;
+ for (size_type p = pos; p < size(); ++p) {
+ if (v.find(data()[p]) != npos)
+ return p;
+ }
+ return npos;
+ }
- constexpr size_type find_first_of(value_type c,
- size_type pos = 0) const noexcept {
+ return internal::find_first_of(*this, v, pos);
+ }
+ constexpr size_type find_first_of(CharT c, size_type pos = 0) const noexcept {
return find(c, pos);
}
-
- constexpr size_type find_first_of(const value_type* s,
+ constexpr size_type find_first_of(const CharT* s,
size_type pos,
- size_type n) const {
- return find_first_of(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return find_first_of(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type find_first_of(const value_type* s,
- size_type pos = 0) const {
+ constexpr size_type find_first_of(const CharT* s, size_type pos = 0) const {
return find_first_of(BasicStringPiece(s), pos);
}
- // find_last_of: Find the last occurrence of one of a set of characters.
- constexpr size_type find_last_of(BasicStringPiece s,
+ constexpr size_type find_last_of(BasicStringPiece v,
size_type pos = npos) const noexcept {
- return internal::find_last_of(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (empty() || v.empty())
+ return npos;
+ for (size_type p = std::min(pos, size() - 1);; --p) {
+ if (v.find(data()[p]) != npos)
+ return p;
+ if (!p)
+ break;
+ }
+ return npos;
+ }
- constexpr size_type find_last_of(value_type c,
+ return internal::find_last_of(*this, v, pos);
+ }
+ constexpr size_type find_last_of(CharT c,
size_type pos = npos) const noexcept {
return rfind(c, pos);
}
-
- constexpr size_type find_last_of(const value_type* s,
+ constexpr size_type find_last_of(const CharT* s,
size_type pos,
- size_type n) const {
- return find_last_of(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return find_last_of(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type find_last_of(const value_type* s,
- size_type pos = npos) const {
+ constexpr size_type find_last_of(const CharT* s, size_type pos = npos) const {
return find_last_of(BasicStringPiece(s), pos);
}
- // find_first_not_of: Find the first occurrence not of a set of characters.
- constexpr size_type find_first_not_of(BasicStringPiece s,
+ constexpr size_type find_first_not_of(BasicStringPiece v,
size_type pos = 0) const noexcept {
- return internal::find_first_not_of(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (empty())
+ return npos;
+ for (size_type p = pos; p < size(); ++p) {
+ if (v.find(data()[p]) == npos)
+ return p;
+ }
+ return npos;
+ }
- constexpr size_type find_first_not_of(value_type c,
+ return internal::find_first_not_of(*this, v, pos);
+ }
+ constexpr size_type find_first_not_of(CharT c,
size_type pos = 0) const noexcept {
- return internal::find_first_not_of(*this, c, pos);
- }
+ if (empty())
+ return npos;
- constexpr size_type find_first_not_of(const value_type* s,
+ for (; pos < size(); ++pos) {
+ if (data()[pos] != c)
+ return pos;
+ }
+ return npos;
+ }
+ constexpr size_type find_first_not_of(const CharT* s,
size_type pos,
- size_type n) const {
- return find_first_not_of(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return find_first_not_of(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type find_first_not_of(const value_type* s,
+ constexpr size_type find_first_not_of(const CharT* s,
size_type pos = 0) const {
return find_first_not_of(BasicStringPiece(s), pos);
}
- // find_last_not_of: Find the last occurrence not of a set of characters.
- constexpr size_type find_last_not_of(BasicStringPiece s,
+ constexpr size_type find_last_not_of(BasicStringPiece v,
size_type pos = npos) const noexcept {
- return internal::find_last_not_of(*this, s, pos);
- }
+ if (is_constant_evaluated()) {
+ if (empty())
+ return npos;
+ for (size_type p = std::min(pos, size() - 1);; --p) {
+ if (v.find(data()[p]) == npos)
+ return p;
+ if (!p)
+ break;
+ }
+ return npos;
+ }
- constexpr size_type find_last_not_of(value_type c,
+ return internal::find_last_not_of(*this, v, pos);
+ }
+ constexpr size_type find_last_not_of(CharT c,
size_type pos = npos) const noexcept {
- return internal::find_last_not_of(*this, c, pos);
- }
+ if (empty())
+ return npos;
- constexpr size_type find_last_not_of(const value_type* s,
+ for (size_t i = std::min(pos, size() - 1);; --i) {
+ if (data()[i] != c)
+ return i;
+ if (i == 0)
+ break;
+ }
+ return npos;
+ }
+ constexpr size_type find_last_not_of(const CharT* s,
size_type pos,
- size_type n) const {
- return find_last_not_of(BasicStringPiece(s, n), pos);
+ size_type count) const {
+ return find_last_not_of(BasicStringPiece(s, count), pos);
}
-
- constexpr size_type find_last_not_of(const value_type* s,
+ constexpr size_type find_last_not_of(const CharT* s,
size_type pos = npos) const {
return find_last_not_of(BasicStringPiece(s), pos);
}
+ static constexpr size_type npos = size_type(-1);
+
protected:
- const value_type* ptr_;
+ const_pointer ptr_;
size_type length_;
};
-template <typename STRING_TYPE>
-const typename BasicStringPiece<STRING_TYPE>::size_type
-BasicStringPiece<STRING_TYPE>::npos =
- typename BasicStringPiece<STRING_TYPE>::size_type(-1);
+// static
+template <typename CharT, typename Traits>
+const typename BasicStringPiece<CharT, Traits>::size_type
+ BasicStringPiece<CharT, Traits>::npos;
// MSVC doesn't like complex extern templates and DLLs.
#if !defined(COMPILER_MSVC)
-extern template class BASE_EXPORT BasicStringPiece<std::string>;
-extern template class BASE_EXPORT BasicStringPiece<string16>;
+extern template class BASE_EXPORT BasicStringPiece<char>;
+extern template class BASE_EXPORT BasicStringPiece<char16_t>;
#endif
-// Comparison operators --------------------------------------------------------
-// operator ==
-template <typename StringT>
-constexpr bool operator==(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator==(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
}
-
-// Here and below we make use of std::common_type_t to emulate an identity type
-// transformation. This creates a non-deduced context, so that we can compare
-// StringPieces with types that implicitly convert to StringPieces. See
-// https://wg21.link/n3766 for details.
+// Here and below we make use of std::common_type_t to emulate
+// std::type_identity (part of C++20). This creates a non-deduced context, so
+// that we can compare StringPieces with types that implicitly convert to
+// StringPieces. See https://wg21.link/n3766 for details.
// Furthermore, we require dummy template parameters for these overloads to work
// around a name mangling issue on Windows.
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator==(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
+ return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
+}
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator==(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
}
-template <typename StringT, int = 2>
-constexpr bool operator==(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
- return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
-}
-
-// operator !=
-template <typename StringT>
-constexpr bool operator!=(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator!=(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(lhs == rhs);
}
-
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator!=(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
+ return !(lhs == rhs);
+}
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator!=(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(lhs == rhs);
}
-template <typename StringT, int = 2>
-constexpr bool operator!=(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
- return !(lhs == rhs);
-}
-
-// operator <
-template <typename StringT>
-constexpr bool operator<(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator<(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
-
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator<(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
-template <typename StringT, int = 2>
-constexpr bool operator<(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator<(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
-// operator >
-template <typename StringT>
-constexpr bool operator>(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator>(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return rhs < lhs;
}
-
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator>(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
+ return rhs < lhs;
+}
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator>(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return rhs < lhs;
}
-template <typename StringT, int = 2>
-constexpr bool operator>(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
- return rhs < lhs;
-}
-
-// operator <=
-template <typename StringT>
-constexpr bool operator<=(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator<=(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(rhs < lhs);
}
-
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator<=(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
+ return !(rhs < lhs);
+}
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator<=(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(rhs < lhs);
}
-template <typename StringT, int = 2>
-constexpr bool operator<=(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
- return !(rhs < lhs);
-}
-
-// operator >=
-template <typename StringT>
-constexpr bool operator>=(BasicStringPiece<StringT> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits>
+constexpr bool operator>=(BasicStringPiece<CharT, Traits> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(lhs < rhs);
}
-
-template <typename StringT, int = 1>
+template <typename CharT, typename Traits, int = 1>
constexpr bool operator>=(
- BasicStringPiece<StringT> lhs,
- std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
+ BasicStringPiece<CharT, Traits> lhs,
+ std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
return !(lhs < rhs);
}
-
-template <typename StringT, int = 2>
-constexpr bool operator>=(std::common_type_t<BasicStringPiece<StringT>> lhs,
- BasicStringPiece<StringT> rhs) noexcept {
+template <typename CharT, typename Traits, int = 2>
+constexpr bool operator>=(
+ std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
+ BasicStringPiece<CharT, Traits> rhs) noexcept {
return !(lhs < rhs);
}
BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece piece);
+// Not in the STL: convenience functions to output non-UTF-8 strings to an
+// 8-bit-width stream.
BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece16 piece);
-
-#if !defined(WCHAR_T_IS_UTF16)
BASE_EXPORT std::ostream& operator<<(std::ostream& o, WStringPiece piece);
-#endif
-// Hashing ---------------------------------------------------------------------
+// Intentionally omitted (since Chromium does not use character literals):
+// operator""sv.
-// We provide appropriate hash functions so StringPiece and StringPiece16 can
-// be used as keys in hash sets and maps.
-
-// This hash function is copied from base/strings/string16.h. We don't use the
-// ones already defined for string and string16 directly because it would
-// require the string constructors to be called, which we don't want.
-
+// Stand-ins for the STL's std::hash<> specializations.
template <typename StringPieceType>
struct StringPieceHashImpl {
+ // This is a custom hash function. We don't use the ones already defined for
+ // string and std::u16string directly because it would require the string
+ // constructors to be called, which we don't want.
std::size_t operator()(StringPieceType sp) const {
std::size_t result = 0;
for (auto c : sp)
@@ -553,7 +617,6 @@
return result;
}
};
-
using StringPieceHash = StringPieceHashImpl<StringPiece>;
using StringPiece16Hash = StringPieceHashImpl<StringPiece16>;
using WStringPieceHash = StringPieceHashImpl<WStringPiece>;
diff --git a/base/strings/string_piece_forward.h b/base/strings/string_piece_forward.h
index aa79117..ce7e489 100644
--- a/base/strings/string_piece_forward.h
+++ b/base/strings/string_piece_forward.h
@@ -1,23 +1,21 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-
-// Forward declaration of StringPiece types from base/strings/string_piece.h
+//
+// Forward declaration of StringPiece types from base/strings/string_piece.h.
#ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_
#define BASE_STRINGS_STRING_PIECE_FORWARD_H_
#include <string>
-#include "base/strings/string16.h"
-
namespace gurl_base {
-template <typename STRING_TYPE>
+template <typename CharT, typename Traits = std::char_traits<CharT>>
class BasicStringPiece;
-typedef BasicStringPiece<std::string> StringPiece;
-typedef BasicStringPiece<string16> StringPiece16;
-typedef BasicStringPiece<std::wstring> WStringPiece;
+using StringPiece = BasicStringPiece<char>;
+using StringPiece16 = BasicStringPiece<char16_t>;
+using WStringPiece = BasicStringPiece<wchar_t>;
} // namespace base
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index 76de8f7..c6b1f29 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc
@@ -6,44 +6,41 @@
#include <string>
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace gurl_base {
-template <typename T>
+template <typename CharT>
class CommonStringPieceTest : public ::testing::Test {
public:
- static const T as_string(const char* input) {
- return T(input);
- }
- static const T& as_string(const T& input) {
+ static std::string as_string(const char* input) { return input; }
+ static const std::string& as_string(const std::string& input) {
return input;
}
};
template <>
-class CommonStringPieceTest<string16> : public ::testing::Test {
+class CommonStringPieceTest<char16_t> : public ::testing::Test {
public:
- static const string16 as_string(const char* input) {
- return ASCIIToUTF16(input);
+ static std::u16string as_string(const char* input) {
+ return UTF8ToUTF16(input);
}
- static const string16 as_string(const std::string& input) {
- return ASCIIToUTF16(input);
+ static std::u16string as_string(const std::string& input) {
+ return UTF8ToUTF16(input);
}
};
-typedef ::testing::Types<std::string, string16> SupportedStringTypes;
+typedef ::testing::Types<char, char16_t> SupportedCharTypes;
-TYPED_TEST_SUITE(CommonStringPieceTest, SupportedStringTypes);
+TYPED_TEST_SUITE(CommonStringPieceTest, SupportedCharTypes);
TYPED_TEST(CommonStringPieceTest, CheckComparisonOperators) {
#define CMP_Y(op, x, y) \
{ \
- TypeParam lhs(TestFixture::as_string(x)); \
- TypeParam rhs(TestFixture::as_string(y)); \
+ std::basic_string<TypeParam> lhs(TestFixture::as_string(x)); \
+ std::basic_string<TypeParam> rhs(TestFixture::as_string(y)); \
ASSERT_TRUE((BasicStringPiece<TypeParam>((lhs.c_str())) \
op BasicStringPiece<TypeParam>((rhs.c_str())))); \
ASSERT_TRUE(BasicStringPiece<TypeParam>(lhs) op rhs); \
@@ -55,8 +52,8 @@
#define CMP_N(op, x, y) \
{ \
- TypeParam lhs(TestFixture::as_string(x)); \
- TypeParam rhs(TestFixture::as_string(y)); \
+ std::basic_string<TypeParam> lhs(TestFixture::as_string(x)); \
+ std::basic_string<TypeParam> rhs(TestFixture::as_string(y)); \
ASSERT_FALSE((BasicStringPiece<TypeParam>((lhs.c_str())) \
op BasicStringPiece<TypeParam>((rhs.c_str())))); \
ASSERT_FALSE(BasicStringPiece<TypeParam>(lhs) op rhs); \
@@ -141,39 +138,39 @@
}
TYPED_TEST(CommonStringPieceTest, CheckSTL) {
- TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
- TypeParam abc(TestFixture::as_string("abc"));
- TypeParam xyz(TestFixture::as_string("xyz"));
- TypeParam foobar(TestFixture::as_string("foobar"));
+ std::basic_string<TypeParam> alphabet(
+ TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
+ std::basic_string<TypeParam> abc(TestFixture::as_string("abc"));
+ std::basic_string<TypeParam> xyz(TestFixture::as_string("xyz"));
+ std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar"));
BasicStringPiece<TypeParam> a(alphabet);
BasicStringPiece<TypeParam> b(abc);
BasicStringPiece<TypeParam> c(xyz);
BasicStringPiece<TypeParam> d(foobar);
BasicStringPiece<TypeParam> e;
- TypeParam temp(TestFixture::as_string("123"));
- temp += static_cast<typename TypeParam::value_type>(0);
+ std::basic_string<TypeParam> temp(TestFixture::as_string("123"));
+ temp += static_cast<TypeParam>(0);
temp += TestFixture::as_string("456");
BasicStringPiece<TypeParam> f(temp);
- ASSERT_EQ(a[6], static_cast<typename TypeParam::value_type>('g'));
- ASSERT_EQ(b[0], static_cast<typename TypeParam::value_type>('a'));
- ASSERT_EQ(c[2], static_cast<typename TypeParam::value_type>('z'));
- ASSERT_EQ(f[3], static_cast<typename TypeParam::value_type>('\0'));
- ASSERT_EQ(f[5], static_cast<typename TypeParam::value_type>('5'));
+ ASSERT_EQ(a[6], static_cast<TypeParam>('g'));
+ ASSERT_EQ(b[0], static_cast<TypeParam>('a'));
+ ASSERT_EQ(c[2], static_cast<TypeParam>('z'));
+ ASSERT_EQ(f[3], static_cast<TypeParam>('\0'));
+ ASSERT_EQ(f[5], static_cast<TypeParam>('5'));
- ASSERT_EQ(*d.data(), static_cast<typename TypeParam::value_type>('f'));
- ASSERT_EQ(d.data()[5], static_cast<typename TypeParam::value_type>('r'));
+ ASSERT_EQ(*d.data(), static_cast<TypeParam>('f'));
+ ASSERT_EQ(d.data()[5], static_cast<TypeParam>('r'));
ASSERT_EQ(e.data(), nullptr);
- ASSERT_EQ(*a.begin(), static_cast<typename TypeParam::value_type>('a'));
- ASSERT_EQ(*(b.begin() + 2), static_cast<typename TypeParam::value_type>('c'));
- ASSERT_EQ(*(c.end() - 1), static_cast<typename TypeParam::value_type>('z'));
+ ASSERT_EQ(*a.begin(), static_cast<TypeParam>('a'));
+ ASSERT_EQ(*(b.begin() + 2), static_cast<TypeParam>('c'));
+ ASSERT_EQ(*(c.end() - 1), static_cast<TypeParam>('z'));
- ASSERT_EQ(*a.rbegin(), static_cast<typename TypeParam::value_type>('z'));
- ASSERT_EQ(*(b.rbegin() + 2),
- static_cast<typename TypeParam::value_type>('a'));
- ASSERT_EQ(*(c.rend() - 1), static_cast<typename TypeParam::value_type>('x'));
+ ASSERT_EQ(*a.rbegin(), static_cast<TypeParam>('z'));
+ ASSERT_EQ(*(b.rbegin() + 2), static_cast<TypeParam>('a'));
+ ASSERT_EQ(*(c.rend() - 1), static_cast<TypeParam>('x'));
ASSERT_EQ(a.rbegin() + 26, a.rend());
ASSERT_EQ(a.size(), 26U);
@@ -196,17 +193,17 @@
ASSERT_EQ(d.data(), nullptr);
ASSERT_EQ(d.begin(), d.end());
- ASSERT_GE(a.max_size(), a.capacity());
- ASSERT_GE(a.capacity(), a.size());
+ ASSERT_GE(a.max_size(), a.size());
}
TYPED_TEST(CommonStringPieceTest, CheckFind) {
typedef BasicStringPiece<TypeParam> Piece;
- TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
- TypeParam abc(TestFixture::as_string("abc"));
- TypeParam xyz(TestFixture::as_string("xyz"));
- TypeParam foobar(TestFixture::as_string("foobar"));
+ std::basic_string<TypeParam> alphabet(
+ TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
+ std::basic_string<TypeParam> abc(TestFixture::as_string("abc"));
+ std::basic_string<TypeParam> xyz(TestFixture::as_string("xyz"));
+ std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar"));
BasicStringPiece<TypeParam> a(alphabet);
BasicStringPiece<TypeParam> b(abc);
@@ -215,12 +212,12 @@
d = Piece();
Piece e;
- TypeParam temp(TestFixture::as_string("123"));
+ std::basic_string<TypeParam> temp(TestFixture::as_string("123"));
temp.push_back('\0');
temp += TestFixture::as_string("456");
Piece f(temp);
- typename TypeParam::value_type buf[4] = { '%', '%', '%', '%' };
+ TypeParam buf[4] = {'%', '%', '%', '%'};
ASSERT_EQ(a.copy(buf, 4), 4U);
ASSERT_EQ(buf[0], a[0]);
ASSERT_EQ(buf[1], a[1]);
@@ -237,7 +234,7 @@
ASSERT_EQ(buf[2], c[2]);
ASSERT_EQ(buf[3], a[3]);
- ASSERT_EQ(Piece::npos, TypeParam::npos);
+ ASSERT_EQ(Piece::npos, std::basic_string<TypeParam>::npos);
ASSERT_EQ(a.find(b), 0U);
ASSERT_EQ(a.find(b, 1), Piece::npos);
@@ -250,7 +247,8 @@
ASSERT_EQ(a.find(e), 0U);
ASSERT_EQ(a.find(d, 12), 12U);
ASSERT_EQ(a.find(e, 17), 17U);
- TypeParam not_found(TestFixture::as_string("xx not found bb"));
+ std::basic_string<TypeParam> not_found(
+ TestFixture::as_string("xx not found bb"));
Piece g(not_found);
ASSERT_EQ(a.find(g), Piece::npos);
// empty string nonsense
@@ -259,7 +257,8 @@
ASSERT_EQ(d.find(b, 4), Piece::npos);
ASSERT_EQ(e.find(b, 7), Piece::npos);
- size_t empty_search_pos = TypeParam().find(TypeParam());
+ size_t empty_search_pos =
+ std::basic_string<TypeParam>().find(std::basic_string<TypeParam>());
ASSERT_EQ(d.find(d), empty_search_pos);
ASSERT_EQ(d.find(e), empty_search_pos);
ASSERT_EQ(e.find(d), empty_search_pos);
@@ -269,7 +268,7 @@
ASSERT_EQ(e.find(d, 4), std::string().find(std::string(), 4));
ASSERT_EQ(e.find(e, 4), std::string().find(std::string(), 4));
- constexpr typename TypeParam::value_type kNul = '\0';
+ constexpr TypeParam kNul = '\0';
ASSERT_EQ(a.find('a'), 0U);
ASSERT_EQ(a.find('c'), 2U);
ASSERT_EQ(a.find('z'), 25U);
@@ -296,8 +295,6 @@
ASSERT_EQ(a.find(c.data(), 9, 0), 9U);
ASSERT_EQ(a.find(c.data(), Piece::npos, 0), Piece::npos);
ASSERT_EQ(b.find(c.data(), Piece::npos, 0), Piece::npos);
- ASSERT_EQ(a.find(d.data(), 12, 0), 12U);
- ASSERT_EQ(a.find(e.data(), 17, 0), 17U);
// empty string nonsense
ASSERT_EQ(d.find(b.data(), 4, 0), Piece::npos);
ASSERT_EQ(e.find(b.data(), 7, 0), Piece::npos);
@@ -306,21 +303,10 @@
ASSERT_EQ(a.find(c.data(), 9), 23U);
ASSERT_EQ(a.find(c.data(), Piece::npos), Piece::npos);
ASSERT_EQ(b.find(c.data(), Piece::npos), Piece::npos);
- ASSERT_EQ(a.find(d.data(), 12), 12U);
- ASSERT_EQ(a.find(e.data(), 17), 17U);
// empty string nonsense
ASSERT_EQ(d.find(b.data(), 4), Piece::npos);
ASSERT_EQ(e.find(b.data(), 7), Piece::npos);
- ASSERT_EQ(d.find(d.data(), 4, 0),
- std::string().find(std::string().data(), 4, 0));
- ASSERT_EQ(d.find(e.data(), 4, 1),
- std::string().find(std::string().data(), 4, 1));
- ASSERT_EQ(e.find(d.data(), 4, 2),
- std::string().find(std::string().data(), 4, 2));
- ASSERT_EQ(e.find(e.data(), 4, 3),
- std::string().find(std::string().data(), 4, 3));
-
ASSERT_EQ(a.rfind(b), 0U);
ASSERT_EQ(a.rfind(b, 1), 0U);
ASSERT_EQ(a.rfind(c), 23U);
@@ -329,10 +315,14 @@
ASSERT_EQ(a.rfind(c, 0U), Piece::npos);
ASSERT_EQ(b.rfind(c), Piece::npos);
ASSERT_EQ(b.rfind(c, 0U), Piece::npos);
- ASSERT_EQ(a.rfind(d), static_cast<size_t>(a.rfind(TypeParam())));
- ASSERT_EQ(a.rfind(e), a.rfind(TypeParam()));
- ASSERT_EQ(a.rfind(d), static_cast<size_t>(TypeParam(a).rfind(TypeParam())));
- ASSERT_EQ(a.rfind(e), TypeParam(a).rfind(TypeParam()));
+ ASSERT_EQ(a.rfind(d),
+ static_cast<size_t>(a.rfind(std::basic_string<TypeParam>())));
+ ASSERT_EQ(a.rfind(e), a.rfind(std::basic_string<TypeParam>()));
+ ASSERT_EQ(a.rfind(d),
+ static_cast<size_t>(std::basic_string<TypeParam>(a).rfind(
+ std::basic_string<TypeParam>())));
+ ASSERT_EQ(a.rfind(e), std::basic_string<TypeParam>(a).rfind(
+ std::basic_string<TypeParam>()));
ASSERT_EQ(a.rfind(d, 12), 12U);
ASSERT_EQ(a.rfind(e, 17), 17U);
ASSERT_EQ(a.rfind(g), Piece::npos);
@@ -370,19 +360,12 @@
ASSERT_EQ(a.rfind(c.data(), 1U, 0), 1U);
ASSERT_EQ(a.rfind(c.data(), 0U, 0), 0U);
ASSERT_EQ(b.rfind(c.data(), 0U, 0), 0U);
- ASSERT_EQ(a.rfind(d.data(), 12, 0), 12U);
- ASSERT_EQ(a.rfind(e.data(), 17, 0), 17U);
ASSERT_EQ(d.rfind(b.data(), 4, 0), 0U);
ASSERT_EQ(e.rfind(b.data(), 7, 0), 0U);
- // empty string nonsense
- ASSERT_EQ(d.rfind(d.data(), 4), std::string().rfind(std::string()));
- ASSERT_EQ(e.rfind(d.data(), 7), std::string().rfind(std::string()));
- ASSERT_EQ(d.rfind(e.data(), 4), std::string().rfind(std::string()));
- ASSERT_EQ(e.rfind(e.data(), 7), std::string().rfind(std::string()));
-
- TypeParam one_two_three_four(TestFixture::as_string("one,two:three;four"));
- TypeParam comma_colon(TestFixture::as_string(",:"));
+ std::basic_string<TypeParam> one_two_three_four(
+ TestFixture::as_string("one,two:three;four"));
+ std::basic_string<TypeParam> comma_colon(TestFixture::as_string(",:"));
ASSERT_EQ(3U, Piece(one_two_three_four).find_first_of(comma_colon));
ASSERT_EQ(a.find_first_of(b), 0U);
ASSERT_EQ(a.find_first_of(b, 0), 0U);
@@ -416,6 +399,10 @@
ASSERT_EQ(a.find_first_not_of(f), 0U);
ASSERT_EQ(a.find_first_not_of(d), 0U);
ASSERT_EQ(a.find_first_not_of(e), 0U);
+ ASSERT_EQ(a.find_first_not_of(d, 1), 1U);
+ ASSERT_EQ(a.find_first_not_of(e, 1), 1U);
+ ASSERT_EQ(a.find_first_not_of(d, a.size()), Piece::npos);
+ ASSERT_EQ(a.find_first_not_of(e, a.size()), Piece::npos);
// empty string nonsense
ASSERT_EQ(d.find_first_not_of(a), Piece::npos);
ASSERT_EQ(e.find_first_not_of(a), Piece::npos);
@@ -424,7 +411,7 @@
ASSERT_EQ(d.find_first_not_of(e), Piece::npos);
ASSERT_EQ(e.find_first_not_of(e), Piece::npos);
- TypeParam equals(TestFixture::as_string("===="));
+ std::basic_string<TypeParam> equals(TestFixture::as_string("===="));
Piece h(equals);
ASSERT_EQ(h.find_first_not_of('='), Piece::npos);
ASSERT_EQ(h.find_first_not_of('=', 3), Piece::npos);
@@ -440,7 +427,7 @@
ASSERT_EQ(e.find_first_not_of(kNul), Piece::npos);
// Piece g("xx not found bb");
- TypeParam fifty_six(TestFixture::as_string("56"));
+ std::basic_string<TypeParam> fifty_six(TestFixture::as_string("56"));
Piece i(fifty_six);
ASSERT_EQ(h.find_last_of(a), Piece::npos);
ASSERT_EQ(g.find_last_of(a), g.size()-1);
@@ -525,14 +512,14 @@
}
TYPED_TEST(CommonStringPieceTest, CheckCustom) {
- TypeParam foobar(TestFixture::as_string("foobar"));
+ std::basic_string<TypeParam> foobar(TestFixture::as_string("foobar"));
BasicStringPiece<TypeParam> a(foobar);
- TypeParam s1(TestFixture::as_string("123"));
- s1 += static_cast<typename TypeParam::value_type>('\0');
+ std::basic_string<TypeParam> s1(TestFixture::as_string("123"));
+ s1 += static_cast<TypeParam>('\0');
s1 += TestFixture::as_string("456");
BasicStringPiece<TypeParam> b(s1);
BasicStringPiece<TypeParam> e;
- TypeParam s2;
+ std::basic_string<TypeParam> s2;
// remove_prefix
BasicStringPiece<TypeParam> c(a);
@@ -565,9 +552,10 @@
ASSERT_NE(c, a);
// operator STRING_TYPE()
- TypeParam s5(TypeParam(a).c_str(), 7); // Note, has an embedded NULL
+ std::basic_string<TypeParam> s5(std::basic_string<TypeParam>(a).c_str(),
+ 7); // Note, has an embedded NULL
ASSERT_EQ(c, s5);
- TypeParam s6(e);
+ std::basic_string<TypeParam> s6(e);
ASSERT_TRUE(s6.empty());
}
@@ -594,15 +582,18 @@
ASSERT_EQ(s.data(), nullptr);
ASSERT_EQ(s.size(), 0U);
- TypeParam str(s);
+ std::basic_string<TypeParam> str(s);
ASSERT_EQ(str.length(), 0U);
- ASSERT_EQ(str, TypeParam());
+ ASSERT_EQ(str, std::basic_string<TypeParam>());
}
TYPED_TEST(CommonStringPieceTest, CheckComparisons2) {
- TypeParam alphabet(TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
- TypeParam alphabet_z(TestFixture::as_string("abcdefghijklmnopqrstuvwxyzz"));
- TypeParam alphabet_y(TestFixture::as_string("abcdefghijklmnopqrstuvwxyy"));
+ std::basic_string<TypeParam> alphabet(
+ TestFixture::as_string("abcdefghijklmnopqrstuvwxyz"));
+ std::basic_string<TypeParam> alphabet_z(
+ TestFixture::as_string("abcdefghijklmnopqrstuvwxyzz"));
+ std::basic_string<TypeParam> alphabet_y(
+ TestFixture::as_string("abcdefghijklmnopqrstuvwxyy"));
BasicStringPiece<TypeParam> abc(alphabet);
// check comparison operations on strings longer than 4 bytes.
@@ -624,38 +615,37 @@
}
TYPED_TEST(CommonStringPieceTest, HeterogenousStringPieceEquals) {
- TypeParam hello(TestFixture::as_string("hello"));
+ std::basic_string<TypeParam> hello(TestFixture::as_string("hello"));
ASSERT_EQ(BasicStringPiece<TypeParam>(hello), hello);
ASSERT_EQ(hello.c_str(), BasicStringPiece<TypeParam>(hello));
}
-// string16-specific stuff
+// std::u16string-specific stuff
TEST(StringPiece16Test, CheckSTL) {
// Check some non-ascii characters.
- string16 fifth(ASCIIToUTF16("123"));
+ std::u16string fifth(u"123");
fifth.push_back(0x0000);
fifth.push_back(0xd8c5);
fifth.push_back(0xdffe);
StringPiece16 f(fifth);
ASSERT_EQ(f[3], '\0');
- ASSERT_EQ(f[5], static_cast<char16>(0xdffe));
+ ASSERT_EQ(f[5], 0xdffe);
ASSERT_EQ(f.size(), 6U);
}
-
-
TEST(StringPiece16Test, CheckConversion) {
- // Make sure that we can convert from UTF8 to UTF16 and back. We use a two
- // byte character (G clef) to test this.
- ASSERT_EQ(UTF16ToUTF8(UTF8ToUTF16("\xf0\x9d\x84\x9e")), "\xf0\x9d\x84\x9e");
+ // Make sure that we can convert from UTF8 to UTF16 and back. We use a
+ // character (G clef) outside the BMP to test this.
+ const char* kTest = "\U0001D11E";
+ ASSERT_EQ(UTF16ToUTF8(UTF8ToUTF16(kTest)), kTest);
}
TYPED_TEST(CommonStringPieceTest, CheckConstructors) {
- TypeParam str(TestFixture::as_string("hello world"));
- TypeParam empty;
+ std::basic_string<TypeParam> str(TestFixture::as_string("hello world"));
+ std::basic_string<TypeParam> empty;
ASSERT_EQ(str, BasicStringPiece<TypeParam>(str));
ASSERT_EQ(str, BasicStringPiece<TypeParam>(str.c_str()));
@@ -856,4 +846,46 @@
static_assert(piece.substr(0, 99) == piece, "");
}
+TEST(StringPieceTest, Find) {
+ constexpr StringPiece foobar("foobar", 6);
+ constexpr StringPiece foo = foobar.substr(0, 3);
+ constexpr StringPiece bar = foobar.substr(3);
+
+ // find
+ static_assert(foobar.find(bar, 0) == 3, "");
+ static_assert(foobar.find('o', 0) == 1, "");
+ static_assert(foobar.find("ox", 0, 1) == 1, "");
+ static_assert(foobar.find("ox", 0) == StringPiece::npos, "");
+
+ // rfind
+ static_assert(foobar.rfind(bar, 5) == 3, "");
+ static_assert(foobar.rfind('o', 5) == 2, "");
+ static_assert(foobar.rfind("ox", 5, 1) == 2, "");
+ static_assert(foobar.rfind("ox", 5) == StringPiece::npos, "");
+
+ // find_first_of
+ static_assert(foobar.find_first_of(foo, 2) == 2, "");
+ static_assert(foobar.find_first_of('o', 2) == 2, "");
+ static_assert(foobar.find_first_of("ox", 2, 2) == 2, "");
+ static_assert(foobar.find_first_of("ox", 2) == 2, "");
+
+ // find_last_of
+ static_assert(foobar.find_last_of(foo, 5) == 2, "");
+ static_assert(foobar.find_last_of('o', 5) == 2, "");
+ static_assert(foobar.find_last_of("ox", 5, 2) == 2, "");
+ static_assert(foobar.find_last_of("ox", 5) == 2, "");
+
+ // find_first_not_of
+ static_assert(foobar.find_first_not_of(foo, 2) == 3, "");
+ static_assert(foobar.find_first_not_of('o', 2) == 3, "");
+ static_assert(foobar.find_first_not_of("ox", 2, 2) == 3, "");
+ static_assert(foobar.find_first_not_of("ox", 2) == 3, "");
+
+ // find_last_not_of
+ static_assert(foobar.find_last_not_of(bar, 5) == 2, "");
+ static_assert(foobar.find_last_not_of('a', 4) == 3, "");
+ static_assert(foobar.find_last_not_of("ox", 2, 2) == 0, "");
+ static_assert(foobar.find_last_not_of("ox", 2) == 0, "");
+}
+
} // namespace base
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc
index 40dedb7..24b4a21 100644
--- a/base/strings/string_split.cc
+++ b/base/strings/string_split.cc
@@ -55,12 +55,12 @@
result_type);
}
-std::vector<string16> SplitString(StringPiece16 input,
- StringPiece16 separators,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return internal::SplitStringT<string16>(input, separators, whitespace,
- result_type);
+std::vector<std::u16string> SplitString(StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringT<std::u16string>(input, separators, whitespace,
+ result_type);
}
std::vector<StringPiece> SplitStringPiece(StringPiece input,
@@ -110,12 +110,13 @@
return success;
}
-std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
- StringPiece16 delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
- return internal::SplitStringUsingSubstrT<string16>(input, delimiter,
- whitespace, result_type);
+std::vector<std::u16string> SplitStringUsingSubstr(
+ StringPiece16 input,
+ StringPiece16 delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return internal::SplitStringUsingSubstrT<std::u16string>(
+ input, delimiter, whitespace, result_type);
}
std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
diff --git a/base/strings/string_split.h b/base/strings/string_split.h
index 039a049..d7f56a6 100644
--- a/base/strings/string_split.h
+++ b/base/strings/string_split.h
@@ -10,7 +10,6 @@
#include <vector>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
@@ -51,11 +50,11 @@
WhitespaceHandling whitespace,
SplitResult result_type)
WARN_UNUSED_RESULT;
-BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input,
- StringPiece16 separators,
- WhitespaceHandling whitespace,
- SplitResult result_type)
- WARN_UNUSED_RESULT;
+BASE_EXPORT std::vector<std::u16string> SplitString(
+ StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
// Like SplitString above except it returns a vector of StringPieces which
// reference the original buffer without copying. Although you have to be
@@ -103,7 +102,7 @@
// Similar to SplitString, but use a substring delimiter instead of a list of
// characters that are all possible delimiters.
-BASE_EXPORT std::vector<string16> SplitStringUsingSubstr(
+BASE_EXPORT std::vector<std::u16string> SplitStringUsingSubstr(
StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
diff --git a/base/strings/string_split_internal.h b/base/strings/string_split_internal.h
index 9dc3763..4430381 100644
--- a/base/strings/string_split_internal.h
+++ b/base/strings/string_split_internal.h
@@ -15,45 +15,46 @@
namespace internal {
// Returns either the ASCII or UTF-16 whitespace.
-template <typename Str>
-BasicStringPiece<Str> WhitespaceForType();
+template <typename CharT>
+BasicStringPiece<CharT> WhitespaceForType();
template <>
-inline StringPiece16 WhitespaceForType<string16>() {
+inline StringPiece16 WhitespaceForType<char16_t>() {
return kWhitespaceUTF16;
}
template <>
-inline StringPiece WhitespaceForType<std::string>() {
+inline StringPiece WhitespaceForType<char>() {
return kWhitespaceASCII;
}
// General string splitter template. Can take 8- or 16-bit input, can produce
// the corresponding string or StringPiece output.
-template <typename OutputStringType, typename Str>
-static std::vector<OutputStringType> SplitStringT(
- BasicStringPiece<Str> str,
- BasicStringPiece<Str> delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type) {
+template <typename OutputStringType,
+ typename T,
+ typename CharT = typename T::value_type>
+static std::vector<OutputStringType> SplitStringT(T str,
+ T delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
std::vector<OutputStringType> result;
if (str.empty())
return result;
size_t start = 0;
- while (start != Str::npos) {
+ while (start != std::basic_string<CharT>::npos) {
size_t end = str.find_first_of(delimiter, start);
- BasicStringPiece<Str> piece;
- if (end == Str::npos) {
+ BasicStringPiece<CharT> piece;
+ if (end == std::basic_string<CharT>::npos) {
piece = str.substr(start);
- start = Str::npos;
+ start = std::basic_string<CharT>::npos;
} else {
piece = str.substr(start, end - start);
start = end + 1;
}
if (whitespace == TRIM_WHITESPACE)
- piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
+ piece = TrimString(piece, WhitespaceForType<CharT>(), TRIM_ALL);
if (result_type == SPLIT_WANT_ALL || !piece.empty())
result.emplace_back(piece);
@@ -61,13 +62,15 @@
return result;
}
-template <typename OutputStringType, typename Str>
+template <typename OutputStringType,
+ typename T,
+ typename CharT = typename T::value_type>
std::vector<OutputStringType> SplitStringUsingSubstrT(
- BasicStringPiece<Str> input,
- BasicStringPiece<Str> delimiter,
+ T input,
+ T delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- using Piece = BasicStringPiece<Str>;
+ using Piece = BasicStringPiece<CharT>;
using size_type = typename Piece::size_type;
std::vector<OutputStringType> result;
@@ -84,7 +87,7 @@
: input.substr(begin_index, end_index - begin_index);
if (whitespace == TRIM_WHITESPACE)
- term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
+ term = TrimString(term, WhitespaceForType<CharT>(), TRIM_ALL);
if (result_type == SPLIT_WANT_ALL || !term.empty())
result.emplace_back(term);
diff --git a/base/strings/string_split_unittest.cc b/base/strings/string_split_unittest.cc
index a3e13fa..5bafec5 100644
--- a/base/strings/string_split_unittest.cc
+++ b/base/strings/string_split_unittest.cc
@@ -6,7 +6,6 @@
#include <stddef.h>
-#include "base/macros.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gmock/include/gmock/gmock.h"
diff --git a/base/strings/string_split_win.cc b/base/strings/string_split_win.cc
index 297853c..1327769 100644
--- a/base/strings/string_split_win.cc
+++ b/base/strings/string_split_win.cc
@@ -12,11 +12,10 @@
namespace gurl_base {
-#if defined(BASE_STRING16_IS_STD_U16STRING)
namespace internal {
template <>
-inline WStringPiece WhitespaceForType<std::wstring>() {
+inline WStringPiece WhitespaceForType<wchar_t>() {
return kWhitespaceWide;
}
@@ -54,6 +53,5 @@
return internal::SplitStringUsingSubstrT<WStringPiece>(
input, delimiter, whitespace, result_type);
}
-#endif
} // namespace base
diff --git a/base/strings/string_split_win.h b/base/strings/string_split_win.h
index 080641c..850d2ca 100644
--- a/base/strings/string_split_win.h
+++ b/base/strings/string_split_win.h
@@ -10,19 +10,13 @@
#include "polyfills/base/base_export.h"
#include "base/compiler_specific.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
namespace gurl_base {
// The following section contains overloads of the cross-platform APIs for
-// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring
-// and gurl_base::string16 are distinct types, as otherwise this would result in an
-// ODR violation.
-// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is
-// std::u16string.
-#if defined(BASE_STRING16_IS_STD_U16STRING)
+// std::wstring and gurl_base::WStringPiece.
BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
WStringPiece separators,
WhitespaceHandling whitespace,
@@ -46,7 +40,6 @@
WStringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
-#endif
} // namespace base
diff --git a/base/strings/string_tokenizer.h b/base/strings/string_tokenizer.h
index 7ee0178..14db1e1 100644
--- a/base/strings/string_tokenizer.h
+++ b/base/strings/string_tokenizer.h
@@ -9,13 +9,17 @@
#include <string>
#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
namespace gurl_base {
// StringTokenizerT is a simple string tokenizer class. It works like an
// iterator that with each step (see the Advance method) updates members that
// refer to the next token in the input string. The user may optionally
-// configure the tokenizer to return delimiters.
+// configure the tokenizer to return delimiters. For the optional
+// WhitespacePolicy parameter, kSkipOver will cause the tokenizer to skip
+// over whitespace characters. The tokenizer never stops on a whitespace
+// character.
//
// EXAMPLE 1:
//
@@ -80,6 +84,23 @@
// }
//
//
+// EXAMPLE 4:
+//
+// std::string input = "this, \t is, \t a, \t test";
+// StringTokenizer t(input, ",",
+// StringTokenizer::WhitespacePolicy::kSkipOver);
+// while (t.GetNext()) {
+// printf("%s\n", t.token().c_str());
+// }
+//
+// Output:
+//
+// this
+// is
+// a
+// test
+//
+//
template <class str, class const_iterator>
class StringTokenizerT {
public:
@@ -96,23 +117,41 @@
RETURN_EMPTY_TOKENS = 1 << 1,
};
+ // Policy indicating what to do with whitespace characters. Whitespace is
+ // defined to be the characters indicated here:
+ // https://www.w3schools.com/jsref/jsref_regexp_whitespace.asp
+ enum class WhitespacePolicy {
+ // Whitespace should be treated the same as any other non-delimiter
+ // character.
+ kIncludeInTokens,
+ // Whitespace is skipped over and not included in the resulting token.
+ // Whitespace will also delimit other tokens, however it is never returned
+ // even if RETURN_DELIMS is set. If quote chars are set (See set_quote_chars
+ // below) Whitespace will be included in a token when processing quotes.
+ kSkipOver,
+ };
+
// The string object must live longer than the tokenizer. In particular, this
// should not be constructed with a temporary. The deleted rvalue constructor
// blocks the most obvious instances of this (e.g. passing a string literal to
// the constructor), but caution must still be exercised.
- StringTokenizerT(const str& string,
- const str& delims) {
- Init(string.begin(), string.end(), delims);
+ StringTokenizerT(
+ const str& string,
+ const str& delims,
+ WhitespacePolicy whitespace_policy = WhitespacePolicy::kIncludeInTokens) {
+ Init(string.begin(), string.end(), delims, whitespace_policy);
}
// Don't allow temporary strings to be used with string tokenizer, since
// Init() would otherwise save iterators to a temporary string.
StringTokenizerT(str&&, const str& delims) = delete;
- StringTokenizerT(const_iterator string_begin,
- const_iterator string_end,
- const str& delims) {
- Init(string_begin, string_end, delims);
+ StringTokenizerT(
+ const_iterator string_begin,
+ const_iterator string_end,
+ const str& delims,
+ WhitespacePolicy whitespace_policy = WhitespacePolicy::kIncludeInTokens) {
+ Init(string_begin, string_end, delims, whitespace_policy);
}
// Set the options for this tokenizer. By default, this is 0.
@@ -151,15 +190,15 @@
const_iterator token_begin() const { return token_begin_; }
const_iterator token_end() const { return token_end_; }
str token() const { return str(token_begin_, token_end_); }
- BasicStringPiece<str> token_piece() const {
- return BasicStringPiece<str>(&*token_begin_,
- std::distance(token_begin_, token_end_));
+ BasicStringPiece<char_type> token_piece() const {
+ return MakeBasicStringPiece<char_type>(token_begin_, token_end_);
}
private:
void Init(const_iterator string_begin,
const_iterator string_end,
- const str& delims) {
+ const str& delims,
+ WhitespacePolicy whitespace_policy) {
start_pos_ = string_begin;
token_begin_ = string_begin;
token_end_ = string_begin;
@@ -167,6 +206,19 @@
delims_ = delims;
options_ = 0;
token_is_delim_ = true;
+ whitespace_policy_ = whitespace_policy;
+ }
+
+ bool ShouldSkip(char_type c) const {
+ return whitespace_policy_ == WhitespacePolicy::kSkipOver &&
+ IsAsciiWhitespace(c);
+ }
+
+ // Skip over any contiguous whitespace characters according to the whitespace
+ // policy.
+ void SkipWhitespace() {
+ while (token_end_ != end_ && ShouldSkip(*token_end_))
+ ++token_end_;
}
// Implementation of GetNext() for when we have no quote characters. We have
@@ -181,12 +233,16 @@
return false;
}
++token_end_;
- if (delims_.find(*token_begin_) == str::npos)
+ if (delims_.find(*token_begin_) == str::npos &&
+ !ShouldSkip(*token_begin_)) {
break;
- // else skip over delimiter.
+ }
+ // else skip over delimiter or skippable character.
}
- while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
+ while (token_end_ != end_ && delims_.find(*token_end_) == str::npos &&
+ !ShouldSkip(*token_end_)) {
++token_end_;
+ }
return true;
}
@@ -194,6 +250,7 @@
bool FullGetNext() {
AdvanceState state;
+ SkipWhitespace();
for (;;) {
if (token_is_delim_) {
// Last token was a delimiter. Note: This is also the case at the start.
@@ -250,13 +307,9 @@
return false;
}
- bool IsDelim(char_type c) const {
- return delims_.find(c) != str::npos;
- }
+ bool IsDelim(char_type c) const { return delims_.find(c) != str::npos; }
- bool IsQuote(char_type c) const {
- return quotes_.find(c) != str::npos;
- }
+ bool IsQuote(char_type c) const { return quotes_.find(c) != str::npos; }
struct AdvanceState {
bool in_quote;
@@ -265,7 +318,8 @@
AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
};
- // Returns true if a delimiter was not hit.
+ // Returns true if a delimiter or, depending on policy, whitespace was not
+ // hit.
bool AdvanceOne(AdvanceState* state, char_type c) {
if (state->in_quote) {
if (state->in_escape) {
@@ -276,7 +330,7 @@
state->in_quote = false;
}
} else {
- if (IsDelim(c))
+ if (IsDelim(c) || ShouldSkip(c))
return false;
state->in_quote = IsQuote(state->quote_char = c);
}
@@ -291,11 +345,13 @@
str quotes_;
int options_;
bool token_is_delim_;
+ WhitespacePolicy whitespace_policy_;
};
typedef StringTokenizerT<std::string, std::string::const_iterator>
StringTokenizer;
-typedef StringTokenizerT<string16, string16::const_iterator> String16Tokenizer;
+typedef StringTokenizerT<std::u16string, std::u16string::const_iterator>
+ String16Tokenizer;
typedef StringTokenizerT<std::string, const char*> CStringTokenizer;
} // namespace base
diff --git a/base/strings/string_tokenizer_unittest.cc b/base/strings/string_tokenizer_unittest.cc
index 1665d5d..9cca0c1 100644
--- a/base/strings/string_tokenizer_unittest.cc
+++ b/base/strings/string_tokenizer_unittest.cc
@@ -382,6 +382,36 @@
EXPECT_FALSE(t.GetNext());
}
+TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) {
+ string input = "\t\t\t foo=a,\r\n b,\r\n\t\t\t bar\t ";
+ StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver);
+
+ EXPECT_TRUE(t.GetNext());
+ EXPECT_EQ("foo=a", t.token());
+
+ EXPECT_TRUE(t.GetNext());
+ EXPECT_EQ("b", t.token());
+
+ EXPECT_TRUE(t.GetNext());
+ EXPECT_EQ("bar", t.token());
+
+ EXPECT_FALSE(t.GetNext());
+}
+
+TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) {
+ string input = "\t\t\t foo='a, b',\t\t\t bar\t ";
+ StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver);
+ t.set_quote_chars("'");
+
+ EXPECT_TRUE(t.GetNext());
+ EXPECT_EQ("foo='a, b'", t.token());
+
+ EXPECT_TRUE(t.GetNext());
+ EXPECT_EQ("bar", t.token());
+
+ EXPECT_FALSE(t.GetNext());
+}
+
} // namespace
} // namespace base
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index c2e440f..f76d2f7 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -71,7 +71,7 @@
return internal::ToLowerASCIIImpl(str);
}
-string16 ToLowerASCII(StringPiece16 str) {
+std::u16string ToLowerASCII(StringPiece16 str) {
return internal::ToLowerASCIIImpl(str);
}
@@ -79,7 +79,7 @@
return internal::ToUpperASCIIImpl(str);
}
-string16 ToUpperASCII(StringPiece16 str) {
+std::u16string ToUpperASCII(StringPiece16 str) {
return internal::ToUpperASCIIImpl(str);
}
@@ -106,15 +106,15 @@
return *s;
}
-const string16& EmptyString16() {
- static const gurl_base::NoDestructor<string16> s16;
+const std::u16string& EmptyString16() {
+ static const gurl_base::NoDestructor<std::u16string> s16;
return *s16;
}
bool ReplaceChars(StringPiece16 input,
StringPiece16 replace_chars,
StringPiece16 replace_with,
- string16* output) {
+ std::u16string* output) {
return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
}
@@ -127,7 +127,7 @@
bool RemoveChars(StringPiece16 input,
StringPiece16 remove_chars,
- string16* output) {
+ std::u16string* output) {
return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
}
@@ -139,7 +139,7 @@
bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
- string16* output) {
+ std::u16string* output) {
return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
TRIM_NONE;
}
@@ -202,7 +202,7 @@
TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
- string16* output) {
+ std::u16string* output) {
return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
positions, output);
}
@@ -225,8 +225,8 @@
positions);
}
-string16 CollapseWhitespace(StringPiece16 text,
- bool trim_sequences_with_line_breaks) {
+std::u16string CollapseWhitespace(StringPiece16 text,
+ bool trim_sequences_with_line_breaks) {
return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
}
@@ -331,7 +331,7 @@
" PB"
};
-string16 FormatBytesUnlocalized(int64_t bytes) {
+std::u16string FormatBytesUnlocalized(int64_t bytes) {
double unit_amount = static_cast<double>(bytes);
size_t dimension = 0;
const int kKilo = 1024;
@@ -353,12 +353,12 @@
return ASCIIToUTF16(buf);
}
-void ReplaceFirstSubstringAfterOffset(string16* str,
+void ReplaceFirstSubstringAfterOffset(std::u16string* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<string16>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_FIRST);
}
@@ -367,16 +367,16 @@
StringPiece find_this,
StringPiece replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<std::string>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_FIRST);
}
-void ReplaceSubstringsAfterOffset(string16* str,
+void ReplaceSubstringsAfterOffset(std::u16string* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<string16>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_ALL);
}
@@ -385,7 +385,7 @@
StringPiece find_this,
StringPiece replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<std::string>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_ALL);
}
@@ -393,7 +393,7 @@
return internal::WriteIntoT(str, length_with_null);
}
-char16* WriteInto(string16* str, size_t length_with_null) {
+char16_t* WriteInto(std::u16string* str, size_t length_with_null) {
return internal::WriteIntoT(str, length_with_null);
}
@@ -401,7 +401,8 @@
return internal::JoinStringT(parts, separator);
}
-string16 JoinString(span<const string16> parts, StringPiece16 separator) {
+std::u16string JoinString(span<const std::u16string> parts,
+ StringPiece16 separator) {
return internal::JoinStringT(parts, separator);
}
@@ -409,7 +410,8 @@
return internal::JoinStringT(parts, separator);
}
-string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) {
+std::u16string JoinString(span<const StringPiece16> parts,
+ StringPiece16 separator) {
return internal::JoinStringT(parts, separator);
}
@@ -418,14 +420,15 @@
return internal::JoinStringT(parts, separator);
}
-string16 JoinString(std::initializer_list<StringPiece16> parts,
- StringPiece16 separator) {
+std::u16string JoinString(std::initializer_list<StringPiece16> parts,
+ StringPiece16 separator) {
return internal::JoinStringT(parts, separator);
}
-string16 ReplaceStringPlaceholders(StringPiece16 format_string,
- const std::vector<string16>& subst,
- std::vector<size_t>* offsets) {
+std::u16string ReplaceStringPlaceholders(
+ StringPiece16 format_string,
+ const std::vector<std::u16string>& subst,
+ std::vector<size_t>* offsets) {
return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
}
@@ -435,11 +438,12 @@
return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
}
-string16 ReplaceStringPlaceholders(const string16& format_string,
- const string16& a,
- size_t* offset) {
+std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
+ const std::u16string& a,
+ size_t* offset) {
std::vector<size_t> offsets;
- string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets);
+ std::u16string result =
+ ReplaceStringPlaceholders(format_string, {a}, &offsets);
GURL_DCHECK_EQ(1U, offsets.size());
if (offset)
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index a1e5c59..ccbf745 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -20,8 +20,6 @@
#include "polyfills/base/base_export.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
-#include "base/stl_util.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h" // For implicit conversions.
#include "build/build_config.h"
@@ -86,27 +84,27 @@
// Simplified implementation of C++20's std::basic_string_view(It, End).
// Reference: https://wg21.link/string.view.cons
-template <typename StringT, typename Iter>
-constexpr BasicStringPiece<StringT> MakeBasicStringPiece(Iter begin, Iter end) {
+template <typename CharT, typename Iter>
+constexpr BasicStringPiece<CharT> MakeBasicStringPiece(Iter begin, Iter end) {
GURL_DCHECK_GE(end - begin, 0);
- return {gurl_base::to_address(begin), end - begin};
+ return {gurl_base::to_address(begin), static_cast<size_t>(end - begin)};
}
// Explicit instantiations of MakeBasicStringPiece for the BasicStringPiece
// aliases defined in base/strings/string_piece_forward.h
template <typename Iter>
constexpr StringPiece MakeStringPiece(Iter begin, Iter end) {
- return MakeBasicStringPiece<std::string>(begin, end);
+ return MakeBasicStringPiece<char>(begin, end);
}
template <typename Iter>
constexpr StringPiece16 MakeStringPiece16(Iter begin, Iter end) {
- return MakeBasicStringPiece<string16>(begin, end);
+ return MakeBasicStringPiece<char16_t>(begin, end);
}
template <typename Iter>
constexpr WStringPiece MakeWStringPiece(Iter begin, Iter end) {
- return MakeBasicStringPiece<std::wstring>(begin, end);
+ return MakeBasicStringPiece<wchar_t>(begin, end);
}
// ASCII-specific tolower. The standard library's tolower is locale sensitive,
@@ -127,11 +125,11 @@
// Converts the given string to it's ASCII-lowercase equivalent.
BASE_EXPORT std::string ToLowerASCII(StringPiece str);
-BASE_EXPORT string16 ToLowerASCII(StringPiece16 str);
+BASE_EXPORT std::u16string ToLowerASCII(StringPiece16 str);
// Converts the given string to it's ASCII-uppercase equivalent.
BASE_EXPORT std::string ToUpperASCII(StringPiece str);
-BASE_EXPORT string16 ToUpperASCII(StringPiece16 str);
+BASE_EXPORT std::u16string ToUpperASCII(StringPiece16 str);
// Functor for case-insensitive ASCII comparisons for STL algorithms like
// std::search.
@@ -178,16 +176,17 @@
// These should not be used as initializers, function arguments, or return
// values for functions which return by value or outparam.
BASE_EXPORT const std::string& EmptyString();
-BASE_EXPORT const string16& EmptyString16();
+BASE_EXPORT const std::u16string& EmptyString16();
// Contains the set of characters representing whitespace in the corresponding
// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
// by HTML5, and don't include control characters.
BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
-BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
-BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF.
+BASE_EXPORT extern const char16_t kWhitespaceUTF16[]; // Includes Unicode.
+BASE_EXPORT extern const char16_t
+ kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF.
BASE_EXPORT extern const char kWhitespaceASCII[];
-BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
+BASE_EXPORT extern const char16_t kWhitespaceASCIIAs16[]; // No unicode.
// Null-terminated string representing the UTF-8 byte order mark.
BASE_EXPORT extern const char kUtf8ByteOrderMark[];
@@ -197,7 +196,7 @@
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool RemoveChars(StringPiece16 input,
StringPiece16 remove_chars,
- string16* output);
+ std::u16string* output);
BASE_EXPORT bool RemoveChars(StringPiece input,
StringPiece remove_chars,
std::string* output);
@@ -210,7 +209,7 @@
BASE_EXPORT bool ReplaceChars(StringPiece16 input,
StringPiece16 replace_chars,
StringPiece16 replace_with,
- string16* output);
+ std::u16string* output);
BASE_EXPORT bool ReplaceChars(StringPiece input,
StringPiece replace_chars,
StringPiece replace_with,
@@ -231,7 +230,7 @@
// the normal usage to trim in-place).
BASE_EXPORT bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
- string16* output);
+ std::u16string* output);
BASE_EXPORT bool TrimString(StringPiece input,
StringPiece trim_chars,
std::string* output);
@@ -260,7 +259,7 @@
// NOTE: Safe to use the same variable for both input and output.
BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
- string16* output);
+ std::u16string* output);
BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
TrimPositions positions);
BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input,
@@ -277,8 +276,9 @@
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
// sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
-BASE_EXPORT string16 CollapseWhitespace(StringPiece16 text,
- bool trim_sequences_with_line_breaks);
+BASE_EXPORT std::u16string CollapseWhitespace(
+ StringPiece16 text,
+ bool trim_sequences_with_line_breaks);
BASE_EXPORT std::string CollapseWhitespaceASCII(
StringPiece text,
bool trim_sequences_with_line_breaks);
@@ -403,15 +403,14 @@
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
// FormatBytes instead; remove this.
-BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes);
+BASE_EXPORT std::u16string FormatBytesUnlocalized(int64_t bytes);
// Starting at |start_offset| (usually 0), replace the first instance of
// |find_this| with |replace_with|.
-BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
- gurl_base::string16* str,
- size_t start_offset,
- StringPiece16 find_this,
- StringPiece16 replace_with);
+BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::u16string* str,
+ size_t start_offset,
+ StringPiece16 find_this,
+ StringPiece16 replace_with);
BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
std::string* str,
size_t start_offset,
@@ -424,11 +423,10 @@
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
// std::replace(str.begin(), str.end(), 'a', 'b');
-BASE_EXPORT void ReplaceSubstringsAfterOffset(
- string16* str,
- size_t start_offset,
- StringPiece16 find_this,
- StringPiece16 replace_with);
+BASE_EXPORT void ReplaceSubstringsAfterOffset(std::u16string* str,
+ size_t start_offset,
+ StringPiece16 find_this,
+ StringPiece16 replace_with);
BASE_EXPORT void ReplaceSubstringsAfterOffset(
std::string* str,
size_t start_offset,
@@ -452,7 +450,7 @@
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
-BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
+BASE_EXPORT char16_t* WriteInto(std::u16string* str, size_t length_with_null);
// Joins a list of strings into a single string, inserting |separator| (which
// may be empty) in between all elements.
@@ -469,28 +467,29 @@
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
BASE_EXPORT std::string JoinString(span<const std::string> parts,
StringPiece separator);
-BASE_EXPORT string16 JoinString(span<const string16> parts,
- StringPiece16 separator);
+BASE_EXPORT std::u16string JoinString(span<const std::u16string> parts,
+ StringPiece16 separator);
BASE_EXPORT std::string JoinString(span<const StringPiece> parts,
StringPiece separator);
-BASE_EXPORT string16 JoinString(span<const StringPiece16> parts,
- StringPiece16 separator);
+BASE_EXPORT std::u16string JoinString(span<const StringPiece16> parts,
+ StringPiece16 separator);
// Explicit initializer_list overloads are required to break ambiguity when used
// with a literal initializer list (otherwise the compiler would not be able to
// decide between the string and StringPiece overloads).
BASE_EXPORT std::string JoinString(std::initializer_list<StringPiece> parts,
StringPiece separator);
-BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts,
- StringPiece16 separator);
+BASE_EXPORT std::u16string JoinString(
+ std::initializer_list<StringPiece16> parts,
+ StringPiece16 separator);
// Replace $1-$2-$3..$9 in the format string with values from |subst|.
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// NULL. This only allows you to use up to nine replacements.
-BASE_EXPORT string16
-ReplaceStringPlaceholders(StringPiece16 format_string,
- const std::vector<string16>& subst,
- std::vector<size_t>* offsets);
+BASE_EXPORT std::u16string ReplaceStringPlaceholders(
+ StringPiece16 format_string,
+ const std::vector<std::u16string>& subst,
+ std::vector<size_t>* offsets);
BASE_EXPORT std::string ReplaceStringPlaceholders(
StringPiece format_string,
@@ -498,9 +497,10 @@
std::vector<size_t>* offsets);
// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL.
-BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
- const string16& a,
- size_t* offset);
+BASE_EXPORT std::u16string ReplaceStringPlaceholders(
+ const std::u16string& format_string,
+ const std::u16string& a,
+ size_t* offset);
} // namespace base
diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc
index e9e4d93..198cd53 100644
--- a/base/strings/string_util_constants.cc
+++ b/base/strings/string_util_constants.cc
@@ -44,10 +44,10 @@
#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII
const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0};
-const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0};
-const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0};
+const char16_t kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0};
+const char16_t kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0};
const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0};
-const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
+const char16_t kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h
index ccc1367..07f4930 100644
--- a/base/strings/string_util_internal.h
+++ b/base/strings/string_util_internal.h
@@ -43,34 +43,33 @@
return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1));
}
-template <typename StringType>
-StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
- StringType ret;
+template <typename T, typename CharT = typename T::value_type>
+std::basic_string<CharT> ToLowerASCIIImpl(T str) {
+ std::basic_string<CharT> ret;
ret.reserve(str.size());
for (size_t i = 0; i < str.size(); i++)
ret.push_back(ToLowerASCII(str[i]));
return ret;
}
-template <typename StringType>
-StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
- StringType ret;
+template <typename T, typename CharT = typename T::value_type>
+std::basic_string<CharT> ToUpperASCIIImpl(T str) {
+ std::basic_string<CharT> ret;
ret.reserve(str.size());
for (size_t i = 0; i < str.size(); i++)
ret.push_back(ToUpperASCII(str[i]));
return ret;
}
-template <class StringType>
-int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
- BasicStringPiece<StringType> b) {
+template <typename T, typename CharT = typename T::value_type>
+int CompareCaseInsensitiveASCIIT(T a, T b) {
// Find the first characters that aren't equal and compare them. If the end
// of one of the strings is found before a nonequal character, the lengths
// of the strings are compared.
size_t i = 0;
while (i < a.length() && i < b.length()) {
- typename StringType::value_type lower_a = ToLowerASCII(a[i]);
- typename StringType::value_type lower_b = ToLowerASCII(b[i]);
+ CharT lower_a = ToLowerASCII(a[i]);
+ CharT lower_b = ToLowerASCII(b[i]);
if (lower_a < lower_b)
return -1;
if (lower_a > lower_b)
@@ -88,11 +87,11 @@
return 1;
}
-template <typename Str>
-TrimPositions TrimStringT(BasicStringPiece<Str> input,
- BasicStringPiece<Str> trim_chars,
+template <typename T, typename CharT = typename T::value_type>
+TrimPositions TrimStringT(T input,
+ T trim_chars,
TrimPositions positions,
- Str* output) {
+ std::basic_string<CharT>* output) {
// Find the edges of leading/trailing whitespace as desired. Need to use
// a StringPiece version of input to be able to call find* on it with the
// StringPiece version of trim_chars (normally the trim_chars will be a
@@ -107,8 +106,8 @@
// When the string was all trimmed, report that we stripped off characters
// from whichever position the caller was interested in. For empty input, we
// stripped no characters, but we still need to clear |output|.
- if (input.empty() || first_good_char == Str::npos ||
- last_good_char == Str::npos) {
+ if (input.empty() || first_good_char == std::basic_string<CharT>::npos ||
+ last_good_char == std::basic_string<CharT>::npos) {
bool input_was_empty = input.empty(); // in case output == &input
output->clear();
return input_was_empty ? TRIM_NONE : positions;
@@ -124,10 +123,8 @@
(last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING));
}
-template <typename Str>
-BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
- BasicStringPiece<Str> trim_chars,
- TrimPositions positions) {
+template <typename T, typename CharT = typename T::value_type>
+T TrimStringPieceT(T input, T trim_chars, TrimPositions positions) {
size_t begin =
(positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0;
size_t end = (positions & TRIM_TRAILING)
@@ -136,10 +133,11 @@
return input.substr(std::min(begin, input.size()), end - begin);
}
-template <typename STR>
-STR CollapseWhitespaceT(BasicStringPiece<STR> text,
- bool trim_sequences_with_line_breaks) {
- STR result;
+template <typename T, typename CharT = typename T::value_type>
+std::basic_string<CharT> CollapseWhitespaceT(
+ T text,
+ bool trim_sequences_with_line_breaks) {
+ std::basic_string<CharT> result;
result.resize(text.size());
// Set flags to pretend we're already in a trimmed whitespace sequence, so we
@@ -257,31 +255,27 @@
// The hardcoded strings are typically very short so it doesn't matter, and the
// string piece gives additional flexibility for the caller (doesn't have to be
// null terminated) so we choose the StringPiece route.
-template <typename Str>
-inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
- StringPiece lowercase_ascii) {
+template <typename T, typename CharT = typename T::value_type>
+inline bool DoLowerCaseEqualsASCII(T str, StringPiece lowercase_ascii) {
return std::equal(
str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(),
[](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; });
}
-template <typename Str>
-bool StartsWithT(BasicStringPiece<Str> str,
- BasicStringPiece<Str> search_for,
- CompareCase case_sensitivity) {
+template <typename T, typename CharT = typename T::value_type>
+bool StartsWithT(T str, T search_for, CompareCase case_sensitivity) {
if (search_for.size() > str.size())
return false;
- BasicStringPiece<Str> source = str.substr(0, search_for.size());
+ BasicStringPiece<CharT> source = str.substr(0, search_for.size());
switch (case_sensitivity) {
case CompareCase::SENSITIVE:
return source == search_for;
case CompareCase::INSENSITIVE_ASCII:
- return std::equal(
- search_for.begin(), search_for.end(), source.begin(),
- CaseInsensitiveCompareASCII<typename Str::value_type>());
+ return std::equal(search_for.begin(), search_for.end(), source.begin(),
+ CaseInsensitiveCompareASCII<CharT>());
default:
GURL_NOTREACHED();
@@ -289,14 +283,12 @@
}
}
-template <typename Str>
-bool EndsWithT(BasicStringPiece<Str> str,
- BasicStringPiece<Str> search_for,
- CompareCase case_sensitivity) {
+template <typename T, typename CharT = typename T::value_type>
+bool EndsWithT(T str, T search_for, CompareCase case_sensitivity) {
if (search_for.size() > str.size())
return false;
- BasicStringPiece<Str> source =
+ BasicStringPiece<CharT> source =
str.substr(str.size() - search_for.size(), search_for.size());
switch (case_sensitivity) {
@@ -304,9 +296,8 @@
return source == search_for;
case CompareCase::INSENSITIVE_ASCII:
- return std::equal(
- source.begin(), source.end(), search_for.begin(),
- CaseInsensitiveCompareASCII<typename Str::value_type>());
+ return std::equal(source.begin(), source.end(), search_for.begin(),
+ CaseInsensitiveCompareASCII<CharT>());
default:
GURL_NOTREACHED();
@@ -315,28 +306,40 @@
}
// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings.
-template <class StringType>
+template <class CharT>
struct SubstringMatcher {
- BasicStringPiece<StringType> find_this;
+ BasicStringPiece<CharT> find_this;
- size_t Find(const StringType& input, size_t pos) {
+ size_t Find(const std::basic_string<CharT>& input, size_t pos) {
return input.find(find_this.data(), pos, find_this.length());
}
size_t MatchSize() { return find_this.length(); }
};
-// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters.
-template <class StringType>
-struct CharacterMatcher {
- BasicStringPiece<StringType> find_any_of_these;
+// Type deduction helper for SubstringMatcher.
+template <typename T, typename CharT = typename T::value_type>
+auto MakeSubstringMatcher(T find_this) {
+ return SubstringMatcher<CharT>{find_this};
+}
- size_t Find(const StringType& input, size_t pos) {
+// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters.
+template <class CharT>
+struct CharacterMatcher {
+ BasicStringPiece<CharT> find_any_of_these;
+
+ size_t Find(const std::basic_string<CharT>& input, size_t pos) {
return input.find_first_of(find_any_of_these.data(), pos,
find_any_of_these.length());
}
constexpr size_t MatchSize() { return 1; }
};
+// Type deduction helper for CharacterMatcher.
+template <typename T, typename CharT = typename T::value_type>
+auto MakeCharacterMatcher(T find_any_of_these) {
+ return CharacterMatcher<CharT>{find_any_of_these};
+}
+
enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST };
// Runs in O(n) time in the length of |str|, and transforms the string without
@@ -344,13 +347,13 @@
//
// This is parameterized on a |Matcher| traits type, so that it can be the
// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset().
-template <class StringType, class Matcher>
-bool DoReplaceMatchesAfterOffset(StringType* str,
+template <typename Matcher, typename T, typename CharT = typename T::value_type>
+bool DoReplaceMatchesAfterOffset(std::basic_string<CharT>* str,
size_t initial_offset,
Matcher matcher,
- BasicStringPiece<StringType> replace_with,
+ T replace_with,
ReplaceType replace_type) {
- using CharTraits = typename StringType::traits_type;
+ using CharTraits = std::char_traits<CharT>;
const size_t find_length = matcher.MatchSize();
if (!find_length)
@@ -358,7 +361,7 @@
// If the find string doesn't appear, there's nothing to do.
size_t first_match = matcher.Find(*str, initial_offset);
- if (first_match == StringType::npos)
+ if (first_match == std::basic_string<CharT>::npos)
return false;
// If we're only replacing one instance, there's no need to do anything
@@ -373,7 +376,7 @@
// replace() on each instance, and finish the entire operation in O(n) time.
if (find_length == replace_length) {
auto* buffer = &((*str)[0]);
- for (size_t offset = first_match; offset != StringType::npos;
+ for (size_t offset = first_match; offset != std::basic_string<CharT>::npos;
offset = matcher.Find(*str, offset + replace_length)) {
CharTraits::copy(buffer + offset, replace_with.data(), replace_length);
}
@@ -403,7 +406,7 @@
// matches.
const size_t expansion_per_match = (replace_length - find_length);
size_t num_matches = 0;
- for (size_t match = first_match; match != StringType::npos;
+ for (size_t match = first_match; match != std::basic_string<CharT>::npos;
match = matcher.Find(*str, match + find_length)) {
expansion += expansion_per_match;
++num_matches;
@@ -413,7 +416,7 @@
if (str->capacity() < final_length) {
// If we'd have to allocate a new buffer to grow the string, build the
// result directly into the new allocation via append().
- StringType src(str->get_allocator());
+ std::basic_string<CharT> src(str->get_allocator());
str->swap(src);
str->reserve(final_length);
@@ -471,7 +474,8 @@
}
read_offset += find_length;
- // min() clamps StringType::npos (the largest unsigned value) to str_length.
+ // min() clamps std::basic_string<CharT>::npos (the largest unsigned value)
+ // to str_length.
size_t match = std::min(matcher.Find(*str, read_offset), str_length);
size_t length = match - read_offset;
@@ -487,19 +491,19 @@
return true;
}
-template <class StringType>
-bool ReplaceCharsT(BasicStringPiece<StringType> input,
- BasicStringPiece<StringType> find_any_of_these,
- BasicStringPiece<StringType> replace_with,
- StringType* output) {
+template <typename T, typename CharT = typename T::value_type>
+bool ReplaceCharsT(T input,
+ T find_any_of_these,
+ T replace_with,
+ std::basic_string<CharT>* output) {
// Commonly, this is called with output and input being the same string; in
// that case, skip the copy.
if (input.data() != output->data() || input.size() != output->size())
output->assign(input.data(), input.size());
- return DoReplaceMatchesAfterOffset(
- output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with,
- ReplaceType::REPLACE_ALL);
+ return DoReplaceMatchesAfterOffset(output, 0,
+ MakeCharacterMatcher(find_any_of_these),
+ replace_with, ReplaceType::REPLACE_ALL);
}
template <class string_type>
@@ -513,20 +517,21 @@
// Generic version for all JoinString overloads. |list_type| must be a sequence
// (gurl_base::span or std::initializer_list) of strings/StringPieces (std::string,
-// string16, StringPiece or StringPiece16). |string_type| is either std::string
-// or string16.
-template <typename list_type, typename string_type>
-static string_type JoinStringT(list_type parts,
- BasicStringPiece<string_type> sep) {
+// std::u16string, StringPiece or StringPiece16). |CharT| is either char or
+// char16_t.
+template <typename list_type,
+ typename T,
+ typename CharT = typename T::value_type>
+static std::basic_string<CharT> JoinStringT(list_type parts, T sep) {
if (gurl_base::empty(parts))
- return string_type();
+ return std::basic_string<CharT>();
// Pre-allocate the eventual size of the string. Start with the size of all of
// the separators (note that this *assumes* parts.size() > 0).
size_t total_size = (parts.size() - 1) * sep.size();
for (const auto& part : parts)
total_size += part.size();
- string_type result;
+ std::basic_string<CharT> result;
result.reserve(total_size);
auto iter = parts.begin();
@@ -545,10 +550,10 @@
return result;
}
-template <class StringType>
-StringType DoReplaceStringPlaceholders(
- BasicStringPiece<StringType> format_string,
- const std::vector<StringType>& subst,
+template <typename T, typename CharT = typename T::value_type>
+std::basic_string<CharT> DoReplaceStringPlaceholders(
+ T format_string,
+ const std::vector<std::basic_string<CharT>>& subst,
std::vector<size_t>* offsets) {
size_t substitutions = subst.size();
GURL_DCHECK_LT(substitutions, 10U);
@@ -557,7 +562,7 @@
for (const auto& cur : subst)
sub_length += cur.length();
- StringType formatted;
+ std::basic_string<CharT> formatted;
formatted.reserve(format_string.length() + sub_length);
std::vector<ReplacementOffset> r_offsets;
diff --git a/base/strings/string_util_perftest.cc b/base/strings/string_util_perftest.cc
index 033df0e..8a5d540 100644
--- a/base/strings/string_util_perftest.cc
+++ b/base/strings/string_util_perftest.cc
@@ -34,7 +34,7 @@
for (size_t non_ascii_loc = 0; non_ascii_loc < 3; ++non_ascii_loc) {
size_t non_ascii_pos = str_length * non_ascii_loc / 2 + 2;
MeasureIsStringASCII<std::string>(str_length, non_ascii_pos);
- MeasureIsStringASCII<string16>(str_length, non_ascii_pos);
+ MeasureIsStringASCII<std::u16string>(str_length, non_ascii_pos);
#if defined(WCHAR_T_IS_UTF32)
MeasureIsStringASCII<std::basic_string<wchar_t>>(str_length,
non_ascii_pos);
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index beb99e2..f8326cc 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc
@@ -10,11 +10,12 @@
#include <stdint.h>
#include <algorithm>
+#include <string>
#include <type_traits>
#include "base/bits.h"
#include "base/stl_util.h"
-#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "testing/gmock/include/gmock/gmock.h"
@@ -355,23 +356,23 @@
#if defined(WCHAR_T_IS_UTF16)
TEST(StringUtilTest, as_wcstr) {
- char16 rw_buffer[10] = {};
+ char16_t rw_buffer[10] = {};
static_assert(
std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_buffer))>::value,
"");
EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_wcstr(rw_buffer));
- string16 rw_str(10, '\0');
+ std::u16string rw_str(10, '\0');
static_assert(
std::is_same<wchar_t*, decltype(as_writable_wcstr(rw_str))>::value, "");
EXPECT_EQ(static_cast<const void*>(rw_str.data()), as_writable_wcstr(rw_str));
- const char16 ro_buffer[10] = {};
+ const char16_t ro_buffer[10] = {};
static_assert(
std::is_same<const wchar_t*, decltype(as_wcstr(ro_buffer))>::value, "");
EXPECT_EQ(static_cast<const void*>(ro_buffer), as_wcstr(ro_buffer));
- const string16 ro_str(10, '\0');
+ const std::u16string ro_str(10, '\0');
static_assert(std::is_same<const wchar_t*, decltype(as_wcstr(ro_str))>::value,
"");
EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_wcstr(ro_str));
@@ -385,35 +386,37 @@
TEST(StringUtilTest, as_u16cstr) {
wchar_t rw_buffer[10] = {};
static_assert(
- std::is_same<char16*, decltype(as_writable_u16cstr(rw_buffer))>::value,
+ std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_buffer))>::value,
"");
EXPECT_EQ(static_cast<void*>(rw_buffer), as_writable_u16cstr(rw_buffer));
std::wstring rw_str(10, '\0');
static_assert(
- std::is_same<char16*, decltype(as_writable_u16cstr(rw_str))>::value, "");
+ std::is_same<char16_t*, decltype(as_writable_u16cstr(rw_str))>::value,
+ "");
EXPECT_EQ(static_cast<const void*>(rw_str.data()),
as_writable_u16cstr(rw_str));
const wchar_t ro_buffer[10] = {};
static_assert(
- std::is_same<const char16*, decltype(as_u16cstr(ro_buffer))>::value, "");
+ std::is_same<const char16_t*, decltype(as_u16cstr(ro_buffer))>::value,
+ "");
EXPECT_EQ(static_cast<const void*>(ro_buffer), as_u16cstr(ro_buffer));
const std::wstring ro_str(10, '\0');
static_assert(
- std::is_same<const char16*, decltype(as_u16cstr(ro_str))>::value, "");
+ std::is_same<const char16_t*, decltype(as_u16cstr(ro_str))>::value, "");
EXPECT_EQ(static_cast<const void*>(ro_str.data()), as_u16cstr(ro_str));
WStringPiece piece = ro_buffer;
- static_assert(std::is_same<const char16*, decltype(as_u16cstr(piece))>::value,
- "");
+ static_assert(
+ std::is_same<const char16_t*, decltype(as_u16cstr(piece))>::value, "");
EXPECT_EQ(static_cast<const void*>(piece.data()), as_u16cstr(piece));
}
#endif // defined(WCHAR_T_IS_UTF16)
TEST(StringUtilTest, TrimWhitespace) {
- string16 output; // Allow contents to carry over to next testcase
+ std::u16string output; // Allow contents to carry over to next testcase
for (const auto& value : trim_cases) {
EXPECT_EQ(value.return_value,
TrimWhitespace(WideToUTF16(value.input), value.positions,
@@ -422,14 +425,14 @@
}
// Test that TrimWhitespace() can take the same string for input and output
- output = ASCIIToUTF16(" This is a test \r\n");
+ output = u" This is a test \r\n";
EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
- EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
+ EXPECT_EQ(u"This is a test", output);
// Once more, but with a string of whitespace
- output = ASCIIToUTF16(" \r\n");
+ output = u" \r\n";
EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
- EXPECT_EQ(string16(), output);
+ EXPECT_EQ(std::u16string(), output);
std::string output_ascii;
for (const auto& value : trim_cases_ascii) {
@@ -521,10 +524,10 @@
TEST(StringUtilTest, IsStringASCII) {
static char char_ascii[] =
"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
- static char16 char16_ascii[] = {
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
- 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
- '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
+ static char16_t char16_ascii[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8',
+ '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', '0',
+ '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ '0', 'A', 'B', 'C', 'D', 'E', 'F', 0};
static std::wstring wchar_ascii(
L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
@@ -604,7 +607,7 @@
for (size_t i = 0; i < gurl_base::size(char_cases); ++i) {
EXPECT_TRUE(IsStringASCII(char_cases[i]));
- string16 utf16 = ASCIIToUTF16(char_cases[i]);
+ std::u16string utf16 = ASCIIToUTF16(char_cases[i]);
EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
@@ -614,7 +617,7 @@
EXPECT_FALSE(IsStringASCII("Google \x80Video"));
// Convert empty strings.
- string16 empty16;
+ std::u16string empty16;
std::string empty;
EXPECT_EQ(empty, UTF16ToASCII(empty16));
EXPECT_EQ(empty16, ASCIIToUTF16(empty));
@@ -623,8 +626,8 @@
const char chars_with_nul[] = "test\0string";
const int length_with_nul = gurl_base::size(chars_with_nul) - 1;
std::string string_with_nul(chars_with_nul, length_with_nul);
- string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
- EXPECT_EQ(static_cast<string16::size_type>(length_with_nul),
+ std::u16string string16_with_nul = ASCIIToUTF16(string_with_nul);
+ EXPECT_EQ(static_cast<std::u16string::size_type>(length_with_nul),
string16_with_nul.length());
std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
@@ -637,12 +640,12 @@
EXPECT_EQ('c', ToLowerASCII('c'));
EXPECT_EQ('2', ToLowerASCII('2'));
- EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('C')));
- EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('c')));
- EXPECT_EQ(static_cast<char16>('2'), ToLowerASCII(static_cast<char16>('2')));
+ EXPECT_EQ(u'c', ToLowerASCII(u'C'));
+ EXPECT_EQ(u'c', ToLowerASCII(u'c'));
+ EXPECT_EQ(u'2', ToLowerASCII(u'2'));
EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
- EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
+ EXPECT_EQ(u"cc2", ToLowerASCII(u"Cc2"));
}
TEST(StringUtilTest, ToUpperASCII) {
@@ -650,12 +653,12 @@
EXPECT_EQ('C', ToUpperASCII('c'));
EXPECT_EQ('2', ToUpperASCII('2'));
- EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('C')));
- EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('c')));
- EXPECT_EQ(static_cast<char16>('2'), ToUpperASCII(static_cast<char16>('2')));
+ EXPECT_EQ(u'C', ToUpperASCII(u'C'));
+ EXPECT_EQ(u'C', ToUpperASCII(u'c'));
+ EXPECT_EQ(u'2', ToUpperASCII(u'2'));
EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
- EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
+ EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));
}
TEST(StringUtilTest, LowerCaseEqualsASCII) {
@@ -746,9 +749,9 @@
{"abababab", 1, "aba", "c", "abcbab"},
};
- // gurl_base::string16 variant
+ // std::u16string variant
for (const auto& scenario : cases) {
- string16 str = ASCIIToUTF16(scenario.str);
+ std::u16string str = ASCIIToUTF16(scenario.str);
ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
ASCIIToUTF16(scenario.find_this),
ASCIIToUTF16(scenario.replace_with));
@@ -757,7 +760,7 @@
// std::string with insufficient capacity: expansion must realloc the buffer.
for (const auto& scenario : cases) {
- std::string str = scenario.str.as_string();
+ std::string str(scenario.str);
str.shrink_to_fit(); // This is nonbinding, but it's the best we've got.
ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
scenario.find_this, scenario.replace_with);
@@ -766,7 +769,7 @@
// std::string with ample capacity: should be possible to grow in-place.
for (const auto& scenario : cases) {
- std::string str = scenario.str.as_string();
+ std::string str(scenario.str);
str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
2);
@@ -779,7 +782,7 @@
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
static const struct {
const char* str;
- string16::size_type start_offset;
+ std::u16string::size_type start_offset;
const char* find_this;
const char* replace_with;
const char* expected;
@@ -798,7 +801,7 @@
};
for (const auto& i : cases) {
- string16 str = ASCIIToUTF16(i.str);
+ std::u16string str = ASCIIToUTF16(i.str);
ReplaceFirstSubstringAfterOffset(&str, i.start_offset,
ASCIIToUTF16(i.find_this),
ASCIIToUTF16(i.replace_with));
@@ -856,25 +859,25 @@
}
TEST(StringUtilTest, JoinString16) {
- string16 separator = ASCIIToUTF16(", ");
- std::vector<string16> parts;
- EXPECT_EQ(string16(), JoinString(parts, separator));
+ std::u16string separator = u", ";
+ std::vector<std::u16string> parts;
+ EXPECT_EQ(std::u16string(), JoinString(parts, separator));
- parts.push_back(string16());
- EXPECT_EQ(string16(), JoinString(parts, separator));
+ parts.push_back(std::u16string());
+ EXPECT_EQ(std::u16string(), JoinString(parts, separator));
parts.clear();
- parts.push_back(ASCIIToUTF16("a"));
- EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
+ parts.push_back(u"a");
+ EXPECT_EQ(u"a", JoinString(parts, separator));
- parts.push_back(ASCIIToUTF16("b"));
- parts.push_back(ASCIIToUTF16("c"));
- EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
+ parts.push_back(u"b");
+ parts.push_back(u"c");
+ EXPECT_EQ(u"a, b, c", JoinString(parts, separator));
- parts.push_back(ASCIIToUTF16(""));
- EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
- parts.push_back(ASCIIToUTF16(" "));
- EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
+ parts.push_back(u"");
+ EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator));
+ parts.push_back(u" ");
+ EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|"));
}
TEST(StringUtilTest, JoinStringPiece) {
@@ -901,30 +904,30 @@
}
TEST(StringUtilTest, JoinStringPiece16) {
- string16 separator = ASCIIToUTF16(", ");
+ std::u16string separator = u", ";
std::vector<StringPiece16> parts;
- EXPECT_EQ(string16(), JoinString(parts, separator));
+ EXPECT_EQ(std::u16string(), JoinString(parts, separator));
// Test empty first part (https://crbug.com/698073).
parts.push_back(StringPiece16());
- EXPECT_EQ(string16(), JoinString(parts, separator));
+ EXPECT_EQ(std::u16string(), JoinString(parts, separator));
parts.clear();
- const string16 kA = ASCIIToUTF16("a");
+ const std::u16string kA = u"a";
parts.push_back(kA);
- EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
+ EXPECT_EQ(u"a", JoinString(parts, separator));
- const string16 kB = ASCIIToUTF16("b");
+ const std::u16string kB = u"b";
parts.push_back(kB);
- const string16 kC = ASCIIToUTF16("c");
+ const std::u16string kC = u"c";
parts.push_back(kC);
- EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
+ EXPECT_EQ(u"a, b, c", JoinString(parts, separator));
parts.push_back(StringPiece16());
- EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
- const string16 kSpace = ASCIIToUTF16(" ");
+ EXPECT_EQ(u"a, b, c, ", JoinString(parts, separator));
+ const std::u16string kSpace = u" ";
parts.push_back(kSpace);
- EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
+ EXPECT_EQ(u"a|b|c|| ", JoinString(parts, u"|"));
}
TEST(StringUtilTest, JoinStringInitializerList) {
@@ -952,31 +955,29 @@
}
TEST(StringUtilTest, JoinStringInitializerList16) {
- string16 separator = ASCIIToUTF16(", ");
- EXPECT_EQ(string16(), JoinString({}, separator));
+ std::u16string separator = u", ";
+ EXPECT_EQ(std::u16string(), JoinString({}, separator));
// Test empty first part (https://crbug.com/698073).
- EXPECT_EQ(string16(), JoinString({StringPiece16()}, separator));
+ EXPECT_EQ(std::u16string(), JoinString({StringPiece16()}, separator));
// With string16s.
- const string16 kA = ASCIIToUTF16("a");
- EXPECT_EQ(ASCIIToUTF16("a"), JoinString({kA}, separator));
+ const std::u16string kA = u"a";
+ EXPECT_EQ(u"a", JoinString({kA}, separator));
- const string16 kB = ASCIIToUTF16("b");
- const string16 kC = ASCIIToUTF16("c");
- EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString({kA, kB, kC}, separator));
+ const std::u16string kB = u"b";
+ const std::u16string kC = u"c";
+ EXPECT_EQ(u"a, b, c", JoinString({kA, kB, kC}, separator));
- EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
- JoinString({kA, kB, kC, StringPiece16()}, separator));
- const string16 kSpace = ASCIIToUTF16(" ");
- EXPECT_EQ(
- ASCIIToUTF16("a|b|c|| "),
- JoinString({kA, kB, kC, StringPiece16(), kSpace}, ASCIIToUTF16("|")));
+ EXPECT_EQ(u"a, b, c, ", JoinString({kA, kB, kC, StringPiece16()}, separator));
+ const std::u16string kSpace = u" ";
+ EXPECT_EQ(u"a|b|c|| ",
+ JoinString({kA, kB, kC, StringPiece16(), kSpace}, u"|"));
// With StringPiece16s.
const StringPiece16 kPieceA = kA;
const StringPiece16 kPieceB = kB;
- EXPECT_EQ(ASCIIToUTF16("a, b"), JoinString({kPieceA, kPieceB}, separator));
+ EXPECT_EQ(u"a, b", JoinString({kPieceA, kPieceB}, separator));
}
TEST(StringUtilTest, StartsWith) {
@@ -999,83 +1000,74 @@
gurl_base::CompareCase::INSENSITIVE_ASCII));
EXPECT_TRUE(StartsWith("java", std::string(), gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
- ASCIIToUTF16("javascript"),
+ EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript",
gurl_base::CompareCase::SENSITIVE));
- EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
- ASCIIToUTF16("javascript"),
+ EXPECT_FALSE(StartsWith(u"JavaScript:url", u"javascript",
gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
- ASCIIToUTF16("javascript"),
+ EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript",
gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
- ASCIIToUTF16("javascript"),
+ EXPECT_TRUE(StartsWith(u"JavaScript:url", u"javascript",
gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),
+ EXPECT_FALSE(
+ StartsWith(u"java", u"javascript", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_FALSE(
+ StartsWith(u"java", u"javascript", gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_FALSE(StartsWith(std::u16string(), u"javascript",
gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),
+ EXPECT_FALSE(StartsWith(std::u16string(), u"javascript",
gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),
+ EXPECT_TRUE(StartsWith(u"java", std::u16string(),
gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),
- gurl_base::CompareCase::SENSITIVE));
+ EXPECT_TRUE(
+ StartsWith(u"java", std::u16string(), gurl_base::CompareCase::SENSITIVE));
}
TEST(StringUtilTest, EndsWith) {
- EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),
- gurl_base::CompareCase::SENSITIVE));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),
- gurl_base::CompareCase::SENSITIVE));
EXPECT_TRUE(
- EndsWith(string16(), string16(), gurl_base::CompareCase::INSENSITIVE_ASCII));
- EXPECT_TRUE(EndsWith(string16(), string16(), gurl_base::CompareCase::SENSITIVE));
+ EndsWith(u"Foo.plugin", u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_FALSE(
+ EndsWith(u"Foo.Plugin", u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin",
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_TRUE(EndsWith(u"Foo.Plugin", u".plugin",
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_FALSE(EndsWith(u".plug", u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_FALSE(
+ EndsWith(u".plug", u".plugin", gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_FALSE(
+ EndsWith(u"Foo.plugin Bar", u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin",
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_FALSE(EndsWith(std::u16string(), u".plugin",
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_FALSE(
+ EndsWith(std::u16string(), u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(),
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_TRUE(
+ EndsWith(u"Foo.plugin", std::u16string(), gurl_base::CompareCase::SENSITIVE));
+ EXPECT_TRUE(
+ EndsWith(u".plugin", u".plugin", gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_TRUE(EndsWith(u".plugin", u".plugin", gurl_base::CompareCase::SENSITIVE));
+ EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(),
+ gurl_base::CompareCase::INSENSITIVE_ASCII));
+ EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(),
+ gurl_base::CompareCase::SENSITIVE));
}
TEST(StringUtilTest, GetStringFWithOffsets) {
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("1"));
- subst.push_back(ASCIIToUTF16("2"));
+ std::vector<std::u16string> subst;
+ subst.push_back(u"1");
+ subst.push_back(u"2");
std::vector<size_t> offsets;
- ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
- subst,
- &offsets);
+ ReplaceStringPlaceholders(u"Hello, $1. Your number is $2.", subst, &offsets);
EXPECT_EQ(2U, offsets.size());
EXPECT_EQ(7U, offsets[0]);
EXPECT_EQ(25U, offsets[1]);
offsets.clear();
- ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
- subst,
- &offsets);
+ ReplaceStringPlaceholders(u"Hello, $2. Your number is $1.", subst, &offsets);
EXPECT_EQ(2U, offsets.size());
EXPECT_EQ(25U, offsets[0]);
EXPECT_EQ(7U, offsets[1]);
@@ -1085,54 +1077,52 @@
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
// Test whether replacestringplaceholders works as expected when there
// are fewer inputs than outputs.
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("9a"));
- subst.push_back(ASCIIToUTF16("8b"));
- subst.push_back(ASCIIToUTF16("7c"));
+ std::vector<std::u16string> subst;
+ subst.push_back(u"9a");
+ subst.push_back(u"8b");
+ subst.push_back(u"7c");
- string16 formatted =
- ReplaceStringPlaceholders(
- ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, nullptr);
+ std::u16string formatted = ReplaceStringPlaceholders(
+ u"$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i", subst, nullptr);
- EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
+ EXPECT_EQ(u"9aa,8bb,7cc,d,e,f,9ag,8bh,7ci", formatted);
}
TEST(StringUtilTest, ReplaceStringPlaceholders) {
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("9a"));
- subst.push_back(ASCIIToUTF16("8b"));
- subst.push_back(ASCIIToUTF16("7c"));
- subst.push_back(ASCIIToUTF16("6d"));
- subst.push_back(ASCIIToUTF16("5e"));
- subst.push_back(ASCIIToUTF16("4f"));
- subst.push_back(ASCIIToUTF16("3g"));
- subst.push_back(ASCIIToUTF16("2h"));
- subst.push_back(ASCIIToUTF16("1i"));
+ std::vector<std::u16string> subst;
+ subst.push_back(u"9a");
+ subst.push_back(u"8b");
+ subst.push_back(u"7c");
+ subst.push_back(u"6d");
+ subst.push_back(u"5e");
+ subst.push_back(u"4f");
+ subst.push_back(u"3g");
+ subst.push_back(u"2h");
+ subst.push_back(u"1i");
- string16 formatted =
- ReplaceStringPlaceholders(
- ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, nullptr);
+ std::u16string formatted = ReplaceStringPlaceholders(
+ u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);
- EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
+ EXPECT_EQ(u"9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
}
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
// In this test, some of the substitutions are shorter than the placeholders,
// but overall the string gets longer.
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("9a____"));
- subst.push_back(ASCIIToUTF16("B"));
- subst.push_back(ASCIIToUTF16("7c___"));
- subst.push_back(ASCIIToUTF16("d"));
- subst.push_back(ASCIIToUTF16("5e____"));
- subst.push_back(ASCIIToUTF16("F"));
- subst.push_back(ASCIIToUTF16("3g___"));
- subst.push_back(ASCIIToUTF16("h"));
- subst.push_back(ASCIIToUTF16("1i_____"));
+ std::vector<std::u16string> subst;
+ subst.push_back(u"9a____");
+ subst.push_back(u"B");
+ subst.push_back(u"7c___");
+ subst.push_back(u"d");
+ subst.push_back(u"5e____");
+ subst.push_back(u"F");
+ subst.push_back(u"3g___");
+ subst.push_back(u"h");
+ subst.push_back(u"1i_____");
- string16 original = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
- string16 expected =
- ASCIIToUTF16("9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i");
+ std::u16string original = u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i";
+ std::u16string expected =
+ u"9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i";
EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));
@@ -1151,33 +1141,33 @@
// In this test, some of the substitutions are longer than the placeholders,
// but overall the string gets smaller. Additionally, the placeholders appear
// in a permuted order.
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("z"));
- subst.push_back(ASCIIToUTF16("y"));
- subst.push_back(ASCIIToUTF16("XYZW"));
- subst.push_back(ASCIIToUTF16("x"));
- subst.push_back(ASCIIToUTF16("w"));
+ std::vector<std::u16string> subst;
+ subst.push_back(u"z");
+ subst.push_back(u"y");
+ subst.push_back(u"XYZW");
+ subst.push_back(u"x");
+ subst.push_back(u"w");
- string16 formatted =
- ReplaceStringPlaceholders(ASCIIToUTF16("$3_$4$2$1$5"), subst, nullptr);
+ std::u16string formatted =
+ ReplaceStringPlaceholders(u"$3_$4$2$1$5", subst, nullptr);
- EXPECT_EQ(ASCIIToUTF16("XYZW_xyzw"), formatted);
+ EXPECT_EQ(u"XYZW_xyzw", formatted);
}
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("1a"));
- string16 formatted =
- ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), subst, nullptr);
- EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted);
+ std::vector<std::u16string> subst;
+ subst.push_back(u"1a");
+ std::u16string formatted =
+ ReplaceStringPlaceholders(u" $16 ", subst, nullptr);
+ EXPECT_EQ(u" 1a6 ", formatted);
}
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
- std::vector<string16> subst;
- subst.push_back(ASCIIToUTF16("1a"));
- string16 formatted =
- ReplaceStringPlaceholders(ASCIIToUTF16("+$-+$A+$1+"), subst, nullptr);
- EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted);
+ std::vector<std::u16string> subst;
+ subst.push_back(u"1a");
+ std::u16string formatted =
+ ReplaceStringPlaceholders(u"+$-+$A+$1+", subst, nullptr);
+ EXPECT_EQ(u"+++1a+", formatted);
}
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
@@ -1324,14 +1314,14 @@
EXPECT_EQ(MakeStringPiece(foo.begin(), foo.end()).size(), foo.size());
EXPECT_TRUE(MakeStringPiece(foo.end(), foo.end()).empty());
- constexpr char16 kBar[] = STRING16_LITERAL("Bar");
+ constexpr char16_t kBar[] = u"Bar";
static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar, "");
static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar, "");
static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3, "");
static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty(), "");
static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty(), "");
- string16 bar = kBar;
+ std::u16string bar = kBar;
EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()), bar);
EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).data(), bar.data());
EXPECT_EQ(MakeStringPiece16(bar.begin(), bar.end()).size(), bar.size());
@@ -1457,13 +1447,12 @@
EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));
- EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
- EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
- EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
- EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16));
- EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
- EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "),
- kWhitespaceUTF16));
+ EXPECT_TRUE(ContainsOnlyChars(std::u16string(), kWhitespaceUTF16));
+ EXPECT_TRUE(ContainsOnlyChars(u" ", kWhitespaceUTF16));
+ EXPECT_TRUE(ContainsOnlyChars(u"\t", kWhitespaceUTF16));
+ EXPECT_TRUE(ContainsOnlyChars(u"\t \r \n ", kWhitespaceUTF16));
+ EXPECT_FALSE(ContainsOnlyChars(u"a", kWhitespaceUTF16));
+ EXPECT_FALSE(ContainsOnlyChars(u"\thello\r \n ", kWhitespaceUTF16));
}
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
diff --git a/base/strings/string_util_win.cc b/base/strings/string_util_win.cc
index 1a98101..9d475ba 100644
--- a/base/strings/string_util_win.cc
+++ b/base/strings/string_util_win.cc
@@ -8,7 +8,6 @@
namespace gurl_base {
-#if defined(BASE_STRING16_IS_STD_U16STRING)
bool IsStringASCII(WStringPiece str) {
return internal::DoIsStringASCII(str.data(), str.length());
}
@@ -102,7 +101,7 @@
WStringPiece find_this,
WStringPiece replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<std::wstring>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_FIRST);
}
@@ -111,7 +110,7 @@
WStringPiece find_this,
WStringPiece replace_with) {
internal::DoReplaceMatchesAfterOffset(
- str, start_offset, internal::SubstringMatcher<std::wstring>{find_this},
+ str, start_offset, internal::MakeSubstringMatcher(find_this),
replace_with, internal::ReplaceType::REPLACE_ALL);
}
@@ -140,6 +139,4 @@
return internal::DoReplaceStringPlaceholders(format_string, subst, offsets);
}
-#endif
-
} // namespace base
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h
index 3ddbc92..7c04176 100644
--- a/base/strings/string_util_win.h
+++ b/base/strings/string_util_win.h
@@ -16,7 +16,6 @@
#include "polyfills/base/check.h"
#include "base/containers/span.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
@@ -49,22 +48,22 @@
// Utility functions to access the underlying string buffer as a wide char
// pointer.
//
-// Note: These functions violate strict aliasing when char16 and wchar_t are
+// Note: These functions violate strict aliasing when char16_t and wchar_t are
// unrelated types. We thus pass -fno-strict-aliasing to the compiler on
// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2].
//
// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244
// [2]
// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949
-inline wchar_t* as_writable_wcstr(char16* str) {
+inline wchar_t* as_writable_wcstr(char16_t* str) {
return reinterpret_cast<wchar_t*>(str);
}
-inline wchar_t* as_writable_wcstr(string16& str) {
+inline wchar_t* as_writable_wcstr(std::u16string& str) {
return reinterpret_cast<wchar_t*>(data(str));
}
-inline const wchar_t* as_wcstr(const char16* str) {
+inline const wchar_t* as_wcstr(const char16_t* str) {
return reinterpret_cast<const wchar_t*>(str);
}
@@ -72,21 +71,22 @@
return reinterpret_cast<const wchar_t*>(str.data());
}
-// Utility functions to access the underlying string buffer as a char16 pointer.
-inline char16* as_writable_u16cstr(wchar_t* str) {
- return reinterpret_cast<char16*>(str);
+// Utility functions to access the underlying string buffer as a char16_t
+// pointer.
+inline char16_t* as_writable_u16cstr(wchar_t* str) {
+ return reinterpret_cast<char16_t*>(str);
}
-inline char16* as_writable_u16cstr(std::wstring& str) {
- return reinterpret_cast<char16*>(data(str));
+inline char16_t* as_writable_u16cstr(std::wstring& str) {
+ return reinterpret_cast<char16_t*>(data(str));
}
-inline const char16* as_u16cstr(const wchar_t* str) {
- return reinterpret_cast<const char16*>(str);
+inline const char16_t* as_u16cstr(const wchar_t* str) {
+ return reinterpret_cast<const char16_t*>(str);
}
-inline const char16* as_u16cstr(WStringPiece str) {
- return reinterpret_cast<const char16*>(str.data());
+inline const char16_t* as_u16cstr(WStringPiece str) {
+ return reinterpret_cast<const char16_t*>(str.data());
}
// Utility functions to convert between gurl_base::WStringPiece and
@@ -103,17 +103,12 @@
return std::wstring(as_wcstr(str.data()), str.size());
}
-inline string16 AsString16(WStringPiece str) {
- return string16(as_u16cstr(str.data()), str.size());
+inline std::u16string AsString16(WStringPiece str) {
+ return std::u16string(as_u16cstr(str.data()), str.size());
}
// The following section contains overloads of the cross-platform APIs for
-// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring
-// and gurl_base::string16 are distinct types, as otherwise this would result in an
-// ODR violation.
-// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is
-// std::u16string.
-#if defined(BASE_STRING16_IS_STD_U16STRING)
+// std::wstring and gurl_base::WStringPiece.
BASE_EXPORT bool IsStringASCII(WStringPiece str);
BASE_EXPORT std::wstring ToLowerASCII(WStringPiece str);
@@ -192,9 +187,8 @@
BASE_EXPORT std::wstring ReplaceStringPlaceholders(
WStringPiece format_string,
- const std::vector<string16>& subst,
+ const std::vector<std::wstring>& subst,
std::vector<size_t>* offsets);
-#endif
} // namespace base
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc
index c2e8707..9da8861 100644
--- a/base/strings/stringprintf_unittest.cc
+++ b/base/strings/stringprintf_unittest.cc
@@ -7,7 +7,6 @@
#include <errno.h>
#include <stddef.h>
-#include "base/macros.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/sys_string_conversions.h b/base/strings/sys_string_conversions.h
index 4183d26..51977fe 100644
--- a/base/strings/sys_string_conversions.h
+++ b/base/strings/sys_string_conversions.h
@@ -14,7 +14,6 @@
#include <string>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
@@ -82,12 +81,13 @@
// Converts a CFStringRef to an STL string. Returns an empty string on failure.
BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref)
WARN_UNUSED_RESULT;
-BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT;
+BASE_EXPORT std::u16string SysCFStringRefToUTF16(CFStringRef ref)
+ WARN_UNUSED_RESULT;
// Same, but accepts NSString input. Converts nil NSString* to the appropriate
// string type of length 0.
BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT;
-BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT;
+BASE_EXPORT std::u16string SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT;
#endif // defined(OS_APPLE)
diff --git a/base/strings/sys_string_conversions_unittest.cc b/base/strings/sys_string_conversions_unittest.cc
index 2f31dcc..95995c6 100644
--- a/base/strings/sys_string_conversions_unittest.cc
+++ b/base/strings/sys_string_conversions_unittest.cc
@@ -6,7 +6,6 @@
#include <string>
-#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc
index 5a492d6..6120b71 100644
--- a/base/strings/utf_offset_string_conversions.cc
+++ b/base/strings/utf_offset_string_conversions.cc
@@ -37,14 +37,14 @@
size_t* offset,
size_t limit) {
GURL_DCHECK(offset);
- if (*offset == string16::npos)
+ if (*offset == std::u16string::npos)
return;
int adjustment = 0;
for (const auto& i : adjustments) {
if (*offset <= i.original_offset)
break;
if (*offset < (i.original_offset + i.original_length)) {
- *offset = string16::npos;
+ *offset = std::u16string::npos;
return;
}
adjustment += static_cast<int>(i.original_length - i.output_length);
@@ -52,7 +52,7 @@
*offset -= adjustment;
if (*offset > limit)
- *offset = string16::npos;
+ *offset = std::u16string::npos;
}
// static
@@ -68,7 +68,7 @@
// static
void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
size_t* offset) {
- if (*offset == string16::npos)
+ if (*offset == std::u16string::npos)
return;
int adjustment = 0;
for (const auto& i : adjustments) {
@@ -76,7 +76,7 @@
break;
adjustment += static_cast<int>(i.original_length - i.output_length);
if ((*offset + adjustment) < (i.original_offset + i.original_length)) {
- *offset = string16::npos;
+ *offset = std::u16string::npos;
return;
}
}
@@ -219,29 +219,29 @@
bool UTF8ToUTF16WithAdjustments(
const char* src,
size_t src_len,
- string16* output,
+ std::u16string* output,
gurl_base::OffsetAdjuster::Adjustments* adjustments) {
PrepareForUTF16Or32Output(src, src_len, output);
return ConvertUnicode(src, src_len, output, adjustments);
}
-string16 UTF8ToUTF16WithAdjustments(
+std::u16string UTF8ToUTF16WithAdjustments(
const gurl_base::StringPiece& utf8,
gurl_base::OffsetAdjuster::Adjustments* adjustments) {
- string16 result;
+ std::u16string result;
UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments);
return result;
}
-string16 UTF8ToUTF16AndAdjustOffsets(
+std::u16string UTF8ToUTF16AndAdjustOffsets(
const gurl_base::StringPiece& utf8,
std::vector<size_t>* offsets_for_adjustment) {
for (size_t& offset : *offsets_for_adjustment) {
if (offset > utf8.length())
- offset = string16::npos;
+ offset = std::u16string::npos;
}
OffsetAdjuster::Adjustments adjustments;
- string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
+ std::u16string result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
return result;
}
@@ -251,7 +251,7 @@
std::vector<size_t>* offsets_for_adjustment) {
for (size_t& offset : *offsets_for_adjustment) {
if (offset > utf16.length())
- offset = string16::npos;
+ offset = std::u16string::npos;
}
std::string result;
PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h
index c2e2ba7..aa4e59e 100644
--- a/base/strings/utf_offset_string_conversions.h
+++ b/base/strings/utf_offset_string_conversions.h
@@ -11,7 +11,6 @@
#include <vector>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
namespace gurl_base {
@@ -35,7 +34,7 @@
// Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
// recorded in |adjustments|. Adjusted offsets greater than |limit| will be
- // set to string16::npos.
+ // set to std::u16string::npos.
//
// Offsets represents insertion/selection points between characters: if |src|
// is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
@@ -43,23 +42,24 @@
// exit, each offset will have been modified to point at the same logical
// position in the output string. If an offset cannot be successfully
// adjusted (e.g., because it points into the middle of a multibyte sequence),
- // it will be set to string16::npos.
+ // it will be set to std::u16string::npos.
static void AdjustOffsets(const Adjustments& adjustments,
std::vector<size_t>* offsets_for_adjustment,
- size_t limit = string16::npos);
+ size_t limit = std::u16string::npos);
// Adjusts the single |offset| to reflect the adjustments recorded in
// |adjustments|.
static void AdjustOffset(const Adjustments& adjustments,
size_t* offset,
- size_t limit = string16::npos);
+ size_t limit = std::u16string::npos);
// Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
// of the adjustments recorded in |adjustments|. In other words, the offsets
// provided represent offsets into an adjusted string and the caller wants
// to know the offsets they correspond to in the original string. If an
// offset cannot be successfully unadjusted (e.g., because it points into
- // the middle of a multibyte sequence), it will be set to string16::npos.
+ // the middle of a multibyte sequence), it will be set to
+ // std::u16string::npos.
static void UnadjustOffsets(const Adjustments& adjustments,
std::vector<size_t>* offsets_for_unadjustment);
@@ -94,15 +94,16 @@
BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
const char* src,
size_t src_len,
- string16* output,
+ std::u16string* output,
gurl_base::OffsetAdjuster::Adjustments* adjustments);
-BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
+BASE_EXPORT std::u16string UTF8ToUTF16WithAdjustments(
const gurl_base::StringPiece& utf8,
gurl_base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT;
// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|. Input offsets greater than the length of the
-// input string will be set to string16::npos. See comments by AdjustOffsets().
-BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
+// input string will be set to std::u16string::npos. See comments by
+// AdjustOffsets().
+BASE_EXPORT std::u16string UTF8ToUTF16AndAdjustOffsets(
const gurl_base::StringPiece& utf8,
std::vector<size_t>* offsets_for_adjustment);
BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc
index 8416524..0775dc4 100644
--- a/base/strings/utf_offset_string_conversions_unittest.cc
+++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -15,7 +15,7 @@
namespace {
-static const size_t kNpos = string16::npos;
+static const size_t kNpos = std::u16string::npos;
} // namespace
@@ -45,7 +45,7 @@
}
struct UTF16ToUTF8Case {
- char16 utf16[10];
+ char16_t utf16[10];
size_t input_offset;
size_t output_offset;
} utf16_to_utf8_cases[] = {
diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc
index ce432e7..da68dd3 100644
--- a/base/strings/utf_string_conversion_utils.cc
+++ b/base/strings/utf_string_conversion_utils.cc
@@ -30,7 +30,7 @@
return IsValidCodepoint(code_point);
}
-bool ReadUnicodeCharacter(const char16* src,
+bool ReadUnicodeCharacter(const char16_t* src,
int32_t src_len,
int32_t* char_index,
uint32_t* code_point) {
@@ -90,10 +90,10 @@
return char_offset - original_char_offset;
}
-size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) {
+size_t WriteUnicodeCharacter(uint32_t code_point, std::u16string* output) {
if (CBU16_LENGTH(code_point) == 1) {
// Thie code point is in the Basic Multilingual Plane (BMP).
- output->push_back(static_cast<char16>(code_point));
+ output->push_back(static_cast<char16_t>(code_point));
return 1;
}
// Non-BMP characters use a double-character encoding.
@@ -123,10 +123,10 @@
// Instantiate versions we know callers will need.
#if !defined(OS_WIN)
-// wchar_t and char16 are the same thing on Windows.
+// wchar_t and char16_t are the same thing on Windows.
template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*);
#endif
-template void PrepareForUTF8Output(const char16*, size_t, std::string*);
+template void PrepareForUTF8Output(const char16_t*, size_t, std::string*);
template<typename STRING>
void PrepareForUTF16Or32Output(const char* src,
@@ -147,9 +147,9 @@
// Instantiate versions we know callers will need.
#if !defined(OS_WIN)
-// std::wstring and string16 are the same thing on Windows.
+// std::wstring and std::u16string are the same thing on Windows.
template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*);
#endif
-template void PrepareForUTF16Or32Output(const char*, size_t, string16*);
+template void PrepareForUTF16Or32Output(const char*, size_t, std::u16string*);
} // namespace base
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index 075832e..640c7c6 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h
@@ -11,8 +11,9 @@
#include <stddef.h>
#include <stdint.h>
+#include <string>
+
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
namespace gurl_base {
@@ -50,7 +51,7 @@
uint32_t* code_point_out);
// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
-BASE_EXPORT bool ReadUnicodeCharacter(const char16* src,
+BASE_EXPORT bool ReadUnicodeCharacter(const char16_t* src,
int32_t src_len,
int32_t* char_index,
uint32_t* code_point);
@@ -72,7 +73,8 @@
// Appends the given code point as a UTF-16 character to the given 16-bit
// string. Returns the number of 16-bit values written.
-BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output);
+BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+ std::u16string* output);
#if defined(WCHAR_T_IS_UTF32)
// Appends the given UTF-32 character to the given 32-bit string. Returns the
diff --git a/base/strings/utf_string_conversions.cc b/base/strings/utf_string_conversions.cc
index 92333e2..9595e7b 100644
--- a/base/strings/utf_string_conversions.cc
+++ b/base/strings/utf_string_conversions.cc
@@ -35,7 +35,7 @@
};
template <>
-struct SizeCoefficient<char16, char> {
+struct SizeCoefficient<char16_t, char> {
// One UTF-16 codeunit corresponds to at most 3 codeunits in UTF-8.
static constexpr int value = 3;
};
@@ -48,7 +48,7 @@
};
template <>
-struct SizeCoefficient<wchar_t, char16> {
+struct SizeCoefficient<wchar_t, char16_t> {
// UTF-16 uses at most 2 codeunits per character.
static constexpr int value = 2;
};
@@ -111,13 +111,13 @@
}
template <typename DestChar>
-bool DoUTFConversion(const char16* src,
+bool DoUTFConversion(const char16_t* src,
int32_t src_len,
DestChar* dest,
int32_t* dest_len) {
bool success = true;
- auto ConvertSingleChar = [&success](char16 in) -> int32_t {
+ auto ConvertSingleChar = [&success](char16_t in) -> int32_t {
if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) {
success = false;
return kErrorCodePoint;
@@ -211,19 +211,19 @@
// UTF16 <-> UTF8 --------------------------------------------------------------
-bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
+bool UTF8ToUTF16(const char* src, size_t src_len, std::u16string* output) {
return UTFConversion(StringPiece(src, src_len), output);
}
-string16 UTF8ToUTF16(StringPiece utf8) {
- string16 ret;
+std::u16string UTF8ToUTF16(StringPiece utf8) {
+ std::u16string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
UTF8ToUTF16(utf8.data(), utf8.size(), &ret);
return ret;
}
-bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
+bool UTF16ToUTF8(const char16_t* src, size_t src_len, std::string* output) {
return UTFConversion(StringPiece16(src, src_len), output);
}
@@ -240,16 +240,16 @@
#if defined(WCHAR_T_IS_UTF16)
// When wide == UTF-16 the conversions are a NOP.
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+bool WideToUTF16(const wchar_t* src, size_t src_len, std::u16string* output) {
output->assign(src, src + src_len);
return true;
}
-string16 WideToUTF16(WStringPiece wide) {
- return string16(wide.begin(), wide.end());
+std::u16string WideToUTF16(WStringPiece wide) {
+ return std::u16string(wide.begin(), wide.end());
}
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
+bool UTF16ToWide(const char16_t* src, size_t src_len, std::wstring* output) {
output->assign(src, src + src_len);
return true;
}
@@ -260,19 +260,19 @@
#elif defined(WCHAR_T_IS_UTF32)
-bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
+bool WideToUTF16(const wchar_t* src, size_t src_len, std::u16string* output) {
return UTFConversion(gurl_base::WStringPiece(src, src_len), output);
}
-string16 WideToUTF16(WStringPiece wide) {
- string16 ret;
+std::u16string WideToUTF16(WStringPiece wide) {
+ std::u16string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
WideToUTF16(wide.data(), wide.length(), &ret);
return ret;
}
-bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
+bool UTF16ToWide(const char16_t* src, size_t src_len, std::wstring* output) {
return UTFConversion(StringPiece16(src, src_len), output);
}
@@ -329,9 +329,9 @@
#endif // defined(WCHAR_T_IS_UTF32)
-string16 ASCIIToUTF16(StringPiece ascii) {
+std::u16string ASCIIToUTF16(StringPiece ascii) {
GURL_DCHECK(IsStringASCII(ascii)) << ascii;
- return string16(ascii.begin(), ascii.end());
+ return std::u16string(ascii.begin(), ascii.end());
}
std::string UTF16ToASCII(StringPiece16 utf16) {
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h
index 043b6ae..ffb56e4 100644
--- a/base/strings/utf_string_conversions.h
+++ b/base/strings/utf_string_conversions.h
@@ -10,7 +10,6 @@
#include <string>
#include "polyfills/base/base_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
@@ -29,22 +28,27 @@
std::wstring* output);
BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
-BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len,
- string16* output);
-BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT;
-BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len,
+BASE_EXPORT bool WideToUTF16(const wchar_t* src,
+ size_t src_len,
+ std::u16string* output);
+BASE_EXPORT std::u16string WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT;
+BASE_EXPORT bool UTF16ToWide(const char16_t* src,
+ size_t src_len,
std::wstring* output);
BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT;
-BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
-BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT;
-BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len,
+BASE_EXPORT bool UTF8ToUTF16(const char* src,
+ size_t src_len,
+ std::u16string* output);
+BASE_EXPORT std::u16string UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT;
+BASE_EXPORT bool UTF16ToUTF8(const char16_t* src,
+ size_t src_len,
std::string* output);
BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT;
// This converts an ASCII string, typically a hardcoded constant, to a UTF16
// string.
-BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
+BASE_EXPORT std::u16string ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
// beforehand.
@@ -60,6 +64,35 @@
BASE_EXPORT std::string WideToASCII(WStringPiece wide) WARN_UNUSED_RESULT;
#endif // defined(WCHAR_T_IS_UTF16)
+// The conversion functions in this file should not be used to convert string
+// literals. Instead, the corresponding prefixes (e.g. u"" for UTF16 or L"" for
+// Wide) should be used. The static_assert in each overload below catches
+// these cases at compile time.
+template <size_t N>
+std::u16string WideToUTF16(const wchar_t (&str)[N]) {
+ static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ return std::u16string();
+}
+
+template <size_t N>
+std::u16string UTF8ToUTF16(const char (&str)[N]) {
+ static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ return std::u16string();
+}
+
+template <size_t N>
+std::u16string ASCIIToUTF16(const char (&str)[N]) {
+ static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ return std::u16string();
+}
+
+// Mutable character arrays are usually only populated during runtime. Continue
+// to allow this conversion.
+template <size_t N>
+std::u16string ASCIIToUTF16(char (&str)[N]) {
+ return ASCIIToUTF16(StringPiece(str));
+}
+
} // namespace base
#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
diff --git a/base/strings/utf_string_conversions_fuzzer.cc b/base/strings/utf_string_conversions_fuzzer.cc
index 55e75f7..932012a 100644
--- a/base/strings/utf_string_conversions_fuzzer.cc
+++ b/base/strings/utf_string_conversions_fuzzer.cc
@@ -8,7 +8,7 @@
std::string output_std_string;
std::wstring output_std_wstring;
-gurl_base::string16 output_string16;
+std::u16string output_string16;
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
@@ -22,15 +22,15 @@
gurl_base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size,
&output_string16);
- // Test for char16.
+ // Test for char16_t.
if (size % 2 == 0) {
gurl_base::StringPiece16 string_piece_input16(
- reinterpret_cast<const gurl_base::char16*>(data), size / 2);
+ reinterpret_cast<const char16_t*>(data), size / 2);
ignore_result(gurl_base::UTF16ToWide(output_string16));
- gurl_base::UTF16ToWide(reinterpret_cast<const gurl_base::char16*>(data), size / 2,
+ gurl_base::UTF16ToWide(reinterpret_cast<const char16_t*>(data), size / 2,
&output_std_wstring);
ignore_result(gurl_base::UTF16ToUTF8(string_piece_input16));
- gurl_base::UTF16ToUTF8(reinterpret_cast<const gurl_base::char16*>(data), size / 2,
+ gurl_base::UTF16ToUTF8(reinterpret_cast<const char16_t*>(data), size / 2,
&output_std_string);
}
diff --git a/base/strings/utf_string_conversions_unittest.cc b/base/strings/utf_string_conversions_unittest.cc
index 6cffe99..752bf95 100644
--- a/base/strings/utf_string_conversions_unittest.cc
+++ b/base/strings/utf_string_conversions_unittest.cc
@@ -180,19 +180,15 @@
#endif // defined(WCHAR_T_IS_UTF32)
TEST(UTFStringConversionsTest, ConvertMultiString) {
- static char16 multi16[] = {
- 'f', 'o', 'o', '\0',
- 'b', 'a', 'r', '\0',
- 'b', 'a', 'z', '\0',
- '\0'
- };
+ static char16_t multi16[] = {'f', 'o', 'o', '\0', 'b', 'a', 'r',
+ '\0', 'b', 'a', 'z', '\0', '\0'};
static char multi[] = {
'f', 'o', 'o', '\0',
'b', 'a', 'r', '\0',
'b', 'a', 'z', '\0',
'\0'
};
- string16 multistring16;
+ std::u16string multistring16;
memcpy(WriteInto(&multistring16, gurl_base::size(multi16)), multi16,
sizeof(multi16));
EXPECT_EQ(gurl_base::size(multi16) - 1, multistring16.length());
diff --git a/base/template_util.h b/base/template_util.h
index 4b69c7a..78b52ee 100644
--- a/base/template_util.h
+++ b/base/template_util.h
@@ -12,6 +12,7 @@
#include <utility>
#include <vector>
+#include "base/compiler_specific.h"
#include "build/build_config.h"
// Some versions of libstdc++ have partial support for type_traits, but misses
@@ -146,8 +147,9 @@
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 7
// Workaround for g++7 and earlier family.
// Due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80654, without this
-// Optional<std::vector<T>> where T is non-copyable causes a compile error.
-// As we know it is not trivially copy constructible, explicitly declare so.
+// absl::optional<std::vector<T>> where T is non-copyable causes a compile
+// error. As we know it is not trivially copy constructible, explicitly declare
+// so.
template <typename T>
struct is_trivially_copy_constructible
: std::is_trivially_copy_constructible<T> {};
@@ -233,16 +235,107 @@
template <typename B>
struct negation : bool_constant<!static_cast<bool>(B::value)> {};
-// Implementation of C++17's std::invoke_result_t.
+// Implementation of C++17's invoke_result.
//
// This implementation adds references to `Functor` and `Args` to work around
-// some quirks of std::result_of_t. See the #Notes section of [1] for details.
+// some quirks of std::result_of. See the #Notes section of [1] for details.
//
// References:
// [1] https://en.cppreference.com/w/cpp/types/result_of
-// [2] https://wg21.link/meta.type.synop#lib:invoke_result_t
+// [2] https://wg21.link/meta.trans.other#lib:invoke_result
template <typename Functor, typename... Args>
-using invoke_result_t = std::result_of_t<Functor && (Args && ...)>;
+using invoke_result = std::result_of<Functor && (Args && ...)>;
+
+// Implementation of C++17's std::invoke_result_t.
+//
+// Reference: https://wg21.link/meta.type.synop#lib:invoke_result_t
+template <typename Functor, typename... Args>
+using invoke_result_t = typename invoke_result<Functor, Args...>::type;
+
+namespace internal {
+
+// Base case, `InvokeResult` does not have a nested type member. This means `F`
+// could not be invoked with `Args...` and thus is not invocable.
+template <typename InvokeResult, typename R, typename = void>
+struct IsInvocableImpl : std::false_type {};
+
+// Happy case, `InvokeResult` does have a nested type member. Now check whether
+// `InvokeResult::type` is convertible to `R`. Short circuit in case
+// `std::is_void<R>`.
+template <typename InvokeResult, typename R>
+struct IsInvocableImpl<InvokeResult, R, void_t<typename InvokeResult::type>>
+ : disjunction<std::is_void<R>,
+ std::is_convertible<typename InvokeResult::type, R>> {};
+
+} // namespace internal
+
+// Implementation of C++17's std::is_invocable_r.
+//
+// Returns whether `F` can be invoked with `Args...` and the result is
+// convertible to `R`.
+//
+// Reference: https://wg21.link/meta.rel#lib:is_invocable_r
+template <typename R, typename F, typename... Args>
+struct is_invocable_r
+ : internal::IsInvocableImpl<invoke_result<F, Args...>, R> {};
+
+// Implementation of C++17's std::is_invocable.
+//
+// Returns whether `F` can be invoked with `Args...`.
+//
+// Reference: https://wg21.link/meta.rel#lib:is_invocable
+template <typename F, typename... Args>
+struct is_invocable : is_invocable_r<void, F, Args...> {};
+
+namespace internal {
+
+// The indirection with std::is_enum<T> is required, because instantiating
+// std::underlying_type_t<T> when T is not an enum is UB prior to C++20.
+template <typename T, bool = std::is_enum<T>::value>
+struct IsScopedEnumImpl : std::false_type {};
+
+template <typename T>
+struct IsScopedEnumImpl<T, /*std::is_enum<T>::value=*/true>
+ : negation<std::is_convertible<T, std::underlying_type_t<T>>> {};
+
+} // namespace internal
+
+// Implementation of C++23's std::is_scoped_enum
+//
+// Reference: https://en.cppreference.com/w/cpp/types/is_scoped_enum
+template <typename T>
+struct is_scoped_enum : internal::IsScopedEnumImpl<T> {};
+
+// Implementation of C++20's std::remove_cvref.
+//
+// References:
+// - https://en.cppreference.com/w/cpp/types/remove_cvref
+// - https://wg21.link/meta.trans.other#lib:remove_cvref
+template <typename T>
+struct remove_cvref {
+ using type = std::remove_cv_t<std::remove_reference_t<T>>;
+};
+
+// Implementation of C++20's std::remove_cvref_t.
+//
+// References:
+// - https://en.cppreference.com/w/cpp/types/remove_cvref
+// - https://wg21.link/meta.type.synop#lib:remove_cvref_t
+template <typename T>
+using remove_cvref_t = typename remove_cvref<T>::type;
+
+// Implementation of C++20's std::is_constant_evaluated.
+//
+// References:
+// - https://en.cppreference.com/w/cpp/types/is_constant_evaluated
+// - https://wg21.link/meta.const.eval
+constexpr bool is_constant_evaluated() noexcept {
+#if HAS_BUILTIN(__builtin_is_constant_evaluated)
+ return __builtin_is_constant_evaluated();
+#else
+ return false;
+#endif
+}
// Simplified implementation of C++20's std::iter_value_t.
// As opposed to std::iter_value_t, this implementation does not restrict
@@ -251,8 +344,8 @@
//
// Reference: https://wg21.link/readable.traits#2
template <typename Iter>
-using iter_value_t = typename std::iterator_traits<
- std::remove_cv_t<std::remove_reference_t<Iter>>>::value_type;
+using iter_value_t =
+ typename std::iterator_traits<remove_cvref_t<Iter>>::value_type;
// Simplified implementation of C++20's std::iter_reference_t.
// As opposed to std::iter_reference_t, this implementation does not restrict
@@ -281,7 +374,7 @@
typename Proj,
typename IndirectResultT = indirect_result_t<Proj, Iter>>
struct projected {
- using value_type = std::remove_cv_t<std::remove_reference_t<IndirectResultT>>;
+ using value_type = remove_cvref_t<IndirectResultT>;
IndirectResultT operator*() const; // not defined
};
diff --git a/build/build_config.h b/build/build_config.h
index c69df41..daf51ff 100644
--- a/build/build_config.h
+++ b/build/build_config.h
@@ -94,7 +94,7 @@
#error Please add support for your platform in build/build_config.h
#endif
// NOTE: Adding a new port? Please follow
-// https://chromium.googlesource.com/chromium/src/+/master/docs/new_port_policy.md
+// https://chromium.googlesource.com/chromium/src/+/main/docs/new_port_policy.md
#if defined(OS_MAC) || defined(OS_IOS)
#define OS_APPLE 1
@@ -224,7 +224,7 @@
// The compiler thinks std::string::const_iterator and "const char*" are
// equivalent types.
#define STD_STRING_ITERATOR_IS_CHAR_POINTER
-// The compiler thinks gurl_base::string16::const_iterator and "char16*" are
+// The compiler thinks std::u16string::const_iterator and "char16_t*" are
// equivalent types.
#define BASE_STRING16_ITERATOR_IS_CHAR16_POINTER
#endif
diff --git a/copy.bara.sky b/copy.bara.sky
index c50f711..1384bee 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -18,12 +18,12 @@
"base/containers/contiguous_iterator.h",
"base/containers/span.h",
"base/containers/util.h",
+ "base/cxx17_backports.h",
"base/debug/leak_annotations.h",
"base/functional/*.h",
"base/i18n/uchar.h",
"base/macros.h",
"base/no_destructor.h",
- "base/optional.h",
"base/ranges/*.h",
"base/stl_util.h",
"base/strings/*.cc",
@@ -63,8 +63,10 @@
"base/debug/alias.h",
"base/export_template.h",
"base/logging.h",
+ "base/metrics/histogram_macros.h",
"base/notreached.h",
"base/trace_event/memory_usage_estimator.h",
+ "third_party/perfetto/include/perfetto/tracing/traced_value.h",
]
transformations = [
@@ -81,6 +83,12 @@
core.replace("namespace base ", "namespace gurl_base "),
core.replace("base::", "gurl_base::"),
+ # Use Abseil at upstream-recommended paths.
+ core.replace("third_party/abseil-cpp/absl", "absl"),
+
+ # Fix some Perfetto includes.
+ core.replace("perfetto/tracing/traced_value_forward.h", "perfetto/tracing/traced_value.h"),
+
# Use system ICU.
core.replace(
'"third_party/icu/source/common/unicode/${file}.h"',
diff --git a/polyfills/BUILD b/polyfills/BUILD
index 820c63d..9bf74f4 100644
--- a/polyfills/BUILD
+++ b/polyfills/BUILD
@@ -13,8 +13,10 @@
"base/debug/alias.h",
"base/export_template.h",
"base/logging.h",
+ "base/metrics/histogram_macros.h",
"base/notreached.h",
"base/trace_event/memory_usage_estimator.h",
+ "third_party/perfetto/include/perfetto/tracing/traced_value.h",
],
copts = build_config.default_copts,
visibility = ["//visibility:public"],
diff --git a/polyfills/base/check_op.h b/polyfills/base/check_op.h
index ecc127a..faba308 100644
--- a/polyfills/base/check_op.h
+++ b/polyfills/base/check_op.h
@@ -6,5 +6,6 @@
#define POLYFILLS_BASE_CHECK_OP_H_
#include "polyfills/base/logging.h"
+#include "base/template_util.h"
#endif /* POLYFILLS_BASE_CHECK_OP_H_ */
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h
index afe296e..3d7aadc 100644
--- a/polyfills/base/logging.h
+++ b/polyfills/base/logging.h
@@ -22,6 +22,7 @@
#define GURL_CHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_CHECK_LE(statement, statement2) GurlFakeLogSink({statement, statement2})
+#define GURL_CHECK_LT(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_CHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_CHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_CHECK(statement) GurlFakeLogSink({statement})
diff --git a/polyfills/base/metrics/histogram_macros.h b/polyfills/base/metrics/histogram_macros.h
new file mode 100644
index 0000000..127c53c
--- /dev/null
+++ b/polyfills/base/metrics/histogram_macros.h
@@ -0,0 +1,11 @@
+// Copyright (c) 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_
+#define POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_
+
+#define UMA_HISTOGRAM_ENUMERATION(name, ...) do {} while(false)
+
+#endif /* POLYFILLS_BASE_METRICS_HISTOGRAM_MACROS_H_ */
+
diff --git a/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h b/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h
new file mode 100644
index 0000000..b2f0286
--- /dev/null
+++ b/polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h
@@ -0,0 +1,17 @@
+// Copyright (c) 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_
+#define POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_
+
+namespace perfetto {
+
+class TracedValue {
+ public:
+ void WriteString(const std::string&) && {}
+};
+
+} // namespace perfetto
+
+#endif // POLYFILLS_THIRD_PARTY_PERFETTO_INCLUDE_PERFETTO_TRACING_TRACED_VALUE_H_
diff --git a/url/BUILD b/url/BUILD
index f2ec8da..6ed3fc5 100644
--- a/url/BUILD
+++ b/url/BUILD
@@ -43,7 +43,9 @@
"url_file.h",
"url_util.h",
],
- copts = build_config.default_copts,
+ copts = build_config.default_copts + [
+ "-Wno-c++11-narrowing",
+ ],
linkopts = build_config.url_linkopts,
visibility = ["//visibility:public"],
deps = [
diff --git a/url/gurl.cc b/url/gurl.cc
index 3b7d9f5..2d68889 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <algorithm>
+#include <memory>
#include <ostream>
#include <utility>
@@ -15,6 +16,7 @@
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "polyfills/base/trace_event/memory_usage_estimator.h"
+#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "url/url_canon_stdstring.h"
#include "url/url_util.h"
@@ -26,7 +28,7 @@
is_valid_(other.is_valid_),
parsed_(other.parsed_) {
if (other.inner_url_)
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
// Valid filesystem urls should always have an inner_url_.
GURL_DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
}
@@ -49,7 +51,7 @@
}
GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
- InitCanonical(gurl_base::StringPiece(url_string), false);
+ InitCanonical(url_string, false);
}
GURL::GURL(const char* canonical_spec,
@@ -67,9 +69,8 @@
InitializeFromCanonicalSpec();
}
-template<typename STR>
-void GURL::InitCanonical(gurl_base::BasicStringPiece<STR> input_spec,
- bool trim_path_end) {
+template <typename T, typename CharT>
+void GURL::InitCanonical(T input_spec, bool trim_path_end) {
url::StdStringCanonOutput output(&spec_);
is_valid_ = url::Canonicalize(
input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -77,8 +78,8 @@
output.Complete(); // Must be done before using string.
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
// Valid URLs always have non-empty specs.
GURL_DCHECK(!is_valid_ || !spec_.empty());
@@ -86,9 +87,8 @@
void GURL::InitializeFromCanonicalSpec() {
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(
- new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
#ifndef NDEBUG
@@ -138,7 +138,7 @@
else if (inner_url_)
*inner_url_ = *other.inner_url_;
else
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
return *this;
}
@@ -189,9 +189,9 @@
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -215,9 +215,9 @@
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -238,16 +238,16 @@
output.Complete();
if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
// Note: code duplicated above (it's inconvenient to use a template here).
GURL GURL::ReplaceComponents(
- const url::Replacements<gurl_base::char16>& replacements) const {
+ const url::Replacements<char16_t>& replacements) const {
GURL result;
// Not allowed for invalid URLs.
@@ -261,9 +261,9 @@
output.Complete();
if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -412,11 +412,11 @@
}
std::string GURL::PathForRequest() const {
- return PathForRequestPiece().as_string();
+ return std::string(PathForRequestPiece());
}
std::string GURL::HostNoBrackets() const {
- return HostNoBracketsPiece().as_string();
+ return std::string(HostNoBracketsPiece());
}
gurl_base::StringPiece GURL::HostNoBracketsPiece() const {
@@ -501,13 +501,17 @@
if ((actual_path.size() == allowed_path.size() + 1) &&
actual_path.back() == '/') {
- GURL_DCHECK_EQ(actual_path, allowed_path.as_string() + '/');
+ GURL_DCHECK_EQ(actual_path, std::string(allowed_path) + '/');
return true;
}
return false;
}
+void GURL::WriteIntoTrace(perfetto::TracedValue context) const {
+ std::move(context).WriteString(possibly_invalid_spec());
+}
+
std::ostream& operator<<(std::ostream& out, const GURL& url) {
return out << url.possibly_invalid_spec();
}
diff --git a/url/gurl.h b/url/gurl.h
index 37e1c8d..21e6611 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -13,8 +13,8 @@
#include "polyfills/base/component_export.h"
#include "polyfills/base/debug/alias.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
+#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
@@ -45,8 +45,8 @@
// will know to escape this and produce the desired result.
class COMPONENT_EXPORT(URL) GURL {
public:
- typedef url::StringPieceReplacements<std::string> Replacements;
- typedef url::StringPieceReplacements<gurl_base::string16> ReplacementsW;
+ typedef url::StringPieceReplacements<char> Replacements;
+ typedef url::StringPieceReplacements<char16_t> ReplacementsW;
// Creates an empty, invalid URL.
GURL();
@@ -166,8 +166,7 @@
// Note that we use the more general url::Replacements type to give
// callers extra flexibility rather than our override.
GURL ReplaceComponents(const url::Replacements<char>& replacements) const;
- GURL ReplaceComponents(
- const url::Replacements<gurl_base::char16>& replacements) const;
+ GURL ReplaceComponents(const url::Replacements<char16_t>& replacements) const;
// A helper function that is equivalent to replacing the path with a slash
// and clearing out everything after that. We sometimes need to know just the
@@ -438,6 +437,8 @@
static bool IsAboutPath(gurl_base::StringPiece actual_path,
gurl_base::StringPiece allowed_path);
+ void WriteIntoTrace(perfetto::TracedValue context) const;
+
private:
// Variant of the string parsing constructor that allows the caller to elect
// retain trailing whitespace, if any, on the passed URL spec, but only if
@@ -447,9 +448,8 @@
enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
GURL(const std::string& url_string, RetainWhiteSpaceSelector);
- template<typename STR>
- void InitCanonical(gurl_base::BasicStringPiece<STR> input_spec,
- bool trim_path_end);
+ template <typename T, typename CharT = typename T::value_type>
+ void InitCanonical(T input_spec, bool trim_path_end);
void InitializeFromCanonicalSpec();
diff --git a/url/gurl_abstract_tests.h b/url/gurl_abstract_tests.h
new file mode 100644
index 0000000..ffe9942
--- /dev/null
+++ b/url/gurl_abstract_tests.h
@@ -0,0 +1,119 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_GURL_ABSTRACT_TESTS_H_
+#define URL_GURL_ABSTRACT_TESTS_H_
+
+// Test suite for tests that cover both GURL and blink::KURL.
+//
+// AbstractUrlTest below abstracts away differences between GURL and blink::KURL
+// by parametrizing the tests with a class that has to expose the following
+// members:
+// using UrlType = ...;
+// static UrlType CreateUrlFromString(gurl_base::StringPiece s);
+// static bool IsAboutBlank(const UrlType& url);
+// static bool IsAboutSrcdoc(const UrlType& url);
+template <typename TUrlTraits>
+class AbstractUrlTest : public testing::Test {
+ protected:
+ // Wrappers that help elide away TUrlTraits.
+ //
+ // Note that calling the wrappers needs to be prefixed with `this->...` to
+ // avoid hitting: explicit qualification required to use member 'IsAboutBlank'
+ // from dependent base class.
+ using UrlType = typename TUrlTraits::UrlType;
+ UrlType CreateUrlFromString(gurl_base::StringPiece s) {
+ return TUrlTraits::CreateUrlFromString(s);
+ }
+ bool IsAboutBlank(const UrlType& url) {
+ return TUrlTraits::IsAboutBlank(url);
+ }
+ bool IsAboutSrcdoc(const UrlType& url) {
+ return TUrlTraits::IsAboutSrcdoc(url);
+ }
+};
+
+TYPED_TEST_SUITE_P(AbstractUrlTest);
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutBlankTest) {
+ // See https://tools.ietf.org/html/rfc6694 which explicitly allows
+ // `about-query` and `about-fragment` parts in about: URLs.
+ const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo",
+ "about:blank/#foo",
+ "about:blank?foo#foo"};
+ for (const auto& input : kAboutBlankUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_TRUE(this->IsAboutBlank(url));
+ }
+
+ const std::string kNotAboutBlankUrls[] = {"",
+ "about",
+ "about:",
+ "about:blanky",
+ "about:blan",
+ "about:about:blank:",
+ "data:blank",
+ "http:blank",
+ "about://blank",
+ "about:blank/foo",
+ "about://:8000/blank",
+ "about://foo:foo@/blank",
+ "foo@about:blank",
+ "foo:bar@about:blank",
+ "about:blank:8000",
+ "about:blANk"};
+ for (const auto& input : kNotAboutBlankUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_FALSE(this->IsAboutBlank(url));
+ }
+}
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutSrcdocTest) {
+ // See https://tools.ietf.org/html/rfc6694 which explicitly allows
+ // `about-query` and `about-fragment` parts in about: URLs.
+ //
+ // `about:srcdoc` is defined in
+ // https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:srcdoc
+ // which refers to rfc6694 for details.
+ const std::string kAboutSrcdocUrls[] = {
+ "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo",
+ "about:srcdoc?foo#foo"};
+ for (const auto& input : kAboutSrcdocUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_TRUE(this->IsAboutSrcdoc(url));
+ }
+
+ const std::string kNotAboutSrcdocUrls[] = {"",
+ "about",
+ "about:",
+ "about:srcdocx",
+ "about:srcdo",
+ "about:about:srcdoc:",
+ "data:srcdoc",
+ "http:srcdoc",
+ "about:srcdo",
+ "about://srcdoc",
+ "about://srcdoc\\",
+ "about:srcdoc/foo",
+ "about://:8000/srcdoc",
+ "about://foo:foo@/srcdoc",
+ "foo@about:srcdoc",
+ "foo:bar@about:srcdoc",
+ "about:srcdoc:8000",
+ "about:srCDOc"};
+ for (const auto& input : kNotAboutSrcdocUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_FALSE(this->IsAboutSrcdoc(url));
+ }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractUrlTest,
+ IsAboutBlankTest,
+ IsAboutSrcdocTest);
+
+#endif // URL_GURL_ABSTRACT_TESTS_H_
diff --git a/url/gurl_fuzzer.cc b/url/gurl_fuzzer.cc
index 3b28aea..0c3c101 100644
--- a/url/gurl_fuzzer.cc
+++ b/url/gurl_fuzzer.cc
@@ -52,9 +52,9 @@
CheckReplaceComponentsPreservesSpec(url_from_string_piece);
}
// Test for StringPiece16 if size is even.
- if (size % 2 == 0) {
+ if (size % sizeof(char16_t) == 0) {
gurl_base::StringPiece16 string_piece_input16(
- reinterpret_cast<const gurl_base::char16*>(data), size / 2);
+ reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t));
const GURL url_from_string_piece16(string_piece_input16);
CheckIdempotency(url_from_string_piece16);
CheckReplaceComponentsPreservesSpec(url_from_string_piece16);
@@ -78,10 +78,10 @@
url_from_string_piece_part.Resolve(relative_string);
- if (relative_size % 2 == 0) {
- gurl_base::string16 relative_string16(
- reinterpret_cast<const gurl_base::char16*>(data + size_t_bytes),
- relative_size / 2);
+ if (relative_size % sizeof(char16_t) == 0) {
+ std::u16string relative_string16(
+ reinterpret_cast<const char16_t*>(data + size_t_bytes),
+ relative_size / sizeof(char16_t));
url_from_string_piece_part.Resolve(relative_string16);
}
}
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index 6295d98..0b81da4 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -68,11 +68,11 @@
// the parser is already tested and works, so we are mostly interested if the
// object does the right thing with the results.
TEST(GURLTest, Components) {
- GURL empty_url(gurl_base::UTF8ToUTF16(""));
+ GURL empty_url(u"");
EXPECT_TRUE(empty_url.is_empty());
EXPECT_FALSE(empty_url.is_valid());
- GURL url(gurl_base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
EXPECT_FALSE(url.is_empty());
EXPECT_TRUE(url.is_valid());
EXPECT_TRUE(url.SchemeIs("http"));
@@ -117,8 +117,7 @@
}
TEST(GURLTest, Copy) {
- GURL url(gurl_base::UTF8ToUTF16(
- "http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -151,8 +150,7 @@
}
TEST(GURLTest, Assign) {
- GURL url(gurl_base::UTF8ToUTF16(
- "http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
GURL url2;
url2 = url;
@@ -194,8 +192,7 @@
}
TEST(GURLTest, CopyFileSystem) {
- GURL url(gurl_base::UTF8ToUTF16(
- "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
+ GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -268,21 +265,49 @@
EXPECT_EQ("/", url.path());
}
-// Given an invalid URL, we should still get most of the components.
+// Given invalid URLs, we should still get most of the components.
TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
- GURL url("http:google.com:foo");
- EXPECT_FALSE(url.is_valid());
- EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
+ constexpr struct InvalidURLTestExpectations {
+ const char* url;
+ const char* spec;
+ const char* scheme;
+ const char* host;
+ const char* port;
+ const char* path;
+ // Extend as needed...
+ } expectations[] = {
+ {
+ "http:google.com:foo",
+ "http://google.com:foo/",
+ "http",
+ "google.com",
+ "foo",
+ "/",
+ },
+ {
+ "https:google.com:foo",
+ "https://google.com:foo/",
+ "https",
+ "google.com",
+ "foo",
+ "/",
+ },
+ };
- EXPECT_EQ("http", url.scheme());
- EXPECT_EQ("", url.username());
- EXPECT_EQ("", url.password());
- EXPECT_EQ("google.com", url.host());
- EXPECT_EQ("foo", url.port());
- EXPECT_EQ(PORT_INVALID, url.IntPort());
- EXPECT_EQ("/", url.path());
- EXPECT_EQ("", url.query());
- EXPECT_EQ("", url.ref());
+ for (const auto& e : expectations) {
+ const GURL url(e.url);
+ EXPECT_FALSE(url.is_valid());
+ EXPECT_EQ(e.spec, url.possibly_invalid_spec());
+ EXPECT_EQ(e.scheme, url.scheme());
+ EXPECT_EQ("", url.username());
+ EXPECT_EQ("", url.password());
+ EXPECT_EQ(e.host, url.host());
+ EXPECT_EQ(e.port, url.port());
+ EXPECT_EQ(PORT_INVALID, url.IntPort());
+ EXPECT_EQ(e.path, url.path());
+ EXPECT_EQ("", url.query());
+ EXPECT_EQ("", url.ref());
+ }
}
TEST(GURLTest, Resolve) {
@@ -314,6 +339,7 @@
// A non-standard base can be replaced with a standard absolute URL.
{"data:blahblah", "http://google.com/", true, "http://google.com/"},
{"data:blahblah", "http:google.com", true, "http://google.com/"},
+ {"data:blahblah", "https:google.com", true, "https://google.com/"},
// Filesystem URLs have different paths to test.
{"filesystem:http://www.google.com/type/", "foo.html", true,
"filesystem:http://www.google.com/type/foo.html"},
@@ -535,7 +561,7 @@
GURL url(" data: one ? two # three ");
// By default the trailing whitespace will have been stripped.
- EXPECT_EQ("data: one ? two # three", url.spec());
+ EXPECT_EQ("data: one ? two #%20three", url.spec());
GURL::Replacements repl;
repl.ClearRef();
GURL url_no_ref = url.ReplaceComponents(repl);
@@ -953,6 +979,21 @@
EXPECT_STREQ("https://foo.com/bar", url_debug_alias);
}
+TEST(GURLTest, InvalidHost) {
+ // This contains an invalid percent escape (%T%) and also a valid
+ // percent escape that's not 7-bit ascii (%ae), so that the unescaped
+ // host contains both an invalid percent escape and invalid UTF-8.
+ GURL url("http://%T%Ae");
+
+ EXPECT_FALSE(url.is_valid());
+ EXPECT_TRUE(url.SchemeIs(url::kHttpScheme));
+
+ // The invalid percent escape becomes an escaped percent sign (%25), and the
+ // invalid UTF-8 character becomes REPLACEMENT CHARACTER (U+FFFD) encoded as
+ // UTF-8.
+ EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD");
+}
+
TEST(GURLTest, PortZero) {
GURL port_zero_url("http://127.0.0.1:0/blah");
diff --git a/url/origin.cc b/url/origin.cc
index ca37428..33e26f9 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -16,7 +16,9 @@
#include "base/pickle.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
+#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "url/gurl.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
@@ -72,25 +74,25 @@
Origin::~Origin() = default;
// static
-gurl_base::Optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
gurl_base::StringPiece scheme,
gurl_base::StringPiece host,
uint16_t port) {
- SchemeHostPort tuple(scheme.as_string(), host.as_string(), port,
+ SchemeHostPort tuple(std::string(scheme), std::string(host), port,
SchemeHostPort::CHECK_CANONICALIZATION);
if (!tuple.IsValid())
- return gurl_base::nullopt;
+ return absl::nullopt;
return Origin(std::move(tuple));
}
// static
-gurl_base::Optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
gurl_base::StringPiece precursor_scheme,
gurl_base::StringPiece precursor_host,
uint16_t precursor_port,
const Origin::Nonce& nonce) {
- SchemeHostPort precursor(precursor_scheme.as_string(),
- precursor_host.as_string(), precursor_port,
+ SchemeHostPort precursor(std::string(precursor_scheme),
+ std::string(precursor_host), precursor_port,
SchemeHostPort::CHECK_CANONICALIZATION);
// For opaque origins, it is okay for the SchemeHostPort to be invalid;
// however, this should only arise when the arguments indicate the
@@ -98,7 +100,7 @@
if (!precursor.IsValid() &&
!(precursor_scheme.empty() && precursor_host.empty() &&
precursor_port == 0)) {
- return gurl_base::nullopt;
+ return absl::nullopt;
}
return Origin(std::move(nonce), std::move(precursor));
}
@@ -147,11 +149,11 @@
return tuple_.GetURL();
}
-gurl_base::Optional<gurl_base::UnguessableToken> Origin::GetNonceForSerialization()
+absl::optional<gurl_base::UnguessableToken> Origin::GetNonceForSerialization()
const {
// TODO(nasko): Consider not making a copy here, but return a reference to
// the nonce.
- return nonce_ ? gurl_base::make_optional(nonce_->token()) : gurl_base::nullopt;
+ return nonce_ ? absl::make_optional(nonce_->token()) : absl::nullopt;
}
bool Origin::IsSameOriginWith(const Origin& other) const {
@@ -287,11 +289,11 @@
GURL_DCHECK_EQ(0U, port());
}
-gurl_base::Optional<std::string> Origin::SerializeWithNonce() const {
+absl::optional<std::string> Origin::SerializeWithNonce() const {
return SerializeWithNonceImpl();
}
-gurl_base::Optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() {
+absl::optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() {
GetNonceForSerialization();
return SerializeWithNonceImpl();
}
@@ -300,9 +302,9 @@
// string - tuple_.GetURL().spec().
// uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized.
// uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized.
-gurl_base::Optional<std::string> Origin::SerializeWithNonceImpl() const {
+absl::optional<std::string> Origin::SerializeWithNonceImpl() const {
if (!opaque() && !tuple_.IsValid())
- return gurl_base::nullopt;
+ return absl::nullopt;
gurl_base::Pickle pickle;
pickle.WriteString(tuple_.Serialize());
@@ -323,16 +325,16 @@
}
// static
-gurl_base::Optional<Origin> Origin::Deserialize(const std::string& value) {
+absl::optional<Origin> Origin::Deserialize(const std::string& value) {
std::string data;
if (!gurl_base::Base64Decode(value, &data))
- return gurl_base::nullopt;
+ return absl::nullopt;
gurl_base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size());
gurl_base::PickleIterator reader(pickle);
std::string pickled_url;
if (!reader.ReadString(&pickled_url))
- return gurl_base::nullopt;
+ return absl::nullopt;
GURL url(pickled_url);
// If only a tuple was serialized, then this origin is not opaque. For opaque
@@ -341,24 +343,24 @@
// Opaque origins without a tuple are ok.
if (!is_opaque && !url.is_valid())
- return gurl_base::nullopt;
+ return absl::nullopt;
SchemeHostPort tuple(url);
// Possible successful early return if the pickled Origin was not opaque.
if (!is_opaque) {
Origin origin(tuple);
if (origin.opaque())
- return gurl_base::nullopt; // Something went horribly wrong.
+ return absl::nullopt; // Something went horribly wrong.
return origin;
}
uint64_t nonce_high = 0;
if (!reader.ReadUInt64(&nonce_high))
- return gurl_base::nullopt;
+ return absl::nullopt;
uint64_t nonce_low = 0;
if (!reader.ReadUInt64(&nonce_low))
- return gurl_base::nullopt;
+ return absl::nullopt;
Origin::Nonce nonce;
if (nonce_high != 0 && nonce_low != 0) {
@@ -372,6 +374,10 @@
return origin;
}
+void Origin::WriteIntoTrace(perfetto::TracedValue context) const {
+ std::move(context).WriteString(GetDebugString());
+}
+
std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
out << origin.GetDebugString();
return out;
diff --git a/url/origin.h b/url/origin.h
index 8cb3ef0..bfd3b36 100644
--- a/url/origin.h
+++ b/url/origin.h
@@ -13,13 +13,13 @@
#include "polyfills/base/component_export.h"
#include "polyfills/base/debug/alias.h"
#include "base/debug/crash_logging.h"
-#include "base/optional.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/unguessable_token.h"
#include "build/build_config.h"
#include "ipc/ipc_param_traits.h"
+#include "absl/types/optional.h"
+#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
#include "url/scheme_host_port.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -182,7 +182,7 @@
// forth over IPC (as transitioning through GURL would risk potentially
// dangerous recanonicalization); other potential callers should prefer the
// 'GURL'-based constructor.
- static gurl_base::Optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
+ static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
gurl_base::StringPiece scheme,
gurl_base::StringPiece host,
uint16_t port);
@@ -295,6 +295,8 @@
const gurl_base::android::JavaRef<jobject>& java_origin);
#endif // OS_ANDROID
+ void WriteIntoTrace(perfetto::TracedValue context) const;
+
private:
friend class blink::SecurityOrigin;
// SchemefulSite needs access to the serialization/deserialization logic which
@@ -377,7 +379,7 @@
// This factory method should be used in order to pass opaque Origin objects
// back and forth over IPC (as transitioning through GURL would risk
// potentially dangerous recanonicalization).
- static gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+ static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
gurl_base::StringPiece precursor_scheme,
gurl_base::StringPiece precursor_host,
uint16_t precursor_port,
@@ -392,23 +394,23 @@
// Get the nonce associated with this origin, if it is opaque. This should be
// used only when trying to send an Origin across an IPC pipe.
- gurl_base::Optional<gurl_base::UnguessableToken> GetNonceForSerialization() const;
+ absl::optional<gurl_base::UnguessableToken> GetNonceForSerialization() const;
// Serializes this Origin, including its nonce if it is opaque. If an opaque
// origin's |tuple_| is invalid nullopt is returned. If the nonce is not
// initialized, a nonce of 0 is used. Use of this method should be limited as
// an opaque origin will never be matchable in future browser sessions.
- gurl_base::Optional<std::string> SerializeWithNonce() const;
+ absl::optional<std::string> SerializeWithNonce() const;
// Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
// serializing.
- gurl_base::Optional<std::string> SerializeWithNonceAndInitIfNeeded();
+ absl::optional<std::string> SerializeWithNonceAndInitIfNeeded();
- gurl_base::Optional<std::string> SerializeWithNonceImpl() const;
+ absl::optional<std::string> SerializeWithNonceImpl() const;
// Deserializes an origin from |ToValueWithNonce|. Returns nullopt if the
// value was invalid in any way.
- static gurl_base::Optional<Origin> Deserialize(const std::string& value);
+ static absl::optional<Origin> Deserialize(const std::string& value);
// The tuple is used for both tuple origins (e.g. https://example.com:80), as
// well as for opaque origins, where it tracks the tuple origin from which
@@ -419,7 +421,7 @@
// The nonce is used for maintaining identity of an opaque origin. This
// nonce is preserved when an opaque origin is copied or moved. An Origin
// is considered opaque if and only if |nonce_| holds a value.
- gurl_base::Optional<Nonce> nonce_;
+ absl::optional<Nonce> nonce_;
};
// Pretty-printers for logging. These expose the internal state of the nonce.
diff --git a/url/origin_abstract_tests.cc b/url/origin_abstract_tests.cc
new file mode 100644
index 0000000..1619eae
--- /dev/null
+++ b/url/origin_abstract_tests.cc
@@ -0,0 +1,104 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin_abstract_tests.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+ EXPECT_EQ(a, b);
+ const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+ const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+ EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+ EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+ EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+ EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+ EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+ EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+ EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+ EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+ EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+ EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+ EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+ EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+ EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+ EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+ EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+ EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
+// static
+Origin UrlOriginTestTraits::CreateOriginFromString(gurl_base::StringPiece s) {
+ return Origin::Create(GURL(s));
+}
+
+// static
+Origin UrlOriginTestTraits::CreateUniqueOpaqueOrigin() {
+ return Origin();
+}
+
+// static
+Origin UrlOriginTestTraits::CreateWithReferenceOrigin(
+ gurl_base::StringPiece url,
+ const Origin& reference_origin) {
+ return Origin::Resolve(GURL(url), reference_origin);
+}
+
+// static
+Origin UrlOriginTestTraits::DeriveNewOpaqueOrigin(
+ const Origin& reference_origin) {
+ return reference_origin.DeriveNewOpaqueOrigin();
+}
+
+// static
+bool UrlOriginTestTraits::IsOpaque(const Origin& origin) {
+ return origin.opaque();
+}
+
+// static
+std::string UrlOriginTestTraits::GetScheme(const Origin& origin) {
+ return origin.scheme();
+}
+
+// static
+std::string UrlOriginTestTraits::GetHost(const Origin& origin) {
+ return origin.host();
+}
+
+// static
+uint16_t UrlOriginTestTraits::GetPort(const Origin& origin) {
+ return origin.port();
+}
+
+// static
+SchemeHostPort UrlOriginTestTraits::GetTupleOrPrecursorTupleIfOpaque(
+ const Origin& origin) {
+ return origin.GetTupleOrPrecursorTupleIfOpaque();
+}
+
+// static
+bool UrlOriginTestTraits::IsSameOrigin(const Origin& a, const Origin& b) {
+ return a.IsSameOriginWith(b);
+}
+
+// static
+std::string UrlOriginTestTraits::Serialize(const Origin& origin) {
+ std::string serialized = origin.Serialize();
+
+ // Extra test assertion for GetURL (which doesn't have an equivalent in
+ // blink::SecurityOrigin).
+ ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+ return serialized;
+}
+
+// static
+bool UrlOriginTestTraits::IsValidUrl(gurl_base::StringPiece str) {
+ return GURL(str).is_valid();
+}
+
+// This is an abstract test suite which is instantiated by each implementation.
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AbstractOriginTest);
+
+} // namespace url
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
new file mode 100644
index 0000000..0c53f82
--- /dev/null
+++ b/url/origin_abstract_tests.h
@@ -0,0 +1,536 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_ABSTRACT_TESTS_H_
+#define URL_ORIGIN_ABSTRACT_TESTS_H_
+
+#include <string>
+#include <type_traits>
+
+#include "base/containers/contains.h"
+#include "base/strings/string_piece.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "absl/types/optional.h"
+#include "url/gurl.h"
+#include "url/origin.h"
+#include "url/scheme_host_port.h"
+#include "url/url_util.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b);
+
+// AbstractOriginTest below abstracts away differences between url::Origin and
+// blink::SecurityOrigin by parametrizing the tests with a class that has to
+// expose the same public members as UrlOriginTestTraits below.
+class UrlOriginTestTraits {
+ public:
+ using OriginType = Origin;
+
+ // Constructing an origin.
+ static OriginType CreateOriginFromString(gurl_base::StringPiece s);
+ static OriginType CreateUniqueOpaqueOrigin();
+ static OriginType CreateWithReferenceOrigin(
+ gurl_base::StringPiece url,
+ const OriginType& reference_origin);
+ static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin);
+
+ // Accessors for origin properties.
+ static bool IsOpaque(const OriginType& origin);
+ static std::string GetScheme(const OriginType& origin);
+ static std::string GetHost(const OriginType& origin);
+ static uint16_t GetPort(const OriginType& origin);
+ static SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(
+ const OriginType& origin);
+
+ // Wrappers for other instance methods of OriginType.
+ static bool IsSameOrigin(const OriginType& a, const OriginType& b);
+ static std::string Serialize(const OriginType& origin);
+
+ // "Accessors" of URL properties.
+ //
+ // TODO(lukasza): Consider merging together OriginTraitsBase here and
+ // UrlTraitsBase in //url/gurl_abstract_tests.h.
+ static bool IsValidUrl(gurl_base::StringPiece str);
+
+ // Only static members = no constructors are needed.
+ UrlOriginTestTraits() = delete;
+};
+
+// Test suite for tests that cover both url::Origin and blink::SecurityOrigin.
+template <typename TOriginTraits>
+class AbstractOriginTest : public testing::Test {
+ public:
+ void SetUp() override {
+ const char* kSchemesToRegister[] = {
+ "noaccess",
+ "std-with-host",
+ "noaccess-std-with-host",
+ "local",
+ "local-noaccess",
+ "local-std-with-host",
+ "local-noaccess-std-with-host",
+ "also-local",
+ "sec",
+ "sec-std-with-host",
+ "sec-noaccess",
+ };
+ for (const char* kScheme : kSchemesToRegister) {
+ std::string scheme(kScheme);
+ if (gurl_base::Contains(scheme, "noaccess"))
+ AddNoAccessScheme(kScheme);
+ if (gurl_base::Contains(scheme, "std-with-host"))
+ AddStandardScheme(kScheme, SchemeType::SCHEME_WITH_HOST);
+ if (gurl_base::Contains(scheme, "local"))
+ AddLocalScheme(kScheme);
+ if (gurl_base::Contains(scheme, "sec"))
+ AddSecureScheme(kScheme);
+ }
+ }
+
+ protected:
+ // Wrappers that help elide away TOriginTraits.
+ //
+ // Note that calling the wrappers needs to be prefixed with `this->...` to
+ // avoid hitting: explicit qualification required to use member 'IsOpaque'
+ // from dependent base class.
+ using OriginType = typename TOriginTraits::OriginType;
+ OriginType CreateOriginFromString(gurl_base::StringPiece s) {
+ return TOriginTraits::CreateOriginFromString(s);
+ }
+ OriginType CreateUniqueOpaqueOrigin() {
+ return TOriginTraits::CreateUniqueOpaqueOrigin();
+ }
+ OriginType CreateWithReferenceOrigin(gurl_base::StringPiece url,
+ const OriginType& reference_origin) {
+ return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin);
+ }
+ OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin) {
+ return TOriginTraits::DeriveNewOpaqueOrigin(reference_origin);
+ }
+ bool IsOpaque(const OriginType& origin) {
+ return TOriginTraits::IsOpaque(origin);
+ }
+ std::string GetScheme(const OriginType& origin) {
+ return TOriginTraits::GetScheme(origin);
+ }
+ std::string GetHost(const OriginType& origin) {
+ return TOriginTraits::GetHost(origin);
+ }
+ uint16_t GetPort(const OriginType& origin) {
+ return TOriginTraits::GetPort(origin);
+ }
+ SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(const OriginType& origin) {
+ return TOriginTraits::GetTupleOrPrecursorTupleIfOpaque(origin);
+ }
+ bool IsSameOrigin(const OriginType& a, const OriginType& b) {
+ bool is_a_same_with_b = TOriginTraits::IsSameOrigin(a, b);
+ bool is_b_same_with_a = TOriginTraits::IsSameOrigin(b, a);
+ EXPECT_EQ(is_a_same_with_b, is_b_same_with_a);
+ return is_a_same_with_b;
+ }
+ std::string Serialize(const OriginType& origin) {
+ return TOriginTraits::Serialize(origin);
+ }
+ bool IsValidUrl(gurl_base::StringPiece str) {
+ return TOriginTraits::IsValidUrl(str);
+ }
+
+#define EXPECT_SAME_ORIGIN(a, b) \
+ EXPECT_TRUE(this->IsSameOrigin((a), (b))) \
+ << "When checking if \"" << this->Serialize(a) << "\" is " \
+ << "same-origin with \"" << this->Serialize(b) << "\""
+
+#define EXPECT_CROSS_ORIGIN(a, b) \
+ EXPECT_FALSE(this->IsSameOrigin((a), (b))) \
+ << "When checking if \"" << this->Serialize(a) << "\" is " \
+ << "cross-origin from \"" << this->Serialize(b) << "\""
+
+ void VerifyOriginInvariants(const OriginType& origin) {
+ // An origin is always same-origin with itself.
+ EXPECT_SAME_ORIGIN(origin, origin);
+
+ // A copy of |origin| should be same-origin as well.
+ auto origin_copy = origin;
+ EXPECT_EQ(this->GetScheme(origin), this->GetScheme(origin_copy));
+ EXPECT_EQ(this->GetHost(origin), this->GetHost(origin_copy));
+ EXPECT_EQ(this->GetPort(origin), this->GetPort(origin_copy));
+ EXPECT_EQ(this->IsOpaque(origin), this->IsOpaque(origin_copy));
+ EXPECT_SAME_ORIGIN(origin, origin_copy);
+
+ // An origin is always cross-origin from another, unique, opaque origin.
+ EXPECT_CROSS_ORIGIN(origin, this->CreateUniqueOpaqueOrigin());
+
+ // An origin is always cross-origin from another tuple origin.
+ auto different_tuple_origin =
+ this->CreateOriginFromString("https://not-in-the-list.test/");
+ EXPECT_CROSS_ORIGIN(origin, different_tuple_origin);
+
+ // Deriving an origin for "about:blank".
+ auto about_blank_origin1 =
+ this->CreateWithReferenceOrigin("about:blank", origin);
+ auto about_blank_origin2 =
+ this->CreateWithReferenceOrigin("about:blank?bar#foo", origin);
+ EXPECT_SAME_ORIGIN(origin, about_blank_origin1);
+ EXPECT_SAME_ORIGIN(origin, about_blank_origin2);
+
+ // Derived opaque origins.
+ std::vector<OriginType> derived_origins = {
+ this->DeriveNewOpaqueOrigin(origin),
+ this->CreateWithReferenceOrigin("data:text/html,baz", origin),
+ this->DeriveNewOpaqueOrigin(about_blank_origin1),
+ };
+ for (size_t i = 0; i < derived_origins.size(); i++) {
+ SCOPED_TRACE(testing::Message() << "Derived origin #" << i);
+ const OriginType& derived_origin = derived_origins[i];
+ EXPECT_TRUE(this->IsOpaque(derived_origin));
+ EXPECT_SAME_ORIGIN(derived_origin, derived_origin);
+ EXPECT_CROSS_ORIGIN(origin, derived_origin);
+ EXPECT_EQ(this->GetTupleOrPrecursorTupleIfOpaque(origin),
+ this->GetTupleOrPrecursorTupleIfOpaque(derived_origin));
+ }
+ }
+
+ void VerifyUniqueOpaqueOriginInvariants(const OriginType& origin) {
+ if (!this->IsOpaque(origin)) {
+ ADD_FAILURE() << "Got unexpectedly non-opaque origin: "
+ << this->Serialize(origin);
+ return; // Skip other test assertions.
+ }
+
+ // Opaque origins should have an "empty" scheme, host and port.
+ EXPECT_EQ("", this->GetScheme(origin));
+ EXPECT_EQ("", this->GetHost(origin));
+ EXPECT_EQ(0, this->GetPort(origin));
+
+ // Unique opaque origins should have an empty precursor tuple.
+ EXPECT_EQ(SchemeHostPort(), this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+ // Serialization test.
+ EXPECT_EQ("null", this->Serialize(origin));
+
+ // Invariants that should hold for any origin.
+ VerifyOriginInvariants(origin);
+ }
+
+ void TestUniqueOpaqueOrigin(gurl_base::StringPiece test_input) {
+ auto origin = this->CreateOriginFromString(test_input);
+ this->VerifyUniqueOpaqueOriginInvariants(origin);
+
+ // Re-creating from the URL should be cross-origin.
+ auto origin_recreated_from_same_input =
+ this->CreateOriginFromString(test_input);
+ EXPECT_CROSS_ORIGIN(origin, origin_recreated_from_same_input);
+ }
+
+ void VerifyTupleOriginInvariants(const OriginType& origin,
+ const SchemeHostPort& expected_tuple) {
+ if (this->IsOpaque(origin)) {
+ ADD_FAILURE() << "Got unexpectedly opaque origin";
+ return; // Skip other test assertions.
+ }
+ SCOPED_TRACE(testing::Message()
+ << "Actual origin: " << this->Serialize(origin));
+
+ // Compare `origin` against the `expected_tuple`.
+ EXPECT_EQ(expected_tuple.scheme(), this->GetScheme(origin));
+ EXPECT_EQ(expected_tuple.host(), this->GetHost(origin));
+ EXPECT_EQ(expected_tuple.port(), this->GetPort(origin));
+ EXPECT_EQ(expected_tuple, this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+ // Serialization test.
+ //
+ // TODO(lukasza): Consider preserving the hostname when serializing file:
+ // URLs. Dropping the hostname seems incompatible with section 6 of
+ // rfc6454. Even though section 4 says that "the implementation MAY
+ // return an implementation-defined value", it seems that Chromium
+ // implementation *does* include the hostname in the origin SchemeHostPort
+ // tuple.
+ if (expected_tuple.scheme() != kFileScheme || expected_tuple.host() == "") {
+ EXPECT_SAME_ORIGIN(origin,
+ this->CreateOriginFromString(this->Serialize(origin)));
+ }
+
+ // Invariants that should hold for any origin.
+ VerifyOriginInvariants(origin);
+ }
+
+ private:
+ ScopedSchemeRegistryForTests scoped_scheme_registry_;
+};
+
+TYPED_TEST_SUITE_P(AbstractOriginTest);
+
+TYPED_TEST_P(AbstractOriginTest, NonStandardSchemeWithAndroidWebViewHack) {
+ EnableNonStandardSchemesForAndroidWebView();
+
+ // Regression test for https://crbug.com/896059.
+ auto origin = this->CreateOriginFromString("unknown-scheme://");
+ EXPECT_FALSE(this->IsOpaque(origin));
+ EXPECT_EQ("unknown-scheme", this->GetScheme(origin));
+ EXPECT_EQ("", this->GetHost(origin));
+ EXPECT_EQ(0, this->GetPort(origin));
+
+ // about:blank translates into an opaque origin, even in presence of
+ // EnableNonStandardSchemesForAndroidWebView.
+ origin = this->CreateOriginFromString("about:blank");
+ EXPECT_TRUE(this->IsOpaque(origin));
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromValidUrls) {
+ const char* kTestCases[] = {
+ // Built-in noaccess schemes.
+ "data:text/html,Hello!",
+ "javascript:alert(1)",
+ "about:blank",
+
+ // Opaque blob URLs.
+ "blob:null/foo", // blob:null (actually a valid URL)
+ "blob:data:foo", // blob + data (which is nonstandard)
+ "blob:about://blank/", // blob + about (which is nonstandard)
+ "blob:about:blank/", // blob + about (which is nonstandard)
+ "blob:blob:http://www.example.com/guid-goes-here",
+ "blob:filesystem:ws:b/.",
+ "blob:filesystem:ftp://a/b",
+ "blob:blob:file://localhost/foo/bar",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // Verify that `origin` is opaque not just because `test_input` results in
+ // an invalid URL (because of a typo in the scheme name, or because of a
+ // technicality like having no host in a noaccess-std-with-host: scheme).
+ EXPECT_TRUE(this->IsValidUrl(test_input));
+
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromInvalidUrls) {
+ // TODO(lukasza): Consider moving those to GURL/KURL tests that verify what
+ // inputs are parsed as an invalid URL.
+
+ const char* kTestCases[] = {
+ // Invalid file: URLs.
+ "file://example.com:443/etc/passwd", // No port expected.
+
+ // Invalid HTTP URLs.
+ "http",
+ "http:",
+ "http:/",
+ "http://",
+ "http://:",
+ "http://:1",
+ "http::///invalid.example.com/",
+ "http://example.com:65536/", // Port out of range.
+ "http://example.com:-1/", // Port out of range.
+ "http://example.com:18446744073709551616/", // Port = 2^64.
+ "http://example.com:18446744073709551616999/", // Lots of port digits.
+
+ // Invalid filesystem URLs.
+ "filesystem:http://example.com/", // Missing /type/.
+ "filesystem:local:baz./type/",
+ "filesystem:local://hostname/type/",
+ "filesystem:unknown-scheme://hostname/type/",
+ "filesystem:filesystem:http://example.org:88/foo/bar",
+
+ // Invalid IP addresses
+ "http://[]/",
+ "http://[2001:0db8:0000:0000:0000:0000:0000:0000:0001]/", // 9 groups.
+
+ // Unknown scheme without a colon character (":") gives an invalid URL.
+ "unknown-scheme",
+
+ // Standard schemes require a hostname (and result in an opaque origin if
+ // the hostname is missing).
+ "local-std-with-host:",
+ "noaccess-std-with-host:",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // All testcases here are expected to represent invalid URLs (e.g. because
+ // of a malformed port or IP address, or because of a technicality like
+ // having no host in a noaccess-std-with-host: scheme).
+ EXPECT_FALSE(this->IsValidUrl(test_input));
+
+ // Invalid URLs should always result in an opaque origin.
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, TupleOrigins) {
+ struct TestCase {
+ const char* input;
+ SchemeHostPort expected_tuple;
+ } kTestCases[] = {
+ // file: URLs
+ {"file:///etc/passwd", {"file", "", 0}},
+ {"file://example.com/etc/passwd", {"file", "example.com", 0}},
+ {"file:///", {"file", "", 0}},
+
+#ifdef WIN32
+ // TODO(https://crbug.com/1214098): Consider unifying URL parsing behavior
+ // on all platforms (or at least make sure that serialization always
+ // round-trips - see https://crbug.com/1214098).
+ {"file://hostname/C:/dir/file.txt", {"file", "", 0}},
+#else
+ {"file://hostname/C:/dir/file.txt", {"file", "hostname", 0}},
+#endif
+
+ // HTTP URLs
+ {"http://example.com/", {"http", "example.com", 80}},
+ {"http://example.com:80/", {"http", "example.com", 80}},
+ {"http://example.com:123/", {"http", "example.com", 123}},
+ {"http://example.com:0/", {"http", "example.com", 0}},
+ {"http://example.com:65535/", {"http", "example.com", 65535}},
+ {"https://example.com/", {"https", "example.com", 443}},
+ {"https://example.com:443/", {"https", "example.com", 443}},
+ {"https://example.com:123/", {"https", "example.com", 123}},
+ {"https://example.com:0/", {"https", "example.com", 0}},
+ {"https://example.com:65535/", {"https", "example.com", 65535}},
+ {"http://user:pass@example.com/", {"http", "example.com", 80}},
+ {"http://example.com:123/?query", {"http", "example.com", 123}},
+ {"https://example.com/#1234", {"https", "example.com", 443}},
+ {"https://u:p@example.com:123/?query#1234",
+ {"https", "example.com", 123}},
+ {"http://example/", {"http", "example", 80}},
+
+ // Blob URLs.
+ {"blob:http://example.com/guid-goes-here", {"http", "example.com", 80}},
+ {"blob:http://example.com:123/guid-goes-here",
+ {"http", "example.com", 123}},
+ {"blob:https://example.com/guid-goes-here",
+ {"https", "example.com", 443}},
+ {"blob:http://u:p@example.com/guid-goes-here",
+ {"http", "example.com", 80}},
+
+ // Filesystem URLs.
+ {"filesystem:http://example.com/type/", {"http", "example.com", 80}},
+ {"filesystem:http://example.com:123/type/", {"http", "example.com", 123}},
+ {"filesystem:https://example.com/type/", {"https", "example.com", 443}},
+ {"filesystem:https://example.com:123/type/",
+ {"https", "example.com", 123}},
+ {"filesystem:local-std-with-host:baz./type/",
+ {"local-std-with-host", "baz.", 0}},
+
+ // IP Addresses
+ {"http://192.168.9.1/", {"http", "192.168.9.1", 80}},
+ {"http://[2001:db8::1]/", {"http", "[2001:db8::1]", 80}},
+ {"http://[2001:0db8:0000:0000:0000:0000:0000:0001]/",
+ {"http", "[2001:db8::1]", 80}},
+ {"http://1/", {"http", "0.0.0.1", 80}},
+ {"http://1:1/", {"http", "0.0.0.1", 1}},
+ {"http://3232237825/", {"http", "192.168.9.1", 80}},
+
+ // Punycode
+ {"http://☃.net/", {"http", "xn--n3h.net", 80}},
+ {"blob:http://☃.net/", {"http", "xn--n3h.net", 80}},
+ {"local-std-with-host:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg",
+ {"local-std-with-host", "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0}},
+
+ // Registered URLs
+ {"ftp://example.com/", {"ftp", "example.com", 21}},
+ {"ws://example.com/", {"ws", "example.com", 80}},
+ {"wss://example.com/", {"wss", "example.com", 443}},
+ {"wss://user:pass@example.com/", {"wss", "example.com", 443}},
+ };
+
+ for (const TestCase& test : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+ // Only valid URLs should translate into valid, non-opaque origins.
+ EXPECT_TRUE(this->IsValidUrl(test.input));
+
+ auto origin = this->CreateOriginFromString(test.input);
+ this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_OpaqueOrigins) {
+ const char* kTestCases[] = {
+ // Unknown scheme
+ "unknown-scheme:foo",
+ "unknown-scheme://bar",
+
+ // Unknown scheme that is a prefix or suffix of a registered scheme.
+ "loca:foo",
+ "ocal:foo",
+ "local-suffix:foo",
+ "prefix-local:foo",
+
+ // Custom no-access schemes translate into an opaque origin (just like the
+ // built-in no-access schemes such as about:blank or data:).
+ "noaccess-std-with-host:foo",
+ "noaccess-std-with-host://bar",
+ "noaccess://host",
+ "local-noaccess://host",
+ "local-noaccess-std-with-host://host",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // Verify that `origin` is opaque not just because `test_input` results in
+ // an invalid URL (because of a typo in the scheme name, or because of a
+ // technicality like having no host in a noaccess-std-with-host: scheme).
+ EXPECT_TRUE(this->IsValidUrl(test_input));
+
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_TupleOrigins) {
+ struct TestCase {
+ const char* input;
+ SchemeHostPort expected_tuple;
+ } kTestCases[] = {
+ // Scheme (registered in SetUp()) that's both local and standard.
+ // TODO: Is it really appropriate to do network-host canonicalization of
+ // schemes without ports?
+ {"local-std-with-host:20", {"local-std-with-host", "0.0.0.20", 0}},
+ {"local-std-with-host:20.", {"local-std-with-host", "0.0.0.20", 0}},
+ {"local-std-with-host:foo", {"local-std-with-host", "foo", 0}},
+ {"local-std-with-host://bar:20", {"local-std-with-host", "bar", 0}},
+ {"local-std-with-host:baz.", {"local-std-with-host", "baz.", 0}},
+ {"local-std-with-host:baz..", {"local-std-with-host", "baz..", 0}},
+ {"local-std-with-host:baz..bar", {"local-std-with-host", "baz..bar", 0}},
+ {"local-std-with-host:baz...", {"local-std-with-host", "baz...", 0}},
+
+ // Scheme (registered in SetUp()) that's local but nonstandard. These
+ // always have empty hostnames, but are allowed to be url::Origins.
+ {"local:", {"local", "", 0}},
+ {"local:foo", {"local", "", 0}},
+ {"local://bar", {"local", "", 0}},
+ {"also-local://bar", {"also-local", "", 0}},
+
+ {"std-with-host://host", {"std-with-host", "host", 0}},
+ {"local://host", {"local", "", 0}},
+ {"local-std-with-host://host", {"local-std-with-host", "host", 0}},
+ };
+
+ for (const TestCase& test : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+ // Only valid URLs should translate into valid, non-opaque origins.
+ EXPECT_TRUE(this->IsValidUrl(test.input));
+
+ auto origin = this->CreateOriginFromString(test.input);
+ this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+ }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractOriginTest,
+ NonStandardSchemeWithAndroidWebViewHack,
+ OpaqueOriginsFromValidUrls,
+ OpaqueOriginsFromInvalidUrls,
+ TupleOrigins,
+ CustomSchemes_OpaqueOrigins,
+ CustomSchemes_TupleOrigins);
+
+} // namespace url
+
+#endif // URL_ORIGIN_ABSTRACT_TESTS_H_
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 2f342c5..a9d3a4f 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -69,13 +69,13 @@
return Origin::Nonce(nonce);
}
- gurl_base::Optional<gurl_base::UnguessableToken> GetNonce(const Origin& origin) {
+ absl::optional<gurl_base::UnguessableToken> GetNonce(const Origin& origin) {
return origin.GetNonceForSerialization();
}
// Wrappers around url::Origin methods to expose it to tests.
- gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+ absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
gurl_base::StringPiece precursor_scheme,
gurl_base::StringPiece precursor_host,
uint16_t precursor_port,
@@ -84,16 +84,16 @@
precursor_scheme, precursor_host, precursor_port, nonce);
}
- gurl_base::Optional<std::string> SerializeWithNonce(const Origin& origin) {
+ absl::optional<std::string> SerializeWithNonce(const Origin& origin) {
return origin.SerializeWithNonce();
}
- gurl_base::Optional<std::string> SerializeWithNonceAndInitIfNeeded(
+ absl::optional<std::string> SerializeWithNonceAndInitIfNeeded(
Origin& origin) {
return origin.SerializeWithNonceAndInitIfNeeded();
}
- gurl_base::Optional<Origin> Deserialize(const std::string& value) {
+ absl::optional<Origin> Deserialize(const std::string& value) {
return Origin::Deserialize(value);
}
@@ -286,7 +286,7 @@
for (const auto& test : cases) {
SCOPED_TRACE(testing::Message()
<< test.scheme << "://" << test.host << ":" << test.port);
- gurl_base::Optional<url::Origin> origin =
+ absl::optional<url::Origin> origin =
url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
test.scheme, test.host, test.port);
ASSERT_TRUE(origin);
@@ -299,7 +299,7 @@
ExpectParsedUrlsEqual(GURL(origin->Serialize()), origin->GetURL());
gurl_base::UnguessableToken nonce = gurl_base::UnguessableToken::Create();
- gurl_base::Optional<url::Origin> opaque_origin =
+ absl::optional<url::Origin> opaque_origin =
UnsafelyCreateOpaqueOriginWithoutNormalization(
test.scheme, test.host, test.port, CreateNonce(nonce));
ASSERT_TRUE(opaque_origin);
@@ -355,7 +355,7 @@
// Opaque origins with unknown precursors are allowed.
gurl_base::UnguessableToken token = gurl_base::UnguessableToken::Create();
- gurl_base::Optional<url::Origin> anonymous_opaque =
+ absl::optional<url::Origin> anonymous_opaque =
UnsafelyCreateOpaqueOriginWithoutNormalization("", "", 0,
CreateNonce(token));
ASSERT_TRUE(anonymous_opaque)
@@ -667,10 +667,10 @@
for (const GURL& url : valid_urls) {
SCOPED_TRACE(url.spec());
Origin origin = Origin::Create(url);
- gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin);
+ absl::optional<std::string> serialized = SerializeWithNonce(origin);
ASSERT_TRUE(serialized);
- gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
EXPECT_TRUE(DoEqualityComparisons(origin, deserialized.value(), true));
@@ -679,11 +679,11 @@
}
TEST_F(OriginTest, DeserializeInvalid) {
- EXPECT_EQ(gurl_base::nullopt, Deserialize(std::string()));
- EXPECT_EQ(gurl_base::nullopt, Deserialize("deadbeef"));
- EXPECT_EQ(gurl_base::nullopt, Deserialize("0123456789"));
- EXPECT_EQ(gurl_base::nullopt, Deserialize("https://a.com"));
- EXPECT_EQ(gurl_base::nullopt, Deserialize("https://192.168.1.1"));
+ EXPECT_EQ(absl::nullopt, Deserialize(std::string()));
+ EXPECT_EQ(absl::nullopt, Deserialize("deadbeef"));
+ EXPECT_EQ(absl::nullopt, Deserialize("0123456789"));
+ EXPECT_EQ(absl::nullopt, Deserialize("https://a.com"));
+ EXPECT_EQ(absl::nullopt, Deserialize("https://192.168.1.1"));
}
TEST_F(OriginTest, SerializeTBDNonce) {
@@ -695,8 +695,8 @@
for (const GURL& url : invalid_urls) {
SCOPED_TRACE(url.spec());
Origin origin = Origin::Create(url);
- gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin);
- gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ absl::optional<std::string> serialized = SerializeWithNonce(origin);
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
// Can't use DoEqualityComparisons here since empty nonces are never ==
@@ -706,10 +706,10 @@
// Same basic test as above, but without a GURL to create tuple_.
Origin opaque;
- gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque);
+ absl::optional<std::string> serialized = SerializeWithNonce(opaque);
ASSERT_TRUE(serialized);
- gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
// Can't use DoEqualityComparisons here since empty nonces are never == unless
@@ -720,9 +720,9 @@
for (const GURL& url : invalid_urls) {
SCOPED_TRACE(url.spec());
Origin origin = Origin::Create(url);
- gurl_base::Optional<std::string> serialized =
+ absl::optional<std::string> serialized =
SerializeWithNonceAndInitIfNeeded(origin);
- gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
// The nonce should have been initialized prior to Serialization().
@@ -734,10 +734,10 @@
Origin opaque;
GetNonce(opaque);
- gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque);
+ absl::optional<std::string> serialized = SerializeWithNonce(opaque);
ASSERT_TRUE(serialized);
- gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
ASSERT_TRUE(deserialized.has_value());
EXPECT_TRUE(DoEqualityComparisons(opaque, deserialized.value(), true));
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 21b473d..c337da3 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -14,6 +14,7 @@
#include "polyfills/base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_piece.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -159,8 +160,8 @@
SchemeHostPort::SchemeHostPort(gurl_base::StringPiece scheme,
gurl_base::StringPiece host,
uint16_t port)
- : SchemeHostPort(scheme.as_string(),
- host.as_string(),
+ : SchemeHostPort(std::string(scheme),
+ std::string(host),
port,
ConstructPolicy::CHECK_CANONICALIZATION) {}
diff --git a/url/third_party/mozilla/DIR_METADATA b/url/third_party/mozilla/DIR_METADATA
deleted file mode 100644
index fb07a25..0000000
--- a/url/third_party/mozilla/DIR_METADATA
+++ /dev/null
@@ -1,11 +0,0 @@
-# Metadata information for this directory.
-#
-# For more information on DIR_METADATA files, see:
-# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/README.md
-#
-# For the schema of this file, see Metadata message:
-# https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/proto/dir_metadata.proto
-
-monorail {
- component: "Internals>Core"
-}
\ No newline at end of file
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index 4fd3a8e..e8a1edb 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc
@@ -48,7 +48,7 @@
namespace {
// Returns true if the given character is a valid digit to use in a port.
-inline bool IsPortDigit(gurl_base::char16 ch) {
+inline bool IsPortDigit(char16_t ch) {
return ch >= '0' && ch <= '9';
}
@@ -812,13 +812,13 @@
return DoExtractScheme(url, url_len, scheme);
}
-bool ExtractScheme(const gurl_base::char16* url, int url_len, Component* scheme) {
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) {
return DoExtractScheme(url, url_len, scheme);
}
// This handles everything that may be an authority terminator, including
// backslash. For special backslash handling see DoParseAfterScheme.
-bool IsAuthorityTerminator(gurl_base::char16 ch) {
+bool IsAuthorityTerminator(char16_t ch) {
return IsURLSlash(ch) || ch == '?' || ch == '#';
}
@@ -828,7 +828,7 @@
DoExtractFileName(url, path, file_name);
}
-void ExtractFileName(const gurl_base::char16* url,
+void ExtractFileName(const char16_t* url,
const Component& path,
Component* file_name) {
DoExtractFileName(url, path, file_name);
@@ -841,7 +841,7 @@
return DoExtractQueryKeyValue(url, query, key, value);
}
-bool ExtractQueryKeyValue(const gurl_base::char16* url,
+bool ExtractQueryKeyValue(const char16_t* url,
Component* query,
Component* key,
Component* value) {
@@ -857,7 +857,7 @@
DoParseAuthority(spec, auth, username, password, hostname, port_num);
}
-void ParseAuthority(const gurl_base::char16* spec,
+void ParseAuthority(const char16_t* spec,
const Component& auth,
Component* username,
Component* password,
@@ -870,7 +870,7 @@
return DoParsePort(url, port);
}
-int ParsePort(const gurl_base::char16* url, const Component& port) {
+int ParsePort(const char16_t* url, const Component& port) {
return DoParsePort(url, port);
}
@@ -878,7 +878,7 @@
DoParseStandardURL(url, url_len, parsed);
}
-void ParseStandardURL(const gurl_base::char16* url, int url_len, Parsed* parsed) {
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseStandardURL(url, url_len, parsed);
}
@@ -889,7 +889,7 @@
DoParsePathURL(url, url_len, trim_path_end, parsed);
}
-void ParsePathURL(const gurl_base::char16* url,
+void ParsePathURL(const char16_t* url,
int url_len,
bool trim_path_end,
Parsed* parsed) {
@@ -900,7 +900,7 @@
DoParseFileSystemURL(url, url_len, parsed);
}
-void ParseFileSystemURL(const gurl_base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileSystemURL(url, url_len, parsed);
}
@@ -908,7 +908,7 @@
DoParseMailtoURL(url, url_len, parsed);
}
-void ParseMailtoURL(const gurl_base::char16* url, int url_len, Parsed* parsed) {
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseMailtoURL(url, url_len, parsed);
}
@@ -920,7 +920,7 @@
ParsePath(spec, path, filepath, query, ref);
}
-void ParsePathInternal(const gurl_base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
@@ -935,7 +935,7 @@
DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
}
-void ParseAfterScheme(const gurl_base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed) {
diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h
index 54b2af2..1ec0ef8 100644
--- a/url/third_party/mozilla/url_parse.h
+++ b/url/third_party/mozilla/url_parse.h
@@ -6,7 +6,6 @@
#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
#include "polyfills/base/component_export.h"
-#include "base/strings/string16.h"
namespace url {
@@ -202,7 +201,7 @@
void clear_inner_parsed() {
if (inner_parsed_) {
delete inner_parsed_;
- inner_parsed_ = NULL;
+ inner_parsed_ = nullptr;
}
}
@@ -230,7 +229,7 @@
COMPONENT_EXPORT(URL)
void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseStandardURL(const gurl_base::char16* url, int url_len, Parsed* parsed);
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed);
// PathURL is for when the scheme is known not to have an authority (host)
// section but that aren't file URLs either. The scheme is parsed, and
@@ -242,7 +241,7 @@
bool trim_path_end,
Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParsePathURL(const gurl_base::char16* url,
+void ParsePathURL(const char16_t* url,
int url_len,
bool trim_path_end,
Parsed* parsed);
@@ -252,19 +251,19 @@
COMPONENT_EXPORT(URL)
void ParseFileURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseFileURL(const gurl_base::char16* url, int url_len, Parsed* parsed);
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed);
// Filesystem URLs are structured differently than other URLs.
COMPONENT_EXPORT(URL)
void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseFileSystemURL(const gurl_base::char16* url, int url_len, Parsed* parsed);
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed);
// MailtoURL is for mailto: urls. They are made up scheme,path,query
COMPONENT_EXPORT(URL)
void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
COMPONENT_EXPORT(URL)
-void ParseMailtoURL(const gurl_base::char16* url, int url_len, Parsed* parsed);
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed);
// Helper functions -----------------------------------------------------------
@@ -291,11 +290,11 @@
COMPONENT_EXPORT(URL)
bool ExtractScheme(const char* url, int url_len, Component* scheme);
COMPONENT_EXPORT(URL)
-bool ExtractScheme(const gurl_base::char16* url, int url_len, Component* scheme);
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme);
// Returns true if ch is a character that terminates the authority segment
// of a URL.
-COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(gurl_base::char16 ch);
+COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch);
// Does a best effort parse of input |spec|, in range |auth|. If a particular
// component is not found, it will be set to invalid.
@@ -307,7 +306,7 @@
Component* hostname,
Component* port_num);
COMPONENT_EXPORT(URL)
-void ParseAuthority(const gurl_base::char16* spec,
+void ParseAuthority(const char16_t* spec,
const Component& auth,
Component* username,
Component* password,
@@ -323,7 +322,7 @@
enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port);
COMPONENT_EXPORT(URL)
-int ParsePort(const gurl_base::char16* url, const Component& port);
+int ParsePort(const char16_t* url, const Component& port);
// Extracts the range of the file name in the given url. The path must
// already have been computed by the parse function, and the matching URL
@@ -340,7 +339,7 @@
const Component& path,
Component* file_name);
COMPONENT_EXPORT(URL)
-void ExtractFileName(const gurl_base::char16* url,
+void ExtractFileName(const char16_t* url,
const Component& path,
Component* file_name);
@@ -365,7 +364,7 @@
Component* key,
Component* value);
COMPONENT_EXPORT(URL)
-bool ExtractQueryKeyValue(const gurl_base::char16* url,
+bool ExtractQueryKeyValue(const char16_t* url,
Component* query,
Component* key,
Component* value);
diff --git a/url/url_canon.cc b/url/url_canon.cc
index 1860234..dce7847 100644
--- a/url/url_canon.cc
+++ b/url/url_canon.cc
@@ -10,6 +10,6 @@
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL))
- CanonOutputT<gurl_base::char16>;
+ CanonOutputT<char16_t>;
} // namespace url
diff --git a/url/url_canon.h b/url/url_canon.h
index 84b3549..457f58a 100644
--- a/url/url_canon.h
+++ b/url/url_canon.h
@@ -8,9 +8,10 @@
#include <stdlib.h>
#include <string.h>
+#include <string>
+
#include "polyfills/base/component_export.h"
#include "polyfills/base/export_template.h"
-#include "base/strings/string16.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
@@ -178,18 +179,18 @@
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
CanonOutputT<char>;
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
- CanonOutputT<gurl_base::char16>;
+ CanonOutputT<char16_t>;
// Normally, all canonicalization output is in narrow characters. We support
// the templates so it can also be used internally if a wide buffer is
// required.
typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<gurl_base::char16> CanonOutputW;
+typedef CanonOutputT<char16_t> CanonOutputW;
template<int fixed_capacity>
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
-template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<gurl_base::char16, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
// Character set converter ----------------------------------------------------
//
@@ -215,7 +216,7 @@
// decimal, (such as "你") with escaping of the ampersand, number
// sign, and semicolon (in the previous example it would be
// "%26%2320320%3B"). This rule is based on what IE does in this situation.
- virtual void ConvertFromUTF16(const gurl_base::char16* input,
+ virtual void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) = 0;
};
@@ -273,11 +274,11 @@
int* output_len,
bool* potentially_dangling_markup);
COMPONENT_EXPORT(URL)
-const gurl_base::char16* RemoveURLWhitespace(const gurl_base::char16* input,
- int input_len,
- CanonOutputT<gurl_base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
// IDN ------------------------------------------------------------------------
@@ -291,7 +292,7 @@
//
// On error, returns false. The output in this case is undefined.
COMPONENT_EXPORT(URL)
-bool IDNToASCII(const gurl_base::char16* src, int src_len, CanonOutputW* output);
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
@@ -323,7 +324,7 @@
CanonOutput* output,
Component* out_scheme);
COMPONENT_EXPORT(URL)
-bool CanonicalizeScheme(const gurl_base::char16* spec,
+bool CanonicalizeScheme(const char16_t* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme);
@@ -347,9 +348,9 @@
Component* out_username,
Component* out_password);
COMPONENT_EXPORT(URL)
-bool CanonicalizeUserInfo(const gurl_base::char16* username_source,
+bool CanonicalizeUserInfo(const char16_t* username_source,
const Component& username,
- const gurl_base::char16* password_source,
+ const char16_t* password_source,
const Component& password,
CanonOutput* output,
Component* out_username,
@@ -411,7 +412,7 @@
CanonOutput* output,
Component* out_host);
COMPONENT_EXPORT(URL)
-bool CanonicalizeHost(const gurl_base::char16* spec,
+bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host);
@@ -426,7 +427,7 @@
CanonOutput* output,
CanonHostInfo* host_info);
COMPONENT_EXPORT(URL)
-void CanonicalizeHostVerbose(const gurl_base::char16* spec,
+void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info);
@@ -456,7 +457,7 @@
const Component& host,
CanonOutput* output);
COMPONENT_EXPORT(URL)
-bool CanonicalizeHostSubstring(const gurl_base::char16* spec,
+bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output);
@@ -476,7 +477,7 @@
CanonOutput* output,
CanonHostInfo* host_info);
COMPONENT_EXPORT(URL)
-void CanonicalizeIPAddress(const gurl_base::char16* spec,
+void CanonicalizeIPAddress(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info);
@@ -493,7 +494,7 @@
CanonOutput* output,
Component* out_port);
COMPONENT_EXPORT(URL)
-bool CanonicalizePort(const gurl_base::char16* spec,
+bool CanonicalizePort(const char16_t* spec,
const Component& port,
int default_port_for_scheme,
CanonOutput* output,
@@ -519,11 +520,24 @@
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-bool CanonicalizePath(const gurl_base::char16* spec,
+bool CanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
+// Like CanonicalizePath(), but does not assume that it's operating on the
+// entire path. It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+
// Canonicalizes the input as a file path. This is like CanonicalizePath except
// that it also handles Windows drive specs. For example, the path can begin
// with "c|\" and it will get properly canonicalized to "C:/".
@@ -536,7 +550,7 @@
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-bool FileCanonicalizePath(const gurl_base::char16* spec,
+bool FileCanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
@@ -560,7 +574,7 @@
CanonOutput* output,
Component* out_query);
COMPONENT_EXPORT(URL)
-void CanonicalizeQuery(const gurl_base::char16* spec,
+void CanonicalizeQuery(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output,
@@ -578,7 +592,7 @@
CanonOutput* output,
Component* out_path);
COMPONENT_EXPORT(URL)
-void CanonicalizeRef(const gurl_base::char16* spec,
+void CanonicalizeRef(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path);
@@ -603,7 +617,7 @@
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeStandardURL(const gurl_base::char16* spec,
+bool CanonicalizeStandardURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
@@ -620,7 +634,7 @@
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeFileURL(const gurl_base::char16* spec,
+bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
@@ -636,7 +650,7 @@
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeFileSystemURL(const gurl_base::char16* spec,
+bool CanonicalizeFileSystemURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
@@ -652,12 +666,25 @@
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizePathURL(const gurl_base::char16* spec,
+bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed);
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
@@ -670,7 +697,7 @@
CanonOutput* output,
Parsed* new_parsed);
COMPONENT_EXPORT(URL)
-bool CanonicalizeMailtoURL(const gurl_base::char16* spec,
+bool CanonicalizeMailtoURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
@@ -869,7 +896,7 @@
COMPONENT_EXPORT(URL)
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
@@ -887,7 +914,7 @@
COMPONENT_EXPORT(URL)
bool ReplaceFileSystemURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
@@ -904,7 +931,7 @@
COMPONENT_EXPORT(URL)
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed);
@@ -920,7 +947,7 @@
COMPONENT_EXPORT(URL)
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed);
@@ -935,7 +962,7 @@
COMPONENT_EXPORT(URL)
bool ReplaceMailtoURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed);
@@ -963,7 +990,7 @@
COMPONENT_EXPORT(URL)
bool IsRelativeURL(const char* base,
const Parsed& base_parsed,
- const gurl_base::char16* fragment,
+ const char16_t* fragment,
int fragment_len,
bool is_base_hierarchical,
bool* is_relative,
@@ -1000,7 +1027,7 @@
bool ResolveRelativeURL(const char* base_url,
const Parsed& base_parsed,
bool base_is_file,
- const gurl_base::char16* relative_url,
+ const char16_t* relative_url,
const Component& relative_component,
CharsetConverter* query_converter,
CanonOutput* output,
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index 8482c35..b45cea0 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc
@@ -299,11 +299,6 @@
// Now iterate through all the characters, converting to UTF-8 and validating.
int end = ref.end();
for (int i = ref.begin; i < end; i++) {
- if (spec[i] == 0) {
- // IE just strips NULLs, so we do too.
- continue;
- }
-
UCHAR current_char = static_cast<UCHAR>(spec[i]);
if (current_char < 0x80) {
if (kShouldEscapeCharInFragment[current_char])
@@ -329,16 +324,16 @@
potentially_dangling_markup);
}
-const gurl_base::char16* RemoveURLWhitespace(const gurl_base::char16* input,
- int input_len,
- CanonOutputT<gurl_base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup) {
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
potentially_dangling_markup);
}
-char CanonicalSchemeChar(gurl_base::char16 ch) {
+char CanonicalSchemeChar(char16_t ch) {
if (ch >= 0x80)
return 0; // Non-ASCII is not supported by schemes.
return kSchemeCanonical[ch];
@@ -351,11 +346,11 @@
return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
}
-bool CanonicalizeScheme(const gurl_base::char16* spec,
+bool CanonicalizeScheme(const char16_t* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- return DoScheme<gurl_base::char16, gurl_base::char16>(spec, scheme, output, out_scheme);
+ return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme);
}
bool CanonicalizeUserInfo(const char* username_source,
@@ -370,16 +365,16 @@
output, out_username, out_password);
}
-bool CanonicalizeUserInfo(const gurl_base::char16* username_source,
+bool CanonicalizeUserInfo(const char16_t* username_source,
const Component& username,
- const gurl_base::char16* password_source,
+ const char16_t* password_source,
const Component& password,
CanonOutput* output,
Component* out_username,
Component* out_password) {
- return DoUserInfo<gurl_base::char16, gurl_base::char16>(
- username_source, username, password_source, password,
- output, out_username, out_password);
+ return DoUserInfo<char16_t, char16_t>(username_source, username,
+ password_source, password, output,
+ out_username, out_password);
}
bool CanonicalizePort(const char* spec,
@@ -392,13 +387,13 @@
output, out_port);
}
-bool CanonicalizePort(const gurl_base::char16* spec,
+bool CanonicalizePort(const char16_t* spec,
const Component& port,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
- return DoPort<gurl_base::char16, gurl_base::char16>(spec, port, default_port_for_scheme,
- output, out_port);
+ return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output,
+ out_port);
}
void CanonicalizeRef(const char* spec,
@@ -408,11 +403,11 @@
DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
}
-void CanonicalizeRef(const gurl_base::char16* spec,
+void CanonicalizeRef(const char16_t* spec,
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- DoCanonicalizeRef<gurl_base::char16, gurl_base::char16>(spec, ref, output, out_ref);
+ DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref);
}
} // namespace url
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc
index 9a642cd..b36198a 100644
--- a/url/url_canon_filesystemurl.cc
+++ b/url/url_canon_filesystemurl.cc
@@ -94,14 +94,14 @@
new_parsed);
}
-bool CanonicalizeFileSystemURL(const gurl_base::char16* spec,
+bool CanonicalizeFileSystemURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileSystemURL<gurl_base::char16, gurl_base::char16>(
- spec, URLComponentSource<gurl_base::char16>(spec), parsed, charset_converter,
+ return DoCanonicalizeFileSystemURL<char16_t, char16_t>(
+ spec, URLComponentSource<char16_t>(spec), parsed, charset_converter,
output, new_parsed);
}
@@ -120,7 +120,7 @@
bool ReplaceFileSystemURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc
index 4622c6e..8f6c2f8 100644
--- a/url/url_canon_fileurl.cc
+++ b/url/url_canon_fileurl.cc
@@ -25,29 +25,47 @@
int FileDoDriveSpec(const CHAR* spec, int begin, int end,
CanonOutput* output) {
// The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
- // (with backslashes instead of slashes as well).
- int num_slashes = CountConsecutiveSlashes(spec, begin, end);
- int after_slashes = begin + num_slashes;
+ // /./c:/foo, (with backslashes instead of slashes as well). The code
+ // first guesses the beginning of the drive letter, then verifies that the
+ // path up to that point can be canonicalised as "/". If it can, then the
+ // found drive letter is indeed a drive letter, otherwise the path has no
+ // drive letter in it.
+ if (begin > end) // Nothing to search in.
+ return begin; // Found no letter, so didn't consume any characters.
- if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
- return begin; // Haven't consumed any characters
+ // If there is something that looks like a drive letter in the spec between
+ // begin and end, store its position in drive_letter_pos.
+ int drive_letter_pos =
+ DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
+ if (drive_letter_pos < begin)
+ return begin; // Found no letter, so didn't consume any characters.
- // A drive spec is the start of a path, so we need to add a slash for the
- // authority terminator (typically the third slash).
- output->push_back('/');
+ // Check if the path up to the drive letter candidate can be canonicalized as
+ // "/".
+ Component sub_path = MakeRange(begin, drive_letter_pos);
+ Component output_path;
+ const int initial_length = output->length();
+ bool success = CanonicalizePath(spec, sub_path, output, &output_path);
+ if (!success || output_path.len != 1 ||
+ output->at(output_path.begin) != '/') {
+ // Undo writing the canonicalized path.
+ output->set_length(initial_length);
+ return begin; // Found no letter, so didn't consume any characters.
+ }
- // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
- // and that it is followed by a colon/pipe.
+ // By now, "/" has been written to the output and a valid drive letter is
+ // confirmed at position drive_letter_pos, followed by a valid drive letter
+ // separator (a colon or a pipe).
- // Normalize Windows drive letters to uppercase
- if (gurl_base::IsAsciiLower(spec[after_slashes]))
- output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A'));
+ // Normalize Windows drive letters to uppercase.
+ if (gurl_base::IsAsciiLower(spec[drive_letter_pos]))
+ output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
else
- output->push_back(static_cast<char>(spec[after_slashes]));
+ output->push_back(static_cast<char>(spec[drive_letter_pos]));
// Normalize the character following it to a colon rather than pipe.
output->push_back(':');
- return after_slashes + 2;
+ return drive_letter_pos + 2;
}
#endif // WIN32
@@ -133,15 +151,15 @@
output, new_parsed);
}
-bool CanonicalizeFileURL(const gurl_base::char16* spec,
+bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileURL<gurl_base::char16, gurl_base::char16>(
- URLComponentSource<gurl_base::char16>(spec), parsed, query_converter,
- output, new_parsed);
+ return DoCanonicalizeFileURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, query_converter, output,
+ new_parsed);
}
bool FileCanonicalizePath(const char* spec,
@@ -152,12 +170,12 @@
output, out_path);
}
-bool FileCanonicalizePath(const gurl_base::char16* spec,
+bool FileCanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoFileCanonicalizePath<gurl_base::char16, gurl_base::char16>(spec, path,
- output, out_path);
+ return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
+ out_path);
}
bool ReplaceFileURL(const char* base,
@@ -175,7 +193,7 @@
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index 28a7c38..b278b15 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -3,8 +3,10 @@
// found in the LICENSE file.
#include "polyfills/base/check.h"
+#include "polyfills/base/metrics/histogram_macros.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_canon_ip.h"
namespace url {
@@ -80,7 +82,7 @@
const int kTempHostBufferLen = 1024;
typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;
-typedef RawCanonOutputT<gurl_base::char16, kTempHostBufferLen> StackBufferW;
+typedef RawCanonOutputT<char16_t, kTempHostBufferLen> StackBufferW;
// Scans a host name and fills in the output flags according to what we find.
// |has_non_ascii| will be true if there are any non-7-bit characters, and
@@ -174,7 +176,7 @@
}
// Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const gurl_base::char16* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) {
int original_output_len = output->length(); // So we can rewind below.
// We need to escape URL before doing IDN conversion, since punicode strings
@@ -239,6 +241,7 @@
// input or the unescaped version written to |*output| if necessary.
const char* utf8_source;
int utf8_source_len;
+ bool are_all_escaped_valid = true;
if (has_escaped) {
// Unescape before converting to UTF-16 for IDN. We write this into the
// output because it most likely does not require IDNization, and we can
@@ -247,14 +250,16 @@
// unescaped input requires IDN.
if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) {
// Error with some escape sequence. We'll call the current output
- // complete. DoSimpleHost will have written some "reasonable" output.
- return false;
+ // complete. DoSimpleHost will have written some "reasonable" output
+ // for the invalid escapes, but the output could be non-ASCII and
+ // needs to go through re-encoding below.
+ are_all_escaped_valid = false;
}
// Unescaping may have left us with ASCII input, in which case the
// unescaped version we wrote to output is complete.
if (!has_non_ascii) {
- return true;
+ return are_all_escaped_valid;
}
// Save the pointer into the data was just converted (it may be appended to
@@ -286,14 +291,18 @@
// This will call DoSimpleHost which will do normal ASCII canonicalization
// and also check for IP addresses in the outpt.
- return DoIDNHost(utf16.data(), utf16.length(), output);
+ return DoIDNHost(utf16.data(), utf16.length(), output) &&
+ are_all_escaped_valid;
}
// UTF-16 convert host to its ASCII version. The set up is already ready for
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
-bool DoComplexHost(const gurl_base::char16* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char16_t* host,
+ int host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
if (has_escaped) {
// Yikes, we have escaped characters with wide input. The escaped
// characters should be interpreted as UTF-8. To solve this problem,
@@ -370,6 +379,16 @@
if (host_info->IsIPAddress()) {
output->set_length(output_begin);
output->Append(canon_ip.data(), canon_ip.length());
+ } else if (host_info->family == CanonHostInfo::NEUTRAL) {
+ // Only need to call CheckHostnameSafety() for valid hosts that aren't IP
+ // addresses and aren't broken.
+ HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host);
+ // Don't record kOK. Ratio of OK to not-OK statuses is not meaningful at
+ // this layer, and hostnames are canonicalized a lot.
+ if (host_safety_status != HostSafetyStatus::kOk) {
+ UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus",
+ host_safety_status);
+ }
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
@@ -391,12 +410,12 @@
return (host_info.family != CanonHostInfo::BROKEN);
}
-bool CanonicalizeHost(const gurl_base::char16* spec,
+bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host) {
CanonHostInfo host_info;
- DoHost<gurl_base::char16, gurl_base::char16>(spec, host, output, &host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, &host_info);
*out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
@@ -408,11 +427,11 @@
DoHost<char, unsigned char>(spec, host, output, host_info);
}
-void CanonicalizeHostVerbose(const gurl_base::char16* spec,
+void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- DoHost<gurl_base::char16, gurl_base::char16>(spec, host, output, host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, host_info);
}
bool CanonicalizeHostSubstring(const char* spec,
@@ -421,10 +440,10 @@
return DoHostSubstring<char, unsigned char>(spec, host, output);
}
-bool CanonicalizeHostSubstring(const gurl_base::char16* spec,
+bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output) {
- return DoHostSubstring<gurl_base::char16, gurl_base::char16>(spec, host, output);
+ return DoHostSubstring<char16_t, char16_t>(spec, host, output);
}
} // namespace url
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc
index 93c9247..b4f8f81 100644
--- a/url/url_canon_icu.cc
+++ b/url/url_canon_icu.cc
@@ -9,7 +9,6 @@
#include <string.h>
#include "polyfills/base/check.h"
-#include "base/i18n/uchar.h"
#include <unicode/ucnv.h>
#include <unicode/ucnv_cb.h>
#include <unicode/utypes.h>
@@ -81,7 +80,7 @@
ICUCharsetConverter::~ICUCharsetConverter() = default;
-void ICUCharsetConverter::ConvertFromUTF16(const gurl_base::char16* input,
+void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) {
// Install our error handler. It will be called for character that can not
@@ -95,9 +94,8 @@
do {
UErrorCode err = U_ZERO_ERROR;
char* dest = &output->data()[begin_offset];
- int required_capacity =
- ucnv_fromUChars(converter_, dest, dest_capacity,
- gurl_base::i18n::ToUCharPtr(input), input_len, &err);
+ int required_capacity = ucnv_fromUChars(converter_, dest, dest_capacity,
+ input, input_len, &err);
if (err != U_BUFFER_OVERFLOW_ERROR) {
output->set_length(begin_offset + required_capacity);
return;
diff --git a/url/url_canon_icu.h b/url/url_canon_icu.h
index 33fc863..34bb99e 100644
--- a/url/url_canon_icu.h
+++ b/url/url_canon_icu.h
@@ -26,7 +26,7 @@
~ICUCharsetConverter() override;
- void ConvertFromUTF16(const gurl_base::char16* input,
+ void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) override;
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc
index 3f3025b..7cd5cae 100644
--- a/url/url_canon_icu_unittest.cc
+++ b/url/url_canon_icu_unittest.cc
@@ -67,7 +67,7 @@
std::string str;
StdStringCanonOutput output(&str);
- gurl_base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(icu_cases[i].input));
int input_len = static_cast<int>(input_str.length());
converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
@@ -84,7 +84,7 @@
ICUCharsetConverter converter(conv.converter());
for (int i = static_size - 2; i <= static_size + 2; i++) {
// Make a string with the appropriate length.
- gurl_base::string16 input;
+ std::u16string input;
for (int ch = 0; ch < i; ch++)
input.push_back('a');
@@ -138,7 +138,7 @@
}
if (query_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index 961c3b0..ab56e7b 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -85,7 +85,7 @@
// may get resized while we're overriding a subsequent component. Instead, the
// caller should use the beginning of the |utf8_buffer| as the string pointer
// for all components once all overrides have been prepared.
-bool PrepareUTF16OverrideComponent(const gurl_base::char16* override_source,
+bool PrepareUTF16OverrideComponent(const char16_t* override_source,
const Component& override_component,
CanonOutput* utf8_buffer,
Component* dest_component) {
@@ -233,7 +233,7 @@
0, // 0xE0 - 0xFF
};
-const gurl_base::char16 kUnicodeReplacementCharacter = 0xfffd;
+const char16_t kUnicodeReplacementCharacter = 0xfffd;
void AppendStringOfType(const char* source, int length,
SharedCharTypes type,
@@ -241,11 +241,11 @@
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
-void AppendStringOfType(const gurl_base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ int length,
SharedCharTypes type,
CanonOutput* output) {
- DoAppendStringOfType<gurl_base::char16, gurl_base::char16>(
- source, length, type, output);
+ DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
bool ReadUTFChar(const char* str, int* begin, int length,
@@ -261,7 +261,9 @@
return true;
}
-bool ReadUTFChar(const gurl_base::char16* str, int* begin, int length,
+bool ReadUTFChar(const char16_t* str,
+ int* begin,
+ int length,
unsigned* code_point_out) {
// This depends on ints and int32s being the same thing. If they're not, it
// will fail to compile.
@@ -279,13 +281,15 @@
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
-void AppendInvalidNarrowString(const gurl_base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ int begin,
+ int end,
CanonOutput* output) {
- DoAppendInvalidNarrowString<gurl_base::char16, gurl_base::char16>(
- spec, begin, end, output);
+ DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
-bool ConvertUTF16ToUTF8(const gurl_base::char16* input, int input_len,
+bool ConvertUTF16ToUTF8(const char16_t* input,
+ int input_len,
CanonOutput* output) {
bool success = true;
for (int i = 0; i < input_len; i++) {
@@ -296,8 +300,9 @@
return success;
}
-bool ConvertUTF8ToUTF16(const char* input, int input_len,
- CanonOutputT<gurl_base::char16>* output) {
+bool ConvertUTF8ToUTF16(const char* input,
+ int input_len,
+ CanonOutputT<char16_t>* output) {
bool success = true;
for (int i = 0; i < input_len; i++) {
unsigned code_point;
@@ -339,14 +344,14 @@
}
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<gurl_base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed) {
bool success = true;
// Get the source and parsed structures of the things we are replacing.
- const URLComponentSource<gurl_base::char16>& repl_source = repl.sources();
+ const URLComponentSource<char16_t>& repl_source = repl.sources();
const Parsed& repl_parsed = repl.components();
success &= PrepareUTF16OverrideComponent(
@@ -408,7 +413,7 @@
return 0;
}
-int _itow_s(int value, gurl_base::char16* buffer, size_t size_in_chars, int radix) {
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
if (radix != 10)
return EINVAL;
@@ -422,7 +427,7 @@
}
for (int i = 0; i < written; ++i) {
- buffer[i] = static_cast<gurl_base::char16>(temp[i]);
+ buffer[i] = char16_t{temp[i]};
}
buffer[written] = '\0';
return 0;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 4a282b1..11e0f7a 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -79,7 +79,8 @@
void AppendStringOfType(const char* source, int length,
SharedCharTypes type,
CanonOutput* output);
-void AppendStringOfType(const gurl_base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ int length,
SharedCharTypes type,
CanonOutput* output);
@@ -123,7 +124,7 @@
// required for relative URL resolving to test for scheme equality.
//
// Returns 0 if the input character is not a valid scheme character.
-char CanonicalSchemeChar(gurl_base::char16 ch);
+char CanonicalSchemeChar(char16_t ch);
// Write a single character, escaped, to the output. This always escapes: it
// does no checking that thee character requires escaping.
@@ -138,7 +139,7 @@
}
// The character we'll substitute for undecodable or invalid characters.
-extern const gurl_base::char16 kUnicodeReplacementCharacter;
+extern const char16_t kUnicodeReplacementCharacter;
// UTF-8 functions ------------------------------------------------------------
@@ -229,19 +230,19 @@
// can be incremented in a loop and will be ready for the next character.
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const gurl_base::char16* str,
+bool ReadUTFChar(const char16_t* str,
int* begin,
int length,
unsigned* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
inline void AppendUTF16Value(unsigned code_point,
- CanonOutputT<gurl_base::char16>* output) {
+ CanonOutputT<char16_t>* output) {
if (code_point > 0xffff) {
- output->push_back(static_cast<gurl_base::char16>((code_point >> 10) + 0xd7c0));
- output->push_back(static_cast<gurl_base::char16>((code_point & 0x3ff) | 0xdc00));
+ output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0));
+ output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00));
} else {
- output->push_back(static_cast<gurl_base::char16>(code_point));
+ output->push_back(static_cast<char16_t>(code_point));
}
}
@@ -266,8 +267,10 @@
//
// Assumes that ch[begin] is within range in the array, but does not assume
// that any following characters are.
-inline bool AppendUTF8EscapedChar(const gurl_base::char16* str, int* begin,
- int length, CanonOutput* output) {
+inline bool AppendUTF8EscapedChar(const char16_t* str,
+ int* begin,
+ int length,
+ CanonOutput* output) {
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
@@ -301,7 +304,7 @@
inline bool Is8BitChar(char c) {
return true; // this case is specialized to avoid a warning
}
-inline bool Is8BitChar(gurl_base::char16 c) {
+inline bool Is8BitChar(char16_t c) {
return c <= 255;
}
@@ -337,7 +340,9 @@
// the escaping rules are not guaranteed!
void AppendInvalidNarrowString(const char* spec, int begin, int end,
CanonOutput* output);
-void AppendInvalidNarrowString(const gurl_base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ int begin,
+ int end,
CanonOutput* output);
// Misc canonicalization helpers ----------------------------------------------
@@ -351,17 +356,17 @@
// return false in the failure case, and the caller should not continue as
// normal.
COMPONENT_EXPORT(URL)
-bool ConvertUTF16ToUTF8(const gurl_base::char16* input,
+bool ConvertUTF16ToUTF8(const char16_t* input,
int input_len,
CanonOutput* output);
COMPONENT_EXPORT(URL)
bool ConvertUTF8ToUTF16(const char* input,
int input_len,
- CanonOutputT<gurl_base::char16>* output);
+ CanonOutputT<char16_t>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
// converter is NULL, this will use UTF-8.
-void ConvertUTF16ToQueryEncoding(const gurl_base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output);
@@ -397,21 +402,21 @@
// although we will have still done the override with "invalid characters" in
// place of errors.
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<gurl_base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed);
// Implemented in url_canon_path.cc, these are required by the relative URL
// resolver as well, so we declare them here.
-bool CanonicalizePartialPath(const char* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
-bool CanonicalizePartialPath(const gurl_base::char16* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output);
#ifndef WIN32
@@ -419,7 +424,7 @@
COMPONENT_EXPORT(URL)
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
COMPONENT_EXPORT(URL)
-int _itow_s(int value, gurl_base::char16* buffer, size_t size_in_chars, int radix);
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix);
// Secure template overloads for these functions
template<size_t N>
@@ -427,8 +432,8 @@
return _itoa_s(value, buffer, N, radix);
}
-template<size_t N>
-inline int _itow_s(int value, gurl_base::char16 (&buffer)[N], int radix) {
+template <size_t N>
+inline int _itow_s(int value, char16_t (&buffer)[N], int radix) {
return _itow_s(value, buffer, N, radix);
}
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc
index c214217..8234b4e 100644
--- a/url/url_canon_ip.cc
+++ b/url/url_canon_ip.cc
@@ -9,6 +9,8 @@
#include <limits>
#include "polyfills/base/check.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
namespace url {
@@ -593,6 +595,105 @@
return true;
}
+// Returns true if |component| of |spec| looks like a number: every character
+// is CHAR_DEC, or it has a 0x/0X prefix followed only by CHAR_HEX characters
+// (a bare "0x" passes too). Used instead of IPv4ComponentToNumber() so that it
+// counts things that look like bad base-8 (e.g. 09).
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+template <typename CHAR>
+bool LooksLikeANumber(const CHAR* spec, const Component& component) {
+ // Empty components don't look like numbers.
+ if (!component.is_nonempty())
+ return false;
+
+ SharedCharTypes base = CHAR_DEC;
+ int start = component.begin;
+ if (component.len >= 2 && spec[start] == '0' &&
+ (spec[start + 1] == 'x' || spec[start + 1] == 'X')) {
+ base = CHAR_HEX;
+ start += 2;
+ }
+ for (int i = start; i < component.end(); i++) {
+ if (!IsCharOfType(spec[i], base))
+ return false;
+ }
+ return true;
+}
+
+// Calculates the "HostSafetyStatus" of the provided hostname. Returns kOk
+// unless the last component looks numeric and the IP parsers leave it NEUTRAL.
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+template <typename CHAR>
+HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec,
+ const Component& host) {
+ if (!host.is_nonempty())
+ return HostSafetyStatus::kOk;
+
+ // Find the last two components.
+
+ // Number of identified components. Stops after second component. Does not
+ // include the empty terminal component, if the host ends with a dot.
+ int existing_components = 0;
+ // Parsed component values. Populated last component first.
+ Component components[2];
+
+ // Index of the character after the end of the current component.
+ int cur_component_end = host.end();
+
+ // Ignore terminal dot, if there is one.
+ if (spec[cur_component_end - 1] == '.') {
+ cur_component_end--;
+ // Nothing else to do if the host is just a dot.
+ if (host.begin == cur_component_end)
+ return HostSafetyStatus::kOk;
+ }
+
+ for (int i = cur_component_end; /* nothing */; i--) {
+ GURL_DCHECK_GE(i, host.begin);
+
+ // If `i` is not the first character of the component, continue.
+ if (i != host.begin && spec[i - 1] != '.')
+ continue;
+
+ // Otherwise, `i` is the index of the start of a component.
+ components[existing_components] = Component(i, cur_component_end - i);
+ existing_components++;
+
+ // Finished parsing last component.
+ if (i == host.begin)
+ break;
+
+ // Once the last two components have been found, anything left to parse
+ // before them does not matter, so stop.
+ if (existing_components == 2)
+ break;
+
+ // The next component ends before the dot at spec[i - 1]. `i` will be
+ // decremented when restarting the loop, so no need to modify it.
+ cur_component_end = i - 1;
+ }
+
+ // If the last value doesn't look like a number, no need to do more work, as
+ // IPv6 and hostnames with non-numeric final components are all considered OK.
+ if (!LooksLikeANumber(spec, components[0]))
+ return HostSafetyStatus::kOk;
+
+ url::RawCanonOutputT<char> ignored_output;
+ CanonHostInfo host_info;
+ CanonicalizeIPAddress(spec, host, &ignored_output, &host_info);
+ // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4
+ // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean
+ // things like "].6" will correctly be considered already invalid, so will
+ // return kOk.
+ if (host_info.family != CanonHostInfo::NEUTRAL)
+ return HostSafetyStatus::kOk;
+
+ if (LooksLikeANumber(spec, components[1]))
+ return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric;
+
+ return HostSafetyStatus::kTopLevelDomainIsNumeric;
+}
+
} // namespace
void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
@@ -650,11 +751,10 @@
return DoFindIPv4Components<char, unsigned char>(spec, host, components);
}
-bool FindIPv4Components(const gurl_base::char16* spec,
+bool FindIPv4Components(const char16_t* spec,
const Component& host,
Component components[4]) {
- return DoFindIPv4Components<gurl_base::char16, gurl_base::char16>(
- spec, host, components);
+ return DoFindIPv4Components<char16_t, char16_t>(spec, host, components);
}
void CanonicalizeIPAddress(const char* spec,
@@ -669,15 +769,15 @@
return;
}
-void CanonicalizeIPAddress(const gurl_base::char16* spec,
+void CanonicalizeIPAddress(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (DoCanonicalizeIPv4Address<gurl_base::char16, gurl_base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
- if (DoCanonicalizeIPv6Address<gurl_base::char16, gurl_base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
}
@@ -688,12 +788,12 @@
return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components);
}
-CanonHostInfo::Family IPv4AddressToNumber(const gurl_base::char16* spec,
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<gurl_base::char16>(
- spec, host, address, num_ipv4_components);
+ return DoIPv4AddressToNumber<char16_t>(spec, host, address,
+ num_ipv4_components);
}
bool IPv6AddressToNumber(const char* spec,
@@ -702,10 +802,19 @@
return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
}
-bool IPv6AddressToNumber(const gurl_base::char16* spec,
+bool IPv6AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[16]) {
- return DoIPv6AddressToNumber<gurl_base::char16, gurl_base::char16>(spec, host, address);
+ return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
+}
+
+HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) {
+ return DoCheckHostnameSafety(spec, host);
+}
+
+HostSafetyStatus CheckHostnameSafety(const char16_t* spec,
+ const Component& host) {
+ return DoCheckHostnameSafety(spec, host);
}
} // namespace url
diff --git a/url/url_canon_ip.h b/url/url_canon_ip.h
index 5d93f28..8980dbb 100644
--- a/url/url_canon_ip.h
+++ b/url/url_canon_ip.h
@@ -6,7 +6,7 @@
#define URL_URL_CANON_IP_H_
#include "polyfills/base/component_export.h"
-#include "base/strings/string16.h"
+#include "base/strings/string_piece_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -43,7 +43,7 @@
const Component& host,
Component components[4]);
COMPONENT_EXPORT(URL)
-bool FindIPv4Components(const gurl_base::char16* spec,
+bool FindIPv4Components(const char16_t* spec,
const Component& host,
Component components[4]);
@@ -64,7 +64,7 @@
unsigned char address[4],
int* num_ipv4_components);
COMPONENT_EXPORT(URL)
-CanonHostInfo::Family IPv4AddressToNumber(const gurl_base::char16* spec,
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components);
@@ -79,10 +79,52 @@
const Component& host,
unsigned char address[16]);
COMPONENT_EXPORT(URL)
-bool IPv6AddressToNumber(const gurl_base::char16* spec,
+bool IPv6AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[16]);
+// Temporary enum for collecting histograms at the DNS and URL level about
+// hostname validity, for potentially updating the URL spec.
+//
+// This is used in histograms, so old values should not be reused, and new
+// values should be added at the bottom.
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+enum class HostSafetyStatus {
+ // Any canonical hostname that doesn't fit into any other class. IPv4
+ // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are
+ // broken are also considered OK.
+ kOk = 0,
+
+ // The top level domain looks numeric. This basically means it either
+ // parses as a number per the URL spec, or is entirely numeric ("09" doesn't
+ // currently parse as a number, since the leading "0" indicates an octal
+ // value).
+ kTopLevelDomainIsNumeric = 1,
+
+ // Both the top level domain and the next level domain look like a number,
+ // using the above definition. This is the case that is actually concerning -
+ // for these domains, the eTLD+1 is purely numeric, which means putting it as
+ // the hostname of a URL will potentially result in an IPv4 hostname. This is
+ // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this
+ // label will be returned instead.
+ kTwoHighestLevelDomainsAreNumeric = 2,
+
+ kMaxValue = kTwoHighestLevelDomainsAreNumeric,
+};
+
+// Calculates the HostSafetyStatus of a hostname. Hostname should have been
+// canonicalized. This function is only intended to be temporary, to inform
+// decisions around tightening up what the URL parser considers valid hostnames.
+//
+// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
+COMPONENT_EXPORT(URL)
+HostSafetyStatus CheckHostnameSafety(const char* hostname,
+ const Component& host);
+COMPONENT_EXPORT(URL)
+HostSafetyStatus CheckHostnameSafety(const char16_t* hostname,
+ const Component& host);
+
} // namespace url
#endif // URL_URL_CANON_IP_H_
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index f09faa7..f4fe2b4 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc
@@ -90,13 +90,13 @@
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizeMailtoURL(const gurl_base::char16* spec,
+bool CanonicalizeMailtoURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeMailtoURL<gurl_base::char16, gurl_base::char16>(
- URLComponentSource<gurl_base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizeMailtoURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
}
bool ReplaceMailtoURL(const char* base,
@@ -113,7 +113,7 @@
bool ReplaceMailtoURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index da32bd8..e043043 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -20,7 +20,8 @@
// table below more clear when neither ESCAPE or UNESCAPE is set.
PASS = 0,
- // This character requires special handling in DoPartialPath. Doing this test
+ // This character requires special handling in DoPartialPathInternal. Doing
+ // this test
// first allows us to filter out the common cases of regular characters that
// can be directly copied.
SPECIAL = 1,
@@ -235,10 +236,8 @@
}
}
-// Appends the given path to the output. It assumes that if the input path
-// starts with a slash, it should be copied to the output. If no path has
-// already been appended to the output (the case when not resolving
-// relative URLs), the path should begin with a slash.
+// Canonicalizes and appends the given path to the output. It assumes that if
+// the input path starts with a slash, it should be copied to the output.
//
// If there are already path components (this mode is used when appending
// relative paths for resolving), it assumes that the output already has
@@ -248,11 +247,11 @@
// We do not collapse multiple slashes in a row to a single slash. It seems
// no web browsers do this, and we don't want incompatibilities, even though
// it would be correct for most systems.
-template<typename CHAR, typename UCHAR>
-bool DoPartialPath(const CHAR* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
+template <typename CHAR, typename UCHAR>
+bool DoPartialPathInternal(const CHAR* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
int end = path.end();
// We use this variable to minimize the amount of work done when unescaping --
@@ -279,16 +278,12 @@
// Needs special handling of some sort.
int dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
- // See if this dot was preceded by a slash in the output. We
- // assume that when canonicalizing paths, they will always
- // start with a slash and not a dot, so we don't have to
- // bounds check the output.
+ // See if this dot was preceded by a slash in the output.
//
// Note that we check this in the case of dots so we don't have to
// special case slashes. Since slashes are much more common than
// dots, this actually increases performance measurably (though
// slightly).
- GURL_DCHECK(output->length() > path_begin_in_output);
if (output->length() > path_begin_in_output &&
output->at(output->length() - 1) == '/') {
// Slash followed by a dot, check to see if this is means relative
@@ -382,6 +377,21 @@
return success;
}
+// Performs the same logic as DoPartialPathInternal(), but updates the
+// publicly exposed CanonOutput structure similar to DoPath(). Returns
+// true if successful.
+template <typename CHAR, typename UCHAR>
+bool DoPartialPath(const CHAR* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path) {
+ out_path->begin = output->length();
+ bool success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ out_path->len = output->length() - out_path->begin;
+ return success;
+}
+
template<typename CHAR, typename UCHAR>
bool DoPath(const CHAR* spec,
const Component& path,
@@ -397,7 +407,8 @@
if (!IsURLSlash(spec[path.begin]))
output->push_back('/');
- success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
} else {
// No input, canonical path is a slash.
output->push_back('/');
@@ -415,28 +426,41 @@
return DoPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePath(const gurl_base::char16* spec,
+bool CanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoPath<gurl_base::char16, gurl_base::char16>(spec, path, output, out_path);
+ return DoPath<char16_t, char16_t>(spec, path, output, out_path);
}
bool CanonicalizePartialPath(const char* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePartialPath(const gurl_base::char16* spec,
+bool CanonicalizePartialPath(const char16_t* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<gurl_base::char16, gurl_base::char16>(spec, path,
- path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path);
+}
+
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char, unsigned char>(
+ spec, path, path_begin_in_output, output);
+}
+
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ int path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char16_t, char16_t>(
+ spec, path, path_begin_in_output, output);
}
} // namespace url
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index 0330b06..134e132 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc
@@ -62,8 +62,8 @@
new_parsed->password.reset();
new_parsed->host.reset();
new_parsed->port.reset();
- // We allow path URLs to have the path, query and fragment components, but we
- // will canonicalize each of the via the weaker path URL rules.
+
+ // Canonicalize path and query via the weaker path URL rules.
//
// Note: parsing the path part should never cause a failure, see
// https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
@@ -71,8 +71,8 @@
output, &new_parsed->path);
DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?',
output, &new_parsed->query);
- DoCanonicalizePathComponent<CHAR, UCHAR>(source.ref, parsed.ref, '#', output,
- &new_parsed->ref);
+
+ CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
return success;
}
@@ -88,13 +88,29 @@
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizePathURL(const gurl_base::char16* spec,
+bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizePathURL<gurl_base::char16, gurl_base::char16>(
- URLComponentSource<gurl_base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizePathURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
+}
+
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
+ output, new_component);
+}
+
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
+ output, new_component);
}
bool ReplacePathURL(const char* base,
@@ -111,7 +127,7 @@
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc
index 99b8ed8..b3a1118 100644
--- a/url/url_canon_query.cc
+++ b/url/url_canon_query.cc
@@ -82,7 +82,7 @@
// Runs the converter with the given UTF-16 input. We don't have to do
// anything, but this overridden function allows us to use the same code
// for both UTF-8 and UTF-16 input.
-void RunConverter(const gurl_base::char16* spec,
+void RunConverter(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
@@ -144,21 +144,20 @@
output, out_query);
}
-void CanonicalizeQuery(const gurl_base::char16* spec,
+void CanonicalizeQuery(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output,
Component* out_query) {
- DoCanonicalizeQuery<gurl_base::char16, gurl_base::char16>(spec, query, converter,
- output, out_query);
+ DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output,
+ out_query);
}
-void ConvertUTF16ToQueryEncoding(const gurl_base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- DoConvertToQueryEncoding<gurl_base::char16, gurl_base::char16>(input, query,
- converter, output);
+ DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output);
}
} // namespace url
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index e148128..f047d7f 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -358,8 +358,8 @@
int path_begin = output->length();
CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(),
output);
- success &= CanonicalizePartialPath(relative_url, path, path_begin,
- output);
+ success &= CanonicalizePartialPathInternal(relative_url, path, path_begin,
+ output);
out_parsed->path = MakeRange(path_begin, output->length());
// Copy the rest of the stuff after the path from the relative path.
@@ -581,14 +581,14 @@
bool IsRelativeURL(const char* base,
const Parsed& base_parsed,
- const gurl_base::char16* fragment,
+ const char16_t* fragment,
int fragment_len,
bool is_base_hierarchical,
bool* is_relative,
Component* relative_component) {
- return DoIsRelativeURL<gurl_base::char16>(
- base, base_parsed, fragment, fragment_len, is_base_hierarchical,
- is_relative, relative_component);
+ return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len,
+ is_base_hierarchical, is_relative,
+ relative_component);
}
bool ResolveRelativeURL(const char* base_url,
@@ -607,14 +607,14 @@
bool ResolveRelativeURL(const char* base_url,
const Parsed& base_parsed,
bool base_is_file,
- const gurl_base::char16* relative_url,
+ const char16_t* relative_url,
const Component& relative_component,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* out_parsed) {
- return DoResolveRelativeURL<gurl_base::char16>(
- base_url, base_parsed, base_is_file, relative_url,
- relative_component, query_converter, output, out_parsed);
+ return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file,
+ relative_url, relative_component,
+ query_converter, output, out_parsed);
}
} // namespace url
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h
index 9b7943a..6d23abf 100644
--- a/url/url_canon_stdstring.h
+++ b/url/url_canon_stdstring.h
@@ -59,11 +59,11 @@
// references to std::strings.
// Note: Extra const char* overloads are necessary to break ambiguities that
// would otherwise exist for char literals.
-template <typename STR>
-class StringPieceReplacements : public Replacements<typename STR::value_type> {
+template <typename CharT>
+class StringPieceReplacements : public Replacements<CharT> {
private:
- using CharT = typename STR::value_type;
- using StringPieceT = gurl_base::BasicStringPiece<STR>;
+ using StringT = std::basic_string<CharT>;
+ using StringPieceT = gurl_base::BasicStringPiece<CharT>;
using ParentT = Replacements<CharT>;
using SetterFun = void (ParentT::*)(const CharT*, const Component&);
@@ -74,35 +74,35 @@
public:
void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
- void SetSchemeStr(const STR&&) = delete;
+ void SetSchemeStr(const StringT&&) = delete;
void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
- void SetUsernameStr(const STR&&) = delete;
+ void SetUsernameStr(const StringT&&) = delete;
void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
- void SetPasswordStr(const STR&&) = delete;
+ void SetPasswordStr(const StringT&&) = delete;
void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
- void SetHostStr(const STR&&) = delete;
+ void SetHostStr(const StringT&&) = delete;
void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
- void SetPortStr(const STR&&) = delete;
+ void SetPortStr(const StringT&&) = delete;
void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
- void SetPathStr(const STR&&) = delete;
+ void SetPathStr(const StringT&&) = delete;
void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
- void SetQueryStr(const STR&&) = delete;
+ void SetQueryStr(const StringT&&) = delete;
void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
- void SetRefStr(const STR&&) = delete;
+ void SetRefStr(const StringT&&) = delete;
};
} // namespace url
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc
index 005877a..c7e7454 100644
--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc
@@ -150,16 +150,16 @@
output, new_parsed);
}
-bool CanonicalizeStandardURL(const gurl_base::char16* spec,
+bool CanonicalizeStandardURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeStandardURL<gurl_base::char16, gurl_base::char16>(
- URLComponentSource<gurl_base::char16>(spec), parsed, scheme_type,
- query_converter, output, new_parsed);
+ return DoCanonicalizeStandardURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter,
+ output, new_parsed);
}
// It might be nice in the future to optimize this so unchanged components don't
@@ -189,7 +189,7 @@
// regular code path can be used.
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index e2469ca..a59c745 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -12,6 +12,7 @@
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
+#include "url/url_canon_ip.h"
#include "url/url_canon_stdstring.h"
#include "url/url_test_utils.h"
@@ -186,7 +187,7 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- gurl_base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
int input_len = static_cast<int>(input_str.length());
bool success = true;
@@ -205,7 +206,7 @@
// UTF-16 -> UTF-8
std::string input8_str(utf_cases[i].input8);
- gurl_base::string16 input16_str(
+ std::u16string input16_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
EXPECT_EQ(input8_str, gurl_base::UTF16ToUTF8(input16_str));
@@ -258,7 +259,7 @@
out_str.clear();
StdStringCanonOutput output2(&out_str);
- gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(scheme_cases[i].input));
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(scheme_cases[i].input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
@@ -529,7 +530,7 @@
// Wide version.
if (host_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -580,7 +581,7 @@
// Wide version.
if (host_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -703,7 +704,7 @@
}
// 16-bit version.
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
@@ -856,7 +857,7 @@
}
// 16-bit version.
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
@@ -988,7 +989,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(user_info_cases[i].input));
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(user_info_cases[i].input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1051,7 +1052,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- gurl_base::string16 wide_input(gurl_base::UTF8ToUTF16(port_cases[i].input));
+ std::u16string wide_input(gurl_base::UTF8ToUTF16(port_cases[i].input));
success = CanonicalizePort(wide_input.c_str(),
in_comp,
port_cases[i].default_port,
@@ -1066,105 +1067,117 @@
}
}
-TEST(URLCanonTest, Path) {
- DualComponentCase path_cases[] = {
- // ----- path collapsing tests -----
- {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
- {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
- {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
- {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
- // double dots followed by a slash or the end of the string count
- {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
- {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
- // don't count double dots when they aren't followed by a slash
- {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
- // some in the middle
- {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8),
- true},
- {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
- Component(0, 2), true},
- // we should not be able to go above the root
- {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
- {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4),
- true},
- // escaped dots should be unescaped and treated the same as dots
- {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
- {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
- {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
- "/..bar", Component(0, 6), true},
- // Multiple slashes in a row should be preserved and treated like empty
- // directory names.
- {"////../..", L"////../..", "//", Component(0, 2), true},
+DualComponentCase kCommonPathCases[] = {
+ // ----- path collapsing tests -----
+ {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
+ {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
+ {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
+ {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
+ // double dots followed by a slash or the end of the string count
+ {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
+ {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
+ // don't count double dots when they aren't followed by a slash
+ {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
+ // some in the middle
+ {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true},
+ {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
+ Component(0, 2), true},
+ // we should not be able to go above the root
+ {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
+ {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true},
+ // escaped dots should be unescaped and treated the same as dots
+ {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
+ {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
+ {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
+ "/..bar", Component(0, 6), true},
+ // Multiple slashes in a row should be preserved and treated like empty
+ // directory names.
+ {"////../..", L"////../..", "//", Component(0, 2), true},
- // ----- escaping tests -----
- {"/foo", L"/foo", "/foo", Component(0, 4), true},
- // Valid escape sequence
- {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
- // Invalid escape sequence we should pass through unchanged.
- {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
- {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
- // Invalid escape sequence: bad characters should be treated the same as
- // the sourrounding text, not as escaped (in this case, UTF-8).
- {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
- {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16),
- true},
- {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar",
- Component(0, 22), true},
- // Regular characters that are escaped should be unescaped
- {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
- // Funny characters that are unescaped should be escaped
- {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
- {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
- // Invalid characters that are escaped should cause a failure.
- {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
- // Some characters should be passed through unchanged regardless of esc.
- {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
- true},
- // Characters that are properly escaped should not have the case changed
- // of hex letters.
- {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
- true},
- // Funny characters that are unescaped should be escaped
- {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
- // Backslashes should get converted to forward slashes
- {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
- // Hashes found in paths (possibly only when the caller explicitly sets
- // the path on an already-parsed URL) should be escaped.
- {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
- // %7f should be allowed and %3D should not be unescaped (these were wrong
- // in a previous version).
- {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
- "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
- // @ should be passed through unchanged (escaped or unescaped).
- {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
- // Nested escape sequences should result in escaping the leading '%' if
- // unescaping would result in a new escape sequence.
- {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
- {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
- {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
- // Make sure truncated "nested" escapes don't result in reading off the
- // string end.
- {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
- // Don't unescape the leading '%' if unescaping doesn't result in a valid
- // new escape sequence.
- {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
- {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
- // Don't erroneously downcast a UTF-16 charater in a way that makes it
- // look like part of an escape sequence.
- {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
+ // ----- escaping tests -----
+ {"/foo", L"/foo", "/foo", Component(0, 4), true},
+ // Valid escape sequence
+ {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
+ // Invalid escape sequence we should pass through unchanged.
+ {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
+ {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
+ // Invalid escape sequence: bad characters should be treated the same as
+ // the surrounding text, not as escaped (in this case, UTF-8).
+ {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
+ {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true},
+ {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22),
+ true},
+ // Regular characters that are escaped should be unescaped
+ {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
+ // Funny characters that are unescaped should be escaped
+ {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
+ {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
+ // Invalid characters that are escaped should cause a failure.
+ {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+ // Some characters should be passed through unchanged regardless of esc.
+ {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
+ true},
+ // Characters that are properly escaped should not have the case changed
+ // of hex letters.
+ {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
+ true},
+ // Funny characters that are unescaped should be escaped
+ {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
+ // Backslashes should get converted to forward slashes
+ {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
+ // Hashes found in paths (possibly only when the caller explicitly sets
+ // the path on an already-parsed URL) should be escaped.
+ {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
+ // %7f should be allowed and %3D should not be unescaped (these were wrong
+ // in a previous version).
+ {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
+ "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
+ // @ should be passed through unchanged (escaped or unescaped).
+ {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
+ // Nested escape sequences should result in escaping the leading '%' if
+ // unescaping would result in a new escape sequence.
+ {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
+ {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
+ {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
+ // Make sure truncated "nested" escapes don't result in reading off the
+ // string end.
+ {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
+ // Don't unescape the leading '%' if unescaping doesn't result in a valid
+ // new escape sequence.
+ {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
+ {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
+ // Don't erroneously downcast a UTF-16 character in a way that makes it
+ // look like part of an escape sequence.
+ {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
- // ----- encoding tests -----
- // Basic conversions
- {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
- L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
- Component(0, 37), true},
- // Invalid unicode characters should fail. We only do validation on
- // UTF-16 input, so this doesn't happen on 8-bit.
- {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
- {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
- };
+ // ----- encoding tests -----
+ // Basic conversions
+ {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+ L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
+ Component(0, 37), true},
+ // Invalid unicode characters should fail. We only do validation on
+ // UTF-16 input, so this doesn't happen on 8-bit.
+ {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
+ {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+};
- for (size_t i = 0; i < gurl_base::size(path_cases); i++) {
+typedef bool (*CanonFunc8Bit)(const char*,
+ const Component&,
+ CanonOutput*,
+ Component*);
+typedef bool (*CanonFunc16Bit)(const char16_t*,
+ const Component&,
+ CanonOutput*,
+ Component*);
+
+void DoPathTest(const DualComponentCase* path_cases,
+ size_t num_cases,
+ CanonFunc8Bit canon_func_8,
+ CanonFunc16Bit canon_func_16) {
+ for (size_t i = 0; i < num_cases; i++) {
+ testing::Message scope_message;
+ scope_message << path_cases[i].input8 << "," << path_cases[i].input16;
+ SCOPED_TRACE(scope_message);
if (path_cases[i].input8) {
int len = static_cast<int>(strlen(path_cases[i].input8));
Component in_comp(0, len);
@@ -1172,7 +1185,7 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp);
+ canon_func_8(path_cases[i].input8, in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1182,7 +1195,7 @@
}
if (path_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(path_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1191,7 +1204,7 @@
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp);
+ canon_func_16(input16.c_str(), in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1200,6 +1213,11 @@
EXPECT_EQ(path_cases[i].expected, out_str);
}
}
+}
+
+TEST(URLCanonTest, Path) {
+ DoPathTest(kCommonPathCases, gurl_base::size(kCommonPathCases), CanonicalizePath,
+ CanonicalizePath);
// Manual test: embedded NULLs should be escaped and the URL should be marked
// as invalid.
@@ -1215,6 +1233,18 @@
EXPECT_EQ("/ab%00c", out_str);
}
+TEST(URLCanonTest, PartialPath) {
+ DualComponentCase partial_path_cases[] = {
+ {".html", L".html", ".html", Component(0, 5), true},
+ {"", L"", "", Component(0, 0), true},
+ };
+
+ DoPathTest(kCommonPathCases, gurl_base::size(kCommonPathCases),
+ CanonicalizePartialPath, CanonicalizePartialPath);
+ DoPathTest(partial_path_cases, gurl_base::size(partial_path_cases),
+ CanonicalizePartialPath, CanonicalizePartialPath);
+}
+
TEST(URLCanonTest, Query) {
struct QueryCase {
const char* input8;
@@ -1258,7 +1288,7 @@
}
if (query_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1332,7 +1362,7 @@
// 16-bit input
if (ref_cases[i].input16) {
- gurl_base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1360,8 +1390,8 @@
output.Complete();
EXPECT_EQ(1, out_comp.begin);
- EXPECT_EQ(3, out_comp.len);
- EXPECT_EQ("#abz", out_str);
+ EXPECT_EQ(6, out_comp.len);
+ EXPECT_EQ("#ab%00z", out_str);
}
TEST(URLCanonTest, CanonicalizeStandardURL) {
@@ -1821,20 +1851,28 @@
// Busted refs shouldn't make the whole thing fail.
{"file:///C:/asdf#\xc2", "file:///C:/asdf#%EF%BF%BD", true, Component(),
Component(7, 8)},
+ {"file:///./s:", "file:///S:", true, Component(), Component(7, 3)},
#else
// Unix-style paths
- {"file:///home/me", "file:///home/me", true, Component(), Component(7, 8)},
+ {"file:///home/me", "file:///home/me", true, Component(),
+ Component(7, 8)},
// Windowsy ones should get still treated as Unix-style.
- {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), Component(7, 16)},
- {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, Component(), Component(7, 19)},
+ {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(),
+ Component(7, 16)},
+ {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true,
+ Component(), Component(7, 19)},
+ {"file:///./s:", "file:///s:", true, Component(), Component(7, 3)},
// file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html)
- {"//", "file:///", true, Component(), Component(7, 1)},
- {"///", "file:///", true, Component(), Component(7, 1)},
- {"///test", "file:///test", true, Component(), Component(7, 5)},
- {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
- {"file://localhost", "file://localhost/", true, Component(7, 9), Component(16, 1)},
- {"file://localhost/", "file://localhost/", true, Component(7, 9), Component(16, 1)},
- {"file://localhost/test", "file://localhost/test", true, Component(7, 9), Component(16, 5)},
+ {"//", "file:///", true, Component(), Component(7, 1)},
+ {"///", "file:///", true, Component(), Component(7, 1)},
+ {"///test", "file:///test", true, Component(), Component(7, 5)},
+ {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
+ {"file://localhost", "file://localhost/", true, Component(7, 9),
+ Component(16, 1)},
+ {"file://localhost/", "file://localhost/", true, Component(7, 9),
+ Component(16, 1)},
+ {"file://localhost/test", "file://localhost/test", true, Component(7, 9),
+ Component(16, 5)},
#endif // _WIN32
};
@@ -1952,6 +1990,53 @@
}
}
+TEST(URLCanonTest, CanonicalizePathURLPath) {
+ struct PathCase {
+ std::string input;
+ std::wstring input16;
+ std::string expected;
+ } path_cases[] = {
+ {"Foo", L"Foo", "Foo"},
+ {"\":This /is interesting;?#", L"\":This /is interesting;?#",
+ "\":This /is interesting;?#"},
+ {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+ };
+
+ for (size_t i = 0; i < gurl_base::size(path_cases); i++) {
+ // 8-bit string input
+ std::string out_str;
+ StdStringCanonOutput output(&out_str);
+ url::Component out_component;
+ CanonicalizePathURLPath(path_cases[i].input.data(),
+ Component(0, path_cases[i].input.size()), &output,
+ &out_component);
+ output.Complete();
+
+ EXPECT_EQ(path_cases[i].expected, out_str);
+
+ EXPECT_EQ(0, out_component.begin);
+ EXPECT_EQ(path_cases[i].expected.size(),
+ static_cast<size_t>(out_component.len));
+
+ // 16-bit string input
+ std::string out_str16;
+ StdStringCanonOutput output16(&out_str16);
+ url::Component out_component16;
+ std::u16string input16(
+ test_utils::TruncateWStringToUTF16(path_cases[i].input16.data()));
+ CanonicalizePathURLPath(input16.c_str(),
+ Component(0, path_cases[i].input16.size()),
+ &output16, &out_component16);
+ output16.Complete();
+
+ EXPECT_EQ(path_cases[i].expected, out_str16);
+
+ EXPECT_EQ(0, out_component16.begin);
+ EXPECT_EQ(path_cases[i].expected.size(),
+ static_cast<size_t>(out_component16.len));
+ }
+}
+
TEST(URLCanonTest, CanonicalizeMailtoURL) {
struct URLCase {
const char* input;
@@ -2086,17 +2171,17 @@
// We fill the buffer with 0xff to ensure that it's getting properly
// null-terminated. We also allocate one byte more than what we tell
// _itoa_s about, and ensure that the extra byte is untouched.
- gurl_base::char16 buf[6];
+ char16_t buf[6];
const char fill_mem = 0xff;
- const gurl_base::char16 fill_char = 0xffff;
+ const char16_t fill_char = 0xffff;
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("12"), gurl_base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
// Test the edge cases - exactly the buffer size and one over
EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("1234"), gurl_base::string16(buf));
+ EXPECT_EQ(u"1234", std::u16string(buf));
EXPECT_EQ(fill_char, buf[5]);
memset(buf, fill_mem, sizeof(buf));
@@ -2106,13 +2191,12 @@
// Test the template overload (note that this will see the full buffer)
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, 10));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("12"),
- gurl_base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12345, buf, 10));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("12345"), gurl_base::string16(buf));
+ EXPECT_EQ(u"12345", std::u16string(buf));
EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
}
@@ -2343,12 +2427,12 @@
// Override two components, the path with something short, and the query with
// something long enough to trigger the bug.
- Replacements<gurl_base::char16> repl;
- gurl_base::string16 new_query;
+ Replacements<char16_t> repl;
+ std::u16string new_query;
for (int i = 0; i < 4800; i++)
new_query.push_back('a');
- gurl_base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
+ std::u16string new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
repl.SetPath(new_path.c_str(), Component(0, 4));
repl.SetQuery(new_query.c_str(),
Component(0, static_cast<int>(new_query.length())));
@@ -2398,41 +2482,41 @@
RawCanonOutputW<1024> output;
// Basic ASCII test.
- gurl_base::string16 str = gurl_base::UTF8ToUTF16("hello");
+ std::u16string str = u"hello";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("hello"), gurl_base::string16(output.data()));
+ EXPECT_EQ(u"hello", std::u16string(output.data()));
output.set_length(0);
// Mixed ASCII/non-ASCII.
- str = gurl_base::UTF8ToUTF16("hellö");
+ str = u"hellö";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--hell-8qa"), gurl_base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// All non-ASCII.
- str = gurl_base::UTF8ToUTF16("你好");
+ str = u"你好";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--6qq79v"), gurl_base::string16(output.data()));
+ EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data()));
output.set_length(0);
// Characters that need mapping (the resulting Punycode is the encoding for
// "1⁄4").
- str = gurl_base::UTF8ToUTF16("¼");
+ str = u"¼";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--14-c6t"), gurl_base::string16(output.data()));
+ EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and all ASCII. Should not
// modify the string.
- str = gurl_base::UTF8ToUTF16("xn--hell-8qa");
+ str = u"xn--hell-8qa";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(gurl_base::UTF8ToUTF16("xn--hell-8qa"), gurl_base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
// Should fail, due to a special case: if the label starts with "xn--", it
// should be parsed as Punycode, which must be all ASCII.
- str = gurl_base::UTF8ToUTF16("xn--hellö");
+ str = u"xn--hellö";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
@@ -2440,9 +2524,120 @@
// This tests that there is still an error for the character '⁄' (U+2044),
// which would be a valid ASCII character, U+0044, if the high byte were
// ignored.
- str = gurl_base::UTF8ToUTF16("xn--1⁄4");
+ str = u"xn--1⁄4";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
}
+TEST(URLCanonTest, URLSafetyStatus) {
+ const struct {
+ const char* host;
+ HostSafetyStatus expected_safety_status;
+ } kTestCases[] = {
+ // Empty components are ok.
+ {"", HostSafetyStatus::kOk},
+ {".", HostSafetyStatus::kOk},
+ {"..", HostSafetyStatus::kOk},
+
+ // Hostnames with purely non-numeric components are ok.
+ {"com", HostSafetyStatus::kOk},
+ {"a.com", HostSafetyStatus::kOk},
+ {"a.b.com", HostSafetyStatus::kOk},
+
+ // Hostnames with components with letters and numbers are ok.
+ {"1com", HostSafetyStatus::kOk},
+ {"0a.0com", HostSafetyStatus::kOk},
+ {"0xa.0xb.0xcom", HostSafetyStatus::kOk},
+ {"com1", HostSafetyStatus::kOk},
+ {"a1.com1", HostSafetyStatus::kOk},
+ {"a1.b1.com1", HostSafetyStatus::kOk},
+
+ // Hostname components that are numbers and precede a final
+ // non-numeric component are ok.
+ {"1.com", HostSafetyStatus::kOk},
+ {"0.1.2com", HostSafetyStatus::kOk},
+
+ // Invalid hostnames are ok.
+ {"[", HostSafetyStatus::kOk},
+
+ // IPv6 hostnames are ok.
+ {"[::]", HostSafetyStatus::kOk},
+ {"[2001:db8::1]", HostSafetyStatus::kOk},
+
+ // IPv4 hostnames are ok.
+ {"1.2.3.4", HostSafetyStatus::kOk},
+ // IPv4 hostnames with creative representations are ok.
+ {"01.02.03.04", HostSafetyStatus::kOk},
+ {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk},
+ {"1.2", HostSafetyStatus::kOk},
+ {"1.2.3", HostSafetyStatus::kOk},
+ {"0", HostSafetyStatus::kOk},
+ {"0x0", HostSafetyStatus::kOk},
+ {"07", HostSafetyStatus::kOk},
+
+ // Hostnames with a final problematic top level domain.
+ {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {".0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+ {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
+
+ // Hostnames with problematic two highest level domains.
+ {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
+ };
+
+ for (const auto& test_case : kTestCases) {
+ // Test with ASCII.
+ SCOPED_TRACE(test_case.host);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(test_case.host,
+ Component(0, strlen(test_case.host))));
+
+ // Test with ASCII and terminal dot, which shouldn't affect results for
+ // anything that doesn't already end in a dot (or anything that only has
+ // dots).
+ std::string host_with_dot = test_case.host;
+ host_with_dot += ".";
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(host_with_dot.c_str(),
+ Component(0, host_with_dot.size())));
+
+ // Test with ASCII and characters that are not part of the component.
+ std::string host_with_bonus_characters = test_case.host;
+ host_with_bonus_characters = "00" + host_with_bonus_characters + "00";
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(host_with_bonus_characters.c_str(),
+ Component(2, strlen(test_case.host))));
+
+ // Test with UTF-16.
+ std::u16string utf16 = gurl_base::UTF8ToUTF16(test_case.host);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size())));
+
+ // Test with UTF-16 and terminal dot.
+ std::u16string utf16_with_dot = gurl_base::UTF8ToUTF16(host_with_dot);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16_with_dot.c_str(),
+ Component(0, utf16_with_dot.size())));
+
+ // Test with UTF-16 and characters that are not part of the component.
+ std::u16string utf16_with_bonus_characters =
+ gurl_base::UTF8ToUTF16(host_with_bonus_characters);
+ EXPECT_EQ(test_case.expected_safety_status,
+ CheckHostnameSafety(utf16_with_bonus_characters.c_str(),
+ Component(2, utf16.size())));
+ }
+}
+
} // namespace url
diff --git a/url/url_constants.cc b/url/url_constants.cc
index 69399e4..9da6426 100644
--- a/url/url_constants.cc
+++ b/url/url_constants.cc
@@ -28,6 +28,7 @@
// See also: https://www.iana.org/assignments/uri-schemes/prov/quic-transport
const char kQuicTransportScheme[] = "quic-transport";
const char kTelScheme[] = "tel";
+const char kUrnScheme[] = "urn";
const char kWsScheme[] = "ws";
const char kWssScheme[] = "wss";
diff --git a/url/url_constants.h b/url/url_constants.h
index 3c04d68..dcd7c90 100644
--- a/url/url_constants.h
+++ b/url/url_constants.h
@@ -32,6 +32,7 @@
COMPONENT_EXPORT(URL) extern const char kMailToScheme[];
COMPONENT_EXPORT(URL) extern const char kQuicTransportScheme[];
COMPONENT_EXPORT(URL) extern const char kTelScheme[];
+COMPONENT_EXPORT(URL) extern const char kUrnScheme[];
COMPONENT_EXPORT(URL) extern const char kWsScheme[];
COMPONENT_EXPORT(URL) extern const char kWssScheme[];
diff --git a/url/url_file.h b/url/url_file.h
index cfe047e..6ad79b4 100644
--- a/url/url_file.h
+++ b/url/url_file.h
@@ -16,7 +16,7 @@
#ifdef WIN32
// We allow both "c:" and "c|" as drive identifiers.
-inline bool IsWindowsDriveSeparator(gurl_base::char16 ch) {
+inline bool IsWindowsDriveSeparator(char16_t ch) {
return ch == ':' || ch == '|';
}
@@ -34,23 +34,44 @@
#ifdef WIN32
+// DoesContainWindowsDriveSpecUntil returns the smallest offset in the range
+// [start_offset, max_offset] at which the spec has a valid drive specification
+// (an ASCII letter followed by a drive separator), or -1 if there is none. It
+// also returns -1 when start_offset is too close to spec_len to leave room for
+// two characters, and caps max_offset to spec_len - 2 to simplify callers.
+// max_offset must be at least start_offset (enforced with GURL_CHECK_LE).
+template <typename CHAR>
+inline int DoesContainWindowsDriveSpecUntil(const CHAR* spec,
+ int start_offset,
+ int max_offset,
+ int spec_len) {
+ GURL_CHECK_LE(start_offset, max_offset);
+ if (start_offset > spec_len - 2)
+ return -1; // Not enough room.
+ if (max_offset > spec_len - 2)
+ max_offset = spec_len - 2;
+ for (int offset = start_offset; offset <= max_offset; ++offset) {
+ if (!gurl_base::IsAsciiAlpha(spec[offset]))
+ continue; // Doesn't contain a valid drive letter.
+ if (!IsWindowsDriveSeparator(spec[offset + 1]))
+ continue; // Isn't followed with a drive separator.
+ return offset;
+ }
+ return -1;
+}
+
// Returns true if the start_offset in the given spec looks like it begins a
// drive spec, for example "c:". This function explicitly handles start_offset
// values that are equal to or larger than the spec_len to simplify callers.
//
// If this returns true, the spec is guaranteed to have a valid drive letter
-// plus a colon starting at |start_offset|.
-template<typename CHAR>
-inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
+// plus a drive letter separator (a colon or a pipe) starting at |start_offset|.
+template <typename CHAR>
+inline bool DoesBeginWindowsDriveSpec(const CHAR* spec,
+ int start_offset,
int spec_len) {
- int remaining_len = spec_len - start_offset;
- if (remaining_len < 2)
- return false; // Not enough room.
- if (!gurl_base::IsAsciiAlpha(spec[start_offset]))
- return false; // Doesn't start with a valid drive letter.
- if (!IsWindowsDriveSeparator(spec[start_offset + 1]))
- return false; // Isn't followed with a drive separator.
- return true;
+ return DoesContainWindowsDriveSpecUntil(spec, start_offset, start_offset,
+ spec_len) == start_offset;
}
// Returns true if the start_offset in the given text looks like it begins a
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index d9256a2..4029d61 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -11,7 +11,6 @@
#include <ostream>
#include "polyfills/base/check_op.h"
-#include "base/i18n/uchar.h"
#include "base/no_destructor.h"
#include <unicode/uidna.h>
#include <unicode/utypes.h>
@@ -22,7 +21,7 @@
namespace {
-// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
+// A wrapper to use gurl_base::NoDestructor with ICU's UIDNA, a C pointer to
// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
//
// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
@@ -83,7 +82,7 @@
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
-bool IDNToASCII(const gurl_base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
GURL_DCHECK(output->length() == 0); // Output buffer is assumed empty.
UIDNA* uidna = GetUIDNA();
@@ -91,17 +90,41 @@
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
- int output_length =
- uidna_nameToASCII(uidna, gurl_base::i18n::ToUCharPtr(src), src_len,
- gurl_base::i18n::ToUCharPtr(output->data()),
- output->capacity(), &info, &err);
+ int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
+ output->capacity(), &info, &err);
+
+ // Ignore various errors for web compatibility. The options are specified
+ // by the WHATWG URL Standard. See
+ // - https://unicode.org/reports/tr46/
+ // - https://url.spec.whatwg.org/#concept-domain-to-ascii
+ // (we set beStrict to false)
+
+ // Disable the "CheckHyphens" option in UTS #46. See
+ // - https://crbug.com/804688
+ // - https://github.com/whatwg/url/issues/267
+ info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
+ info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
+ info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
+
+ // Disable the "VerifyDnsLength" option in UTS #46.
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+
if (U_SUCCESS(err) && info.errors == 0) {
+ // Per WHATWG URL, it is a failure if the ToASCII output is empty.
+ //
+ // ICU would usually return UIDNA_ERROR_EMPTY_LABEL in this case, but we
+ // want to continue allowing http://abc..def/ while forbidding http:///.
+ //
+ if (output_length == 0) {
+ return false;
+ }
+
output->set_length(output_length);
return true;
}
- // TODO(jungshik): Look at info.errors to handle them case-by-case basis
- // if necessary.
if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
return false; // Unknown error, give up.
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc
index c1c878a..ceb75d8 100644
--- a/url/url_parse_file.cc
+++ b/url/url_parse_file.cc
@@ -215,7 +215,7 @@
DoParseFileURL(url, url_len, parsed);
}
-void ParseFileURL(const gurl_base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileURL(url, url_len, parsed);
}
diff --git a/url/url_parse_internal.h b/url/url_parse_internal.h
index 6f86d86..4e2527a 100644
--- a/url/url_parse_internal.h
+++ b/url/url_parse_internal.h
@@ -12,13 +12,13 @@
namespace url {
// We treat slashes and backslashes the same for IE compatibility.
-inline bool IsURLSlash(gurl_base::char16 ch) {
+inline bool IsURLSlash(char16_t ch) {
return ch == '/' || ch == '\\';
}
// Returns true if we should trim this character from the URL because it is a
// space or a control character.
-inline bool ShouldTrimFromURL(gurl_base::char16 ch) {
+inline bool ShouldTrimFromURL(char16_t ch) {
return ch <= ' ';
}
@@ -67,13 +67,12 @@
Component* filepath,
Component* query,
Component* ref);
-void ParsePathInternal(const gurl_base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
Component* ref);
-
// Given a spec and a pointer to the character after the colon following the
// scheme, this parses it and fills in the structure, Every item in the parsed
// structure is filled EXCEPT for the scheme, which is untouched.
@@ -81,7 +80,7 @@
int spec_len,
int after_scheme,
Parsed* parsed);
-void ParseAfterScheme(const gurl_base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed);
diff --git a/url/url_test_utils.h b/url/url_test_utils.h
index f8d40e1..bb75c74 100644
--- a/url/url_test_utils.h
+++ b/url/url_test_utils.h
@@ -10,7 +10,6 @@
#include <string>
-#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/url_canon_internal.h"
@@ -24,11 +23,11 @@
// in base bacause it passes invalid UTF-16 characters which is important for
// test purposes. As a result, this is not meant to handle true UTF-32 encoded
// strings.
-inline gurl_base::string16 TruncateWStringToUTF16(const wchar_t* src) {
- gurl_base::string16 str;
+inline std::u16string TruncateWStringToUTF16(const wchar_t* src) {
+ std::u16string str;
int length = static_cast<int>(wcslen(src));
for (int i = 0; i < length; ++i) {
- str.push_back(static_cast<gurl_base::char16>(src[i]));
+ str.push_back(static_cast<char16_t>(src[i]));
}
return str;
}
diff --git a/url/url_util.cc b/url/url_util.cc
index 13c30b3..0c35913 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -11,7 +11,6 @@
#include "polyfills/base/check_op.h"
#include "base/compiler_specific.h"
#include "base/no_destructor.h"
-#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
#include "url/url_constants.h"
@@ -137,7 +136,8 @@
template<> struct CharToStringPiece<char> {
typedef gurl_base::StringPiece Piece;
};
-template<> struct CharToStringPiece<gurl_base::char16> {
+template <>
+struct CharToStringPiece<char16_t> {
typedef gurl_base::StringPiece16 Piece;
};
@@ -468,10 +468,10 @@
// the SchemeRegistry has been used.
//
// This normally means you're trying to set up a new scheme too late or using
- // the SchemeRegistry too early in your application's init process. Make sure
- // that you haven't added any static GURL initializers in tests.
+ // the SchemeRegistry too early in your application's init process.
GURL_DCHECK(!g_scheme_registries_used.load())
- << "Trying to add a scheme after the lists have been used.";
+ << "Trying to add a scheme after the lists have been used. "
+ "Make sure that you haven't added any static GURL initializers in tests.";
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
@@ -557,6 +557,15 @@
&GetSchemeRegistryWithoutLocking()->standard_schemes);
}
+std::vector<std::string> GetStandardSchemes() {
+ std::vector<std::string> result;
+ result.reserve(GetSchemeRegistry().standard_schemes.size());
+ for (const auto& entry : GetSchemeRegistry().standard_schemes) {
+ result.push_back(entry.scheme);
+ }
+ return result;
+}
+
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
DoAddSchemeWithType(new_scheme, type,
&GetSchemeRegistryWithoutLocking()->referrer_schemes);
@@ -638,13 +647,13 @@
return DoIsStandard(spec, scheme, type);
}
-bool GetStandardSchemeType(const gurl_base::char16* spec,
+bool GetStandardSchemeType(const char16_t* spec,
const Component& scheme,
SchemeType* type) {
return DoIsStandard(spec, scheme, type);
}
-bool IsStandard(const gurl_base::char16* spec, const Component& scheme) {
+bool IsStandard(const char16_t* spec, const Component& scheme) {
SchemeType unused_scheme_type;
return DoIsStandard(spec, scheme, &unused_scheme_type);
}
@@ -662,7 +671,7 @@
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
-bool FindAndCompareScheme(const gurl_base::char16* str,
+bool FindAndCompareScheme(const char16_t* str,
int str_len,
const char* compare,
Component* found_scheme) {
@@ -723,7 +732,7 @@
charset_converter, output, output_parsed);
}
-bool Canonicalize(const gurl_base::char16* spec,
+bool Canonicalize(const char16_t* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
@@ -749,7 +758,7 @@
bool ResolveRelative(const char* base_spec,
int base_spec_len,
const Parsed& base_parsed,
- const gurl_base::char16* relative,
+ const char16_t* relative,
int relative_length,
CharsetConverter* charset_converter,
CanonOutput* output,
@@ -773,7 +782,7 @@
bool ReplaceComponents(const char* spec,
int spec_len,
const Parsed& parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* out_parsed) {
@@ -853,7 +862,7 @@
return DoCompareSchemeComponent(spec, component, compare_to);
}
-bool CompareSchemeComponent(const gurl_base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to) {
return DoCompareSchemeComponent(spec, component, compare_to);
diff --git a/url/url_util.h b/url/url_util.h
index 1816637..00399c2 100644
--- a/url/url_util.h
+++ b/url/url_util.h
@@ -10,7 +10,6 @@
#include <vector>
#include "polyfills/base/component_export.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -63,6 +62,14 @@
COMPONENT_EXPORT(URL)
void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);
+// Returns the list of schemes registered for "standard" URLs. Note, this
+// should not be used if you just need to check if your protocol is standard
+// or not. Instead use the IsStandard() function above as it's much more
+// efficient. This function should only be used where you need to perform
+// other operations against the standard scheme list.
+COMPONENT_EXPORT(URL)
+std::vector<std::string> GetStandardSchemes();
+
// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
COMPONENT_EXPORT(URL)
@@ -134,7 +141,7 @@
const char* compare,
Component* found_scheme);
COMPONENT_EXPORT(URL)
-bool FindAndCompareScheme(const gurl_base::char16* str,
+bool FindAndCompareScheme(const char16_t* str,
int str_len,
const char* compare,
Component* found_scheme);
@@ -144,7 +151,7 @@
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
compare, found_scheme);
}
-inline bool FindAndCompareScheme(const gurl_base::string16& str,
+inline bool FindAndCompareScheme(const std::u16string& str,
const char* compare,
Component* found_scheme) {
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
@@ -156,7 +163,7 @@
COMPONENT_EXPORT(URL)
bool IsStandard(const char* spec, const Component& scheme);
COMPONENT_EXPORT(URL)
-bool IsStandard(const gurl_base::char16* spec, const Component& scheme);
+bool IsStandard(const char16_t* spec, const Component& scheme);
// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of allowed schemes for referrers (see AddReferrerScheme).
@@ -171,7 +178,7 @@
const Component& scheme,
SchemeType* type);
COMPONENT_EXPORT(URL)
-bool GetStandardSchemeType(const gurl_base::char16* spec,
+bool GetStandardSchemeType(const char16_t* spec,
const Component& scheme,
SchemeType* type);
@@ -213,7 +220,7 @@
CanonOutput* output,
Parsed* output_parsed);
COMPONENT_EXPORT(URL)
-bool Canonicalize(const gurl_base::char16* spec,
+bool Canonicalize(const char16_t* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
@@ -243,7 +250,7 @@
bool ResolveRelative(const char* base_spec,
int base_spec_len,
const Parsed& base_parsed,
- const gurl_base::char16* relative,
+ const char16_t* relative,
int relative_length,
CharsetConverter* charset_converter,
CanonOutput* output,
@@ -265,7 +272,7 @@
bool ReplaceComponents(const char* spec,
int spec_len,
const Parsed& parsed,
- const Replacements<gurl_base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* out_parsed);
diff --git a/url/url_util_internal.h b/url/url_util_internal.h
index 08f8929..b2730b6 100644
--- a/url/url_util_internal.h
+++ b/url/url_util_internal.h
@@ -5,9 +5,6 @@
#ifndef URL_URL_UTIL_INTERNAL_H_
#define URL_URL_UTIL_INTERNAL_H_
-#include <string>
-
-#include "base/strings/string16.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
@@ -17,7 +14,7 @@
bool CompareSchemeComponent(const char* spec,
const Component& component,
const char* compare_to);
-bool CompareSchemeComponent(const gurl_base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to);
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index a63294f..3dcfa76 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -136,6 +136,16 @@
&scheme_type));
}
+TEST_F(URLUtilTest, GetStandardSchemes) {
+ std::vector<std::string> expected = {
+ kHttpsScheme, kHttpScheme, kFileScheme,
+ kFtpScheme, kWssScheme, kWsScheme,
+ kFileSystemScheme, kQuicTransportScheme, "foo",
+ };
+ AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY);
+ EXPECT_EQ(expected, GetStandardSchemes());
+}
+
TEST_F(URLUtilTest, ReplaceComponents) {
Parsed parsed;
RawCanonOutputT<char> output;
@@ -236,34 +246,33 @@
for (size_t i = 0; i < gurl_base::size(decode_cases); i++) {
const char* input = decode_cases[i].input;
- RawCanonOutputT<gurl_base::char16> output;
+ RawCanonOutputT<char16_t> output;
DecodeURLEscapeSequences(input, strlen(input),
DecodeURLMode::kUTF8OrIsomorphic, &output);
- EXPECT_EQ(decode_cases[i].output,
- gurl_base::UTF16ToUTF8(gurl_base::string16(output.data(),
- output.length())));
+ EXPECT_EQ(decode_cases[i].output, gurl_base::UTF16ToUTF8(std::u16string(
+ output.data(), output.length())));
- RawCanonOutputT<gurl_base::char16> output_utf8;
+ RawCanonOutputT<char16_t> output_utf8;
DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
&output_utf8);
EXPECT_EQ(decode_cases[i].output,
gurl_base::UTF16ToUTF8(
- gurl_base::string16(output_utf8.data(), output_utf8.length())));
+ std::u16string(output_utf8.data(), output_utf8.length())));
}
// Our decode should decode %00
const char zero_input[] = "%00";
- RawCanonOutputT<gurl_base::char16> zero_output;
+ RawCanonOutputT<char16_t> zero_output;
DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8,
&zero_output);
- EXPECT_NE("%00", gurl_base::UTF16ToUTF8(
- gurl_base::string16(zero_output.data(), zero_output.length())));
+ EXPECT_NE("%00", gurl_base::UTF16ToUTF8(std::u16string(zero_output.data(),
+ zero_output.length())));
// Test the error behavior for invalid UTF-8.
struct Utf8DecodeCase {
const char* input;
- std::vector<gurl_base::char16> expected_iso;
- std::vector<gurl_base::char16> expected_utf8;
+ std::vector<char16_t> expected_iso;
+ std::vector<char16_t> expected_utf8;
} utf8_decode_cases[] = {
// %e5%a5%bd is a valid UTF-8 sequence. U+597D
{"%e4%a0%e5%a5%bd",
@@ -279,17 +288,17 @@
for (const auto& test : utf8_decode_cases) {
const char* input = test.input;
- RawCanonOutputT<gurl_base::char16> output_iso;
+ RawCanonOutputT<char16_t> output_iso;
DecodeURLEscapeSequences(input, strlen(input),
DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
- EXPECT_EQ(gurl_base::string16(test.expected_iso.data()),
- gurl_base::string16(output_iso.data(), output_iso.length()));
+ EXPECT_EQ(std::u16string(test.expected_iso.data()),
+ std::u16string(output_iso.data(), output_iso.length()));
- RawCanonOutputT<gurl_base::char16> output_utf8;
+ RawCanonOutputT<char16_t> output_utf8;
DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
&output_utf8);
- EXPECT_EQ(gurl_base::string16(test.expected_utf8.data()),
- gurl_base::string16(output_utf8.data(), output_utf8.length()));
+ EXPECT_EQ(std::u16string(test.expected_utf8.data()),
+ std::u16string(output_utf8.data(), output_utf8.length()));
}
}