Update googleurl to the latest version. This updates googleurl to revision 21069c2dfa90d5bdd123c0d0d202dfc61f5f6920 (Wed Jul 8 16:20:09 2020 +0000).
diff --git a/AUTHORS b/AUTHORS index a8d90ac..48cfe49 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -37,6 +37,7 @@ Ajay Berwal <a.berwal@samsung.com> Ajay Berwal <ajay.berwal@samsung.com> Ajith Kumar V <ajith.v@samsung.com> +Akos Kiss <akiss@inf.u-szeged.hu> Aku Kotkavuo <a.kotkavuo@partner.samsung.com> Aldo Culquicondor <alculquicondor@gmail.com> Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com> @@ -76,6 +77,7 @@ anatoly techtonik <techtonik@gmail.com> Ancil George <ancilgeorge@samsung.com> Andra Paraschiv <andra.paraschiv@intel.com> +Andreas Papacharalampous <andreas@apap04.com> Andrei Borza <andrei.borza@gmail.com> Andrei Parvu <andrei.prv@gmail.com> Andrei Parvu <parvu@adobe.com> @@ -86,6 +88,7 @@ Andrew Jorgensen <ajorgens@amazon.com> Andrew MacPherson <andrew.macpherson@soundtrap.com> Andrew Tulloch <andrew@tullo.ch> +Andriy Rysin <arysin@gmail.com> Anish Patankar <anish.p@samsung.com> Ankit Kumar <ankit2.kumar@samsung.com> Ankur Verma <ankur1.verma@samsung.com> @@ -93,6 +96,7 @@ Anne Kao <annekao94@gmail.com> Anssi Hannula <anssi.hannula@iki.fi> Anthony Halliday <anth.halliday12@gmail.com> +Anton Bershanskiy <bershanskiy@pm.me> Anton Obzhirov <a.obzhirov@samsung.com> Antonin Hildebrand <antonin.hildebrand@gmail.com> Antonio Gomes <a1.gomes@sisa.samsung.com> @@ -147,6 +151,7 @@ Brian Luft <brian@electroly.com> Brian Merrell, Novell Inc. 
<bgmerrell@gmail.com> Brian Yip <itsbriany@gmail.com> +Brook Hong <hzgmaxwell@gmail.com> Bruno Calvignac <bruno@flock.com> Bruno de Oliveira Abinader <brunoabinader@gmail.com> Bruno Roy <brusi_roy@hotmail.com> @@ -164,6 +169,7 @@ Catalin Badea <badea@adobe.com> Cathie Chen <cathiechen@tencent.com> Cem Kocagil <cem.kocagil@gmail.com> +Cezary Kułakowski <cezary.kulakowski@gmail.com> Chakshu Ahuja <chakshu.a@samsung.com> Chamal De Silva <chamalsl@yahoo.com> Chandan Padhi <c.padhi@samsung.com> @@ -179,6 +185,7 @@ Chansik Yun <chansik.yun@gmail.com> Chaobin Zhang <zhchbin@gmail.com> Charles Vaughn <cvaughn@gmail.com> +Cheng Zhao <zcbenz@gmail.com> Choongwoo Han <cwhan.tunz@gmail.com> Chris Greene <cwgreene@amazon.com> Chris Harrelson <chrishtr@gmail.com> @@ -188,6 +195,7 @@ Chris Vasselli <clindsay@gmail.com> Christophe Dumez <ch.dumez@samsung.com> Christopher Dale <chrelad@gmail.com> +Chunbo Hua <chunbo.hua@intel.com> Claudio DeSouza <claudiomdsjr@gmail.com> Clemens Fruhwirth <clemens@endorphin.org> Clement Scheelfeldt Skau <clementskau@gmail.com> @@ -196,6 +204,7 @@ Conrad Irwin <conrad.irwin@gmail.com> Craig Schlenter <craig.schlenter@gmail.com> Csaba Osztrogonác <ossy.szeged@gmail.com> +Cynthia Revström <me@cynthia.re> Daegyu Lee <na7jun8gi@gmail.com> Dai Chunyang <chunyang.dai@intel.com> Daiwei Li <daiweili@suitabletech.com> @@ -223,6 +232,8 @@ David Erceg <erceg.david@gmail.com> David Fox <david@davidjfox.com> David Futcher <david.mike.futcher@gmail.com> +David Jin <davidjin@amazon.com> +David Lechner <david@pybricks.com> David Leen <davileen@amazon.com> David Manouchehri <david@davidmanouchehri.com> David McAllister <mcdavid@amazon.com> @@ -265,6 +276,7 @@ Eduardo Lima (Etrunko) <eduardo.lima@intel.com> Edward Baker <edward.baker@intel.com> Edward Crossman <tedoc2000@gmail.com> +Edward Trist <edwardtrist@gmail.com> Eero Häkkinen <e.hakkinen@samsung.com> Eero Häkkinen <eero.hakkinen@intel.com> Egor Starkov <egor.starkov@samsung.com> @@ -299,6 +311,7 @@ Francois 
Rauch <leopardb@gmail.com> Frankie Dintino <fdintino@theatlantic.com> Franklin Ta <fta2012@gmail.com> +Fred Ranking <luliang14@huawei.com> Frédéric Jacob <frederic.jacob.78@gmail.com> Frédéric Wang <fred.wang@free.fr> Fu Junwei <junwei.fu@intel.com> @@ -425,6 +438,7 @@ Jeffrey C <jeffreyca16@gmail.com> Jeongeun Kim <je_julie.kim@samsung.com> Jeongmin Kim <kimwjdalsl@gmail.com> +Jeongwoo Park <jwoo.park@navercorp.com> Jeongwoo Park <skeksk91@gmail.com> Jeremy Noring <jnoring@hirevue.com> Jeremy Spiegel <jeremysspiegel@gmail.com> @@ -442,6 +456,7 @@ Jiawei Shao <jiawei.shao@intel.com> Jiaxun Wei <leuisken@gmail.com> Jiaxun Yang <jiaxun.yang@flygoat.com> +Jidong Qin <qinjidong@qianxin.com> Jie Chen <jie.a.chen@intel.com> Jihan Chao <jihan@bluejeans.com> Jihoon Chung <j.c@navercorp.com> @@ -457,6 +472,7 @@ Jingyi Wei <wjywbs@gmail.com> Jinho Bang <jinho.bang@samsung.com> Jinsong Fan <fanjinsong@sogou-inc.com> +Jinsong Fan <jinsong.van@gmail.com> Jinwoo Song <jinwoo7.song@samsung.com> Jinyoung Hur <hurims@gmail.com> Jitendra Kumar Sahoo <jitendra.ks@samsung.com> @@ -525,6 +541,7 @@ Kaustubh Atrawalkar <kaustubh.ra@gmail.com> Ke He <ke.he@intel.com> Keene Pan <keenepan@linpus.com> +Keita Suzuki <keitasuzuki.park@gmail.com> Keita Yoshimoto <y073k3@gmail.com> Keith Chen <keitchen@amazon.com> Kenneth Rohde Christiansen <kenneth.r.christiansen@intel.com> @@ -565,6 +582,7 @@ Kyungtae Kim <ktf.kim@samsung.com> Kyungyoung Heo <bbvch13531@gmail.com> Lalit Chandivade <lalit.chandivade@einfochips.com> +Lam Lu <lamlu@amazon.com> Laszlo Gombos <l.gombos@samsung.com> Laszlo Radanyi <bekkra@gmail.com> Lauren Yeun Kim <lauren.yeun.kim@gmail.com> @@ -572,6 +590,7 @@ Lavar Askew <open.hyperion@gmail.com> Legend Lee <guanxian.li@intel.com> Leith Bade <leith@leithalweapon.geek.nz> +Lei Li <lli.kernel.kvm@gmail.com> Lenny Khazan <lenny.khazan@gmail.com> Leo Wolf <jclw@ymail.com> Leon Han <leon.han@intel.com> @@ -579,6 +598,7 @@ Li Yin <li.yin@intel.com> Lidwine Genevet <lgenevet@cisco.com> 
Lin Sun <lin.sun@intel.com> +Lingqi Chi <someway.bit@gmail.com> Lingyun Cai <lingyun.cai@intel.com> Lionel Landwerlin <lionel.g.landwerlin@intel.com> Lizhi Fan <lizhi.fan@samsung.com> @@ -589,6 +609,7 @@ Lucie Brozkova <lucinka.brozkova@gmail.com> Luiz Von Dentz <luiz.von.dentz@intel.com> Luka Dojcilovic <l.dojcilovic@gmail.com> +Lukasz Krakowiak <lukasz.krakowiak@mobica.com> Luke Inman-Semerau <luke.semerau@gmail.com> Luke Zarko <lukezarko@gmail.com> Luoxi Pan <l.panpax@gmail.com> @@ -616,6 +637,7 @@ Mark Seaborn <mrs@mythic-beasts.com> Martijn Croonen <martijn@martijnc.be> Martin Bednorz <m.s.bednorz@gmail.com> +Martin Persson <mnpn03@gmail.com> Martin Rogalla <martin@martinrogalla.com> Martina Kollarova <martina.kollarova@intel.com> Masahiro Yado <yado.masa@gmail.com> @@ -625,12 +647,14 @@ Mathias Bynens <mathias@qiwi.be> Mathieu Meisser <mmeisser@logitech.com> Matt Arpidone <mma.public@gmail.com> +Matt Fysh <mattfysh@gmail.com> Matt Strum <mstrum@amazon.com> Matt Zeunert <matt@mostlystatic.com> Matthew Bauer <mjbauer95@gmail.com> Matthew Demarest <demarem@amazon.com> Matthew Robertson <matthewrobertson03@gmail.com> Matthew Turk <matthewturk@gmail.com> +Matthew Webb <mwebbmwebb@gmail.com> Matthew Willis <appamatto@gmail.com> Matthias Reitinger <reimarvin@gmail.com> Matthieu Rigolot <matthieu.rigolot@gmail.com> @@ -643,6 +667,7 @@ Md Jobed Hossain <jrony15@gmail.com> Md Sami Uddin <md.sami@samsung.com> Michael Cirone <mikecirone@gmail.com> +Michael Constant <mconst@gmail.com> Michael Forney <mforney@mforney.org> Michael Gilbert <floppymaster@gmail.com> Michael Lopez <lopes92290@gmail.com> @@ -704,6 +729,7 @@ Nils Schneider <nils@nilsschneider.net> Ningxin Hu <ningxin.hu@intel.com> Nitish Mehrotra <nitish.m@samsung.com> +Noam Rosenthal <noam.j.rosenthal@gmail.com> Noj Vek <nojvek@gmail.com> Nolan Cao <nolan.robin.cao@gmail.com> Oleksii Kadurin <ovkadurin@gmail.com> @@ -812,11 +838,13 @@ Rosen Dash <nqk836@motorola.com> Rosen Dash <rosen.dash@gmail.com> Ross 
Kirsling <rkirsling@gmail.com> +Ross Wollman <ross.wollman@gmail.com> ruben <chromium@hybridsource.org> Ruben Bridgewater <ruben@bridgewater.de> Ruben Terrazas <rubentopo@gmail.com> Rufus Hamade <rufus.hamade@imgtec.com> Ruiyi Luo <luoruiyi2008@gmail.com> +Russell Davis <russell.davis@gmail.com> Ryan Ackley <ryanackley@gmail.com> Ryan Norton <rnorton10@gmail.com> Ryan Sleevi <ryan-chromium-dev@sleevi.com> @@ -833,6 +861,7 @@ Sanghyun Park <sh919.park@samsung.com> Sanghyup Lee <sh53.lee@samsung.com> Sangjoon Je <htamop@gmail.com> +Sangseok Jang <sangseok.jang@gmail.com> Sangseok Jang <sangseok.jang@navercorp.com> Sangwoo Ko <sangwoo.ko@navercorp.com> Sangwoo Ko <sangwoo108@gmail.com> @@ -840,6 +869,7 @@ Sanjoy Pal <sanjoy.pal@samsung.com> Sanne Wouda <sanne.wouda@gmail.com> Santosh Mahto <samahto@cisco.com> +Sarah Jochum <smjochum@gmail.com> Sarath Singapati <s.singapati@gmail.com> Sarath Singapati <s.singapati@samsung.com> Sarath Singapati <sarath.singapati@huawei.com> @@ -910,6 +940,7 @@ Staphany Park <stapark008@gmail.com> Stephan Hartmann <stha09@googlemail.com> Stephen Searles <stephen.searles@gmail.com> +Stephen Sigwart <ssigwart@gmail.com> Steve Sanders <steve@zanderz.com> Steven Pennington <spenn@engr.uvic.ca> Steven Roussey <sroussey@gmail.com> @@ -926,6 +957,7 @@ Sungmann Cho <sungmann.cho@navercorp.com> Sunil Ratnu <sunil.ratnu@samsung.com> Sunitha Srivatsa <srivats@amazon.com> +Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Suvanjan Mukherjee <suvanjanmukherjee@gmail.com> Suyambulingam R M <suyambu.rm@samsung.com> Suyash Sengar <suyash.s@samsung.com> @@ -997,6 +1029,7 @@ Vivek Galatage <vivek.vg@samsung.com> Volker Sorge <volker.sorge@gmail.com> Waihung Fu <fufranci@amazon.com> +Wojciech Bielawski <wojciech.bielawski@gmail.com> Wanming Lin <wanming.lin@intel.com> Wei Li <wei.c.li@intel.com> Wenxiang Qian <leonwxqian@gmail.com> @@ -1007,6 +1040,7 @@ Will Hirsch <chromium@willhirsch.co.uk> Will Shackleton <w.shackleton@gmail.com> William Xie 
<william.xie@intel.com> +Winston Chen <winston.c1@samsung.com> Xiang Long <xiang.long@intel.com> Xiangze Zhang <xiangze.zhang@intel.com> Xiaofeng Zhang <xiaofeng.zhang@intel.com> @@ -1019,6 +1053,7 @@ Xu Samuel <samuel.xu@intel.com> Xu Xing <xing.xu@intel.com> Xuefei Ren <xrenishere@gmail.com> +Xuehui Xie <xuehui.xxh@alibaba-inc.com> Xueqing Huang <huangxueqing@xiaomi.com> Xun Sun <xun.sun@intel.com> Xunran Ding <xunran.ding@samsung.com> @@ -1076,10 +1111,13 @@ Zhifei Fang <facetothefate@gmail.com> Zhuoyu Qian <zhuoyu.qian@samsung.com> Ziran Sun <ziran.sun@samsung.com> +Zoltan Czirkos <czirkos.zoltan@gmail.com> Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com> Zoltan Kuscsik <zoltan.kuscsik@linaro.org> Zsolt Borbely <zsborbely.u-szeged@partner.samsung.com> 方觉 (Fang Jue) <fangjue23303@gmail.com> +Julian Geppert <spctstr@gmail.com> +Jiadong Chen <chenjiadong@huawei.com> # END individuals section. # BEGIN organizations section. @@ -1117,6 +1155,7 @@ Mediatek <*@mediatek.com> Microsoft <*@microsoft.com> MIPS Technologies, Inc. <*@mips.com> +Mobica Limited <*@mobica.com> Mozilla Corporation <*@mozilla.com> Neverware Inc. <*@neverware.com> NIKE, Inc. <*@nike.com> @@ -1129,6 +1168,7 @@ Seznam.cz, a.s. <*@firma.seznam.cz> Slack Technologies Inc. <*@slack-corp.com> Spotify AB <*@spotify.com> +Synaptics <*@synaptics.com> Tableau Software <*@tableau.com> TeamSpeak Systems GmbH <*@teamspeak.com> The Chromium Authors <*@chromium.org>
diff --git a/base/BUILD b/base/BUILD index efad13f..74b721d 100644 --- a/base/BUILD +++ b/base/BUILD
@@ -7,6 +7,9 @@ name = "base", hdrs = [ "compiler_specific.h", + "containers/checked_iterators.h", + "containers/span.h", + "containers/util.h", "debug/leak_annotations.h", "macros.h", "no_destructor.h",
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index 2962537..0cd36dc 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -47,6 +47,20 @@ #define ALWAYS_INLINE inline #endif +// Annotate a function indicating it should never be tail called. Useful to make +// sure callers of the annotated function are never omitted from call-stacks. +// To provide the complementary behavior (prevent the annotated function from +// being omitted) look at NOINLINE. Also note that this doesn't prevent code +// folding of multiple identical caller functions into a single signature. To +// prevent code folding, see gurl_base::debug::Alias. +// Use like: +// void NOT_TAIL_CALLED FooBar(); +#if defined(__clang__) && __has_attribute(not_tail_called) +#define NOT_TAIL_CALLED __attribute__((not_tail_called)) +#else +#define NOT_TAIL_CALLED +#endif + // Specify memory alignment for structs, classes, etc. // Use like: // class ALIGNAS(16) MyClass { ... } @@ -247,4 +261,35 @@ #define STACK_UNINITIALIZED #endif +// The ANALYZER_ASSUME_TRUE(bool arg) macro adds compiler-specific hints +// to Clang which control what code paths are statically analyzed, +// and is meant to be used in conjunction with assert & assert-like functions. +// The expression is passed straight through if analysis isn't enabled. +// +// ANALYZER_SKIP_THIS_PATH() suppresses static analysis for the current +// codepath and any other branching codepaths that might follow. +#if defined(__clang_analyzer__) + +inline constexpr bool AnalyzerNoReturn() __attribute__((analyzer_noreturn)) { + return false; +} + +inline constexpr bool AnalyzerAssumeTrue(bool arg) { + // AnalyzerNoReturn() is invoked and analysis is terminated if |arg| is + // false. 
+ return arg || AnalyzerNoReturn(); +} + +#define ANALYZER_ASSUME_TRUE(arg) ::AnalyzerAssumeTrue(!!(arg)) +#define ANALYZER_SKIP_THIS_PATH() static_cast<void>(::AnalyzerNoReturn()) +#define ANALYZER_ALLOW_UNUSED(var) static_cast<void>(var); + +#else // !defined(__clang_analyzer__) + +#define ANALYZER_ASSUME_TRUE(arg) (arg) +#define ANALYZER_SKIP_THIS_PATH() +#define ANALYZER_ALLOW_UNUSED(var) static_cast<void>(var); + +#endif // defined(__clang_analyzer__) + #endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/containers/checked_iterators.h b/base/containers/checked_iterators.h new file mode 100644 index 0000000..30c35bd --- /dev/null +++ b/base/containers/checked_iterators.h
@@ -0,0 +1,272 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_CONTAINERS_CHECKED_ITERATORS_H_ +#define BASE_CONTAINERS_CHECKED_ITERATORS_H_ + +#include <iterator> +#include <memory> +#include <type_traits> + +#include "polyfills/base/check_op.h" +#include "base/containers/util.h" + +namespace gurl_base { + +template <typename T> +class CheckedContiguousIterator { + public: + using difference_type = std::ptrdiff_t; + using value_type = std::remove_cv_t<T>; + using pointer = T*; + using reference = T&; + using iterator_category = std::random_access_iterator_tag; + + // Required for converting constructor below. + template <typename U> + friend class CheckedContiguousIterator; + + constexpr CheckedContiguousIterator() = default; + +#if defined(_LIBCPP_VERSION) + // The following using declaration, single argument implicit constructor and + // friended `__unwrap_iter` overload are required to use an optimized code + // path when using a CheckedContiguousIterator with libc++ algorithms such as + // std::copy(first, last, result), std::copy_backward(first, last, result), + // std::move(first, last, result) and std::move_backward(first, last, result). + // + // Each of these algorithms dispatches to a std::memmove if this is safe to do + // so, i.e. when all of `first`, `last` and `result` are iterators over + // contiguous storage of the same type modulo const qualifiers. + // + // libc++ implements this for its contiguous iterators by invoking the + // unqualified __unwrap_iter, which returns the underlying pointer for + // iterators over std::vector and std::string, and returns the original + // iterator otherwise. + // + // Thus in order to opt into this optimization for CCI, we need to provide our + // own __unwrap_iter, returning the underlying raw pointer if it is safe to do + // so. 
+ // + // Furthermore, considering that std::copy is implemented as follows, the + // return type of __unwrap_iter(CCI) needs to be convertible to CCI, which is + // why an appropriate implicit single argument constructor is provided for the + // optimized case: + // + // template <class InIter, class OutIter> + // OutIter copy(InIter first, InIter last, OutIter result) { + // return __copy(__unwrap_iter(first), __unwrap_iter(last), + // __unwrap_iter(result)); + // } + // + // Unoptimized __copy() signature: + // template <class InIter, class OutIter> + // OutIter __copy(InIter first, InIter last, OutIter result); + // + // Optimized __copy() signature: + // template <class T, class U> + // U* __copy(T* first, T* last, U* result); + // + // Finally, this single argument constructor sets all internal fields to the + // passed in pointer. This allows the resulting CCI to be used in other + // optimized calls to std::copy (or std::move, std::copy_backward, + // std::move_backward). However, it should not be used otherwise, since + // invoking any of its public API will result in a GURL_CHECK failure. This also + // means that callers should never use the single argument constructor + // directly. + template <typename U> + using PtrIfSafeToMemmove = std::enable_if_t< + std::is_trivially_copy_assignable<std::remove_const_t<U>>::value, + U*>; + + template <int&... ExplicitArgumentBarrier, typename U = T> + constexpr CheckedContiguousIterator(PtrIfSafeToMemmove<U> ptr) + : start_(ptr), current_(ptr), end_(ptr) {} + + template <int&... 
ExplicitArgumentBarrier, typename U = T> + friend constexpr PtrIfSafeToMemmove<U> __unwrap_iter( + CheckedContiguousIterator iter) { + return iter.current_; + } +#endif + + constexpr CheckedContiguousIterator(T* start, const T* end) + : CheckedContiguousIterator(start, start, end) {} + constexpr CheckedContiguousIterator(const T* start, T* current, const T* end) + : start_(start), current_(current), end_(end) { + GURL_CHECK_LE(start, current); + GURL_CHECK_LE(current, end); + } + constexpr CheckedContiguousIterator(const CheckedContiguousIterator& other) = + default; + + // Converting constructor allowing conversions like CCI<T> to CCI<const T>, + // but disallowing CCI<const T> to CCI<T> or CCI<Derived> to CCI<Base>, which + // are unsafe. Furthermore, this is the same condition as used by the + // converting constructors of std::span<T> and std::unique_ptr<T[]>. + // See https://wg21.link/n4042 for details. + template < + typename U, + std::enable_if_t<std::is_convertible<U (*)[], T (*)[]>::value>* = nullptr> + constexpr CheckedContiguousIterator(const CheckedContiguousIterator<U>& other) + : start_(other.start_), current_(other.current_), end_(other.end_) { + // We explicitly don't delegate to the 3-argument constructor here. Its + // CHECKs would be redundant, since we expect |other| to maintain its own + // invariant. However, DCHECKs never hurt anybody. Presumably. 
+ GURL_DCHECK_LE(other.start_, other.current_); + GURL_DCHECK_LE(other.current_, other.end_); + } + + ~CheckedContiguousIterator() = default; + + constexpr CheckedContiguousIterator& operator=( + const CheckedContiguousIterator& other) = default; + + friend constexpr bool operator==(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ == rhs.current_; + } + + friend constexpr bool operator!=(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ != rhs.current_; + } + + friend constexpr bool operator<(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ < rhs.current_; + } + + friend constexpr bool operator<=(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ <= rhs.current_; + } + friend constexpr bool operator>(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ > rhs.current_; + } + + friend constexpr bool operator>=(const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ >= rhs.current_; + } + + constexpr CheckedContiguousIterator& operator++() { + GURL_CHECK_NE(current_, end_); + ++current_; + return *this; + } + + constexpr CheckedContiguousIterator operator++(int) { + CheckedContiguousIterator old = *this; + ++*this; + return old; + } + + constexpr CheckedContiguousIterator& operator--() { + GURL_CHECK_NE(current_, start_); + --current_; + return *this; + } + + constexpr CheckedContiguousIterator operator--(int) { + CheckedContiguousIterator old = *this; + --*this; + return old; + } + + constexpr CheckedContiguousIterator& operator+=(difference_type rhs) { + if (rhs > 0) { + GURL_CHECK_LE(rhs, end_ - 
current_); + } else { + GURL_CHECK_LE(-rhs, current_ - start_); + } + current_ += rhs; + return *this; + } + + constexpr CheckedContiguousIterator operator+(difference_type rhs) const { + CheckedContiguousIterator it = *this; + it += rhs; + return it; + } + + constexpr CheckedContiguousIterator& operator-=(difference_type rhs) { + if (rhs < 0) { + GURL_CHECK_LE(-rhs, end_ - current_); + } else { + GURL_CHECK_LE(rhs, current_ - start_); + } + current_ -= rhs; + return *this; + } + + constexpr CheckedContiguousIterator operator-(difference_type rhs) const { + CheckedContiguousIterator it = *this; + it -= rhs; + return it; + } + + constexpr friend difference_type operator-( + const CheckedContiguousIterator& lhs, + const CheckedContiguousIterator& rhs) { + lhs.CheckComparable(rhs); + return lhs.current_ - rhs.current_; + } + + constexpr reference operator*() const { + GURL_CHECK_NE(current_, end_); + return *current_; + } + + constexpr pointer operator->() const { + GURL_CHECK_NE(current_, end_); + return current_; + } + + constexpr reference operator[](difference_type rhs) const { + GURL_CHECK_GE(rhs, 0); + GURL_CHECK_LT(rhs, end_ - current_); + return current_[rhs]; + } + + static bool IsRangeMoveSafe(const CheckedContiguousIterator& from_begin, + const CheckedContiguousIterator& from_end, + const CheckedContiguousIterator& to) + WARN_UNUSED_RESULT { + if (from_end < from_begin) + return false; + const auto from_begin_uintptr = get_uintptr(from_begin.current_); + const auto from_end_uintptr = get_uintptr(from_end.current_); + const auto to_begin_uintptr = get_uintptr(to.current_); + const auto to_end_uintptr = + get_uintptr((to + std::distance(from_begin, from_end)).current_); + + return to_begin_uintptr >= from_end_uintptr || + to_end_uintptr <= from_begin_uintptr; + } + + private: + constexpr void CheckComparable(const CheckedContiguousIterator& other) const { + GURL_CHECK_EQ(start_, other.start_); + GURL_CHECK_EQ(end_, other.end_); + } + + const T* start_ = 
nullptr; + T* current_ = nullptr; + const T* end_ = nullptr; +}; + +template <typename T> +using CheckedContiguousConstIterator = CheckedContiguousIterator<const T>; + +} // namespace gurl_base + +#endif // BASE_CONTAINERS_CHECKED_ITERATORS_H_
diff --git a/base/containers/span.h b/base/containers/span.h new file mode 100644 index 0000000..45a322d --- /dev/null +++ b/base/containers/span.h
@@ -0,0 +1,475 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CONTAINERS_SPAN_H_
+#define BASE_CONTAINERS_SPAN_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+#include "polyfills/base/check_op.h"
+#include "base/containers/checked_iterators.h"
+#include "base/macros.h"
+#include "base/stl_util.h"
+#include "base/template_util.h"
+
+namespace gurl_base {
+
+// [views.constants]
+constexpr size_t dynamic_extent = std::numeric_limits<size_t>::max();
+
+template <typename T, size_t Extent = dynamic_extent>
+class span;
+
+namespace internal {
+
+template <size_t I>
+using size_constant = std::integral_constant<size_t, I>;
+
+template <typename T>
+struct ExtentImpl : size_constant<dynamic_extent> {};
+
+template <typename T, size_t N>
+struct ExtentImpl<T[N]> : size_constant<N> {};
+
+template <typename T, size_t N>
+struct ExtentImpl<std::array<T, N>> : size_constant<N> {};
+
+template <typename T, size_t N>
+struct ExtentImpl<gurl_base::span<T, N>> : size_constant<N> {};
+
+template <typename T>
+using Extent = ExtentImpl<std::remove_cv_t<std::remove_reference_t<T>>>;
+
+template <typename T>
+struct IsSpanImpl : std::false_type {};
+
+template <typename T, size_t Extent>
+struct IsSpanImpl<span<T, Extent>> : std::true_type {};
+
+template <typename T>
+using IsNotSpan = negation<IsSpanImpl<std::decay_t<T>>>;
+
+template <typename T>
+struct IsStdArrayImpl : std::false_type {};
+
+template <typename T, size_t N>
+struct IsStdArrayImpl<std::array<T, N>> : std::true_type {};
+
+template <typename T>
+using IsNotStdArray = negation<IsStdArrayImpl<std::decay_t<T>>>;
+
+template <typename T>
+using IsNotCArray = negation<std::is_array<std::remove_reference_t<T>>>;
+
+template <typename From, typename To>
+using IsLegalDataConversion = std::is_convertible<From (*)[], To (*)[]>;
+
+template <typename Container, typename T>
+using ContainerHasConvertibleData = IsLegalDataConversion<
+    std::remove_pointer_t<decltype(gurl_base::data(std::declval<Container>()))>,
+    T>;
+
+template <typename Container>
+using ContainerHasIntegralSize =
+    std::is_integral<decltype(gurl_base::size(std::declval<Container>()))>;
+
+template <typename From, size_t FromExtent, typename To, size_t ToExtent>
+using EnableIfLegalSpanConversion =
+    std::enable_if_t<(ToExtent == dynamic_extent || ToExtent == FromExtent) &&
+                     IsLegalDataConversion<From, To>::value>;
+
+// SFINAE check if Array can be converted to a span<T>.
+template <typename Array, typename T, size_t Extent>
+using EnableIfSpanCompatibleArray =
+    std::enable_if_t<(Extent == dynamic_extent ||
+                      Extent == internal::Extent<Array>::value) &&
+                     ContainerHasConvertibleData<Array, T>::value>;
+
+// SFINAE check if Container can be converted to a span<T>.
+template <typename Container, typename T>
+using IsSpanCompatibleContainer =
+    conjunction<IsNotSpan<Container>,
+                IsNotStdArray<Container>,
+                IsNotCArray<Container>,
+                ContainerHasConvertibleData<Container, T>,
+                ContainerHasIntegralSize<Container>>;
+
+template <typename Container, typename T>
+using EnableIfSpanCompatibleContainer =
+    std::enable_if_t<IsSpanCompatibleContainer<Container, T>::value>;
+
+template <typename Container, typename T, size_t Extent>
+using EnableIfSpanCompatibleContainerAndSpanIsDynamic =
+    std::enable_if_t<IsSpanCompatibleContainer<Container, T>::value &&
+                     Extent == dynamic_extent>;
+
+// A helper template for storing the size of a span. Spans with static extents
+// don't require additional storage, since the extent itself is specified in the
+// template parameter.
+template <size_t Extent>
+class ExtentStorage {
+ public:
+  constexpr explicit ExtentStorage(size_t size) noexcept {}
+  constexpr size_t size() const noexcept { return Extent; }
+};
+
+// Specialization of ExtentStorage for dynamic extents, which do require
+// explicit storage for the size.
+template <>
+struct ExtentStorage<dynamic_extent> {
+  constexpr explicit ExtentStorage(size_t size) noexcept : size_(size) {}
+  constexpr size_t size() const noexcept { return size_; }
+
+ private:
+  size_t size_;
+};
+
+}  // namespace internal
+
+// A span is a value type that represents an array of elements of type T. Since
+// it only consists of a pointer to memory with an associated size, it is very
+// light-weight. It is cheap to construct, copy, move and use spans, so that
+// users are encouraged to use it as a pass-by-value parameter. A span does not
+// own the underlying memory, so care must be taken to ensure that a span does
+// not outlive the backing store.
+//
+// span is somewhat analogous to StringPiece, but with arbitrary element types,
+// allowing mutation if T is non-const.
+//
+// span is implicitly convertible from C++ arrays, as well as most [1]
+// container-like types that provide a data() and size() method (such as
+// std::vector<T>). A mutable span<T> can also be implicitly converted to an
+// immutable span<const T>.
+//
+// Consider using a span for functions that take a data pointer and size
+// parameter: it allows the function to still act on an array-like type, while
+// allowing the caller code to be a bit more concise.
+//
+// For read-only data access pass a span<const T>: the caller can supply either
+// a span<const T> or a span<T>, while the callee will have a read-only view.
+// For read-write access a mutable span<T> is required.
+//
+// Without span:
+//   Read-Only:
+//     // std::string HexEncode(const uint8_t* data, size_t size);
+//     std::vector<uint8_t> data_buffer = GenerateData();
+//     std::string r = HexEncode(data_buffer.data(), data_buffer.size());
+//
+//   Mutable:
+//     // ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args...);
+//     char str_buffer[100];
+//     SafeSNPrintf(str_buffer, sizeof(str_buffer), "Pi ~= %lf", 3.14);
+//
+// With span:
+//   Read-Only:
+//     // std::string HexEncode(gurl_base::span<const uint8_t> data);
+//     std::vector<uint8_t> data_buffer = GenerateData();
+//     std::string r = HexEncode(data_buffer);
+//
+//   Mutable:
+//     // ssize_t SafeSNPrintf(gurl_base::span<char>, const char* fmt, Args...);
+//     char str_buffer[100];
+//     SafeSNPrintf(str_buffer, "Pi ~= %lf", 3.14);
+//
+// Spans with "const" and pointers
+// -------------------------------
+//
+// Const and pointers can get confusing. Here are vectors of pointers and their
+// corresponding spans:
+//
+//   const std::vector<int*>        =>  gurl_base::span<int* const>
+//   std::vector<const int*>        =>  gurl_base::span<const int*>
+//   const std::vector<const int*>  =>  gurl_base::span<const int* const>
+//
+// Differences from the C++20 draft
+// --------------------------------
+//
+// http://eel.is/c++draft/views contains the latest C++20 draft of std::span.
+// Chromium tries to follow the draft as close as possible. Differences between
+// the draft and the implementation are documented in subsections below.
+//
+// Differences from [span.objectrep]:
+// - as_bytes() and as_writable_bytes() return spans of uint8_t instead of
+//   std::byte (std::byte is a C++17 feature)
+//
+// Differences from [span.cons]:
+// - Constructing a static span (i.e. Extent != dynamic_extent) from a dynamic
+//   sized container (e.g. std::vector) requires an explicit conversion (in the
+//   C++20 draft this is simply UB)
+//
+// Differences from [span.obs]:
+// - empty() is marked with WARN_UNUSED_RESULT instead of [[nodiscard]]
+//   ([[nodiscard]] is a C++17 feature)
+//
+// Furthermore, all constructors and methods are marked noexcept due to the lack
+// of exceptions in Chromium.
+//
+// Due to the lack of class template argument deduction guides in C++14
+// appropriate make_span() utility functions are provided.
+
+// [span], class template span
+template <typename T, size_t Extent>
+class span : public internal::ExtentStorage<Extent> {
+ private:
+  using ExtentStorage = internal::ExtentStorage<Extent>;
+
+ public:
+  using element_type = T;
+  using value_type = std::remove_cv_t<T>;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using pointer = T*;
+  using reference = T&;
+  using iterator = CheckedContiguousIterator<T>;
+  // TODO(https://crbug.com/828324): Drop the const_iterator typedef once gMock
+  // supports containers without this nested type.
+  using const_iterator = iterator;
+  using reverse_iterator = std::reverse_iterator<iterator>;
+  static constexpr size_t extent = Extent;
+
+  // [span.cons], span constructors, copy, assignment, and destructor
+  constexpr span() noexcept : ExtentStorage(0), data_(nullptr) {
+    static_assert(Extent == dynamic_extent || Extent == 0, "Invalid Extent");
+  }
+
+  constexpr span(T* data, size_t size) noexcept
+      : ExtentStorage(size), data_(data) {
+    GURL_CHECK(Extent == dynamic_extent || Extent == size);
+  }
+
+  // Artificially templatized to break ambiguity for span(ptr, 0).
+  template <typename = void>
+  constexpr span(T* begin, T* end) noexcept : span(begin, end - begin) {
+    // Note: GURL_CHECK_LE is not constexpr, hence regular GURL_CHECK must be used.
+    GURL_CHECK(begin <= end);
+  }
+
+  template <
+      size_t N,
+      typename = internal::EnableIfSpanCompatibleArray<T (&)[N], T, Extent>>
+  constexpr span(T (&array)[N]) noexcept : span(gurl_base::data(array), N) {}
+
+  template <
+      typename U,
+      size_t N,
+      typename =
+          internal::EnableIfSpanCompatibleArray<std::array<U, N>&, T, Extent>>
+  constexpr span(std::array<U, N>& array) noexcept
+      : span(gurl_base::data(array), N) {}
+
+  template <typename U,
+            size_t N,
+            typename = internal::
+                EnableIfSpanCompatibleArray<const std::array<U, N>&, T, Extent>>
+  constexpr span(const std::array<U, N>& array) noexcept
+      : span(gurl_base::data(array), N) {}
+
+  // Conversion from a container that has compatible gurl_base::data() and integral
+  // gurl_base::size().
+  template <
+      typename Container,
+      typename =
+          internal::EnableIfSpanCompatibleContainerAndSpanIsDynamic<Container&,
+                                                                    T,
+                                                                    Extent>>
+  constexpr span(Container& container) noexcept
+      : span(gurl_base::data(container), gurl_base::size(container)) {}
+
+  template <
+      typename Container,
+      typename = internal::EnableIfSpanCompatibleContainerAndSpanIsDynamic<
+          const Container&,
+          T,
+          Extent>>
+  constexpr span(const Container& container) noexcept
+      : span(gurl_base::data(container), gurl_base::size(container)) {}
+
+  constexpr span(const span& other) noexcept = default;
+
+  // Conversions from spans of compatible types and extents: this allows a
+  // span<T> to be seamlessly used as a span<const T>, but not the other way
+  // around. If extent is not dynamic, OtherExtent has to be equal to Extent.
+  template <
+      typename U,
+      size_t OtherExtent,
+      typename =
+          internal::EnableIfLegalSpanConversion<U, OtherExtent, T, Extent>>
+  constexpr span(const span<U, OtherExtent>& other) noexcept
+      : span(other.data(), other.size()) {}
+
+  constexpr span& operator=(const span& other) noexcept = default;
+  ~span() noexcept = default;
+
+  // [span.sub], span subviews
+  template <size_t Count>
+  constexpr span<T, Count> first() const noexcept {
+    static_assert(Count <= Extent, "Count must not exceed Extent");
+    GURL_CHECK(Extent != dynamic_extent || Count <= size());
+    return {data(), Count};
+  }
+
+  template <size_t Count>
+  constexpr span<T, Count> last() const noexcept {
+    static_assert(Count <= Extent, "Count must not exceed Extent");
+    GURL_CHECK(Extent != dynamic_extent || Count <= size());
+    return {data() + (size() - Count), Count};
+  }
+
+  template <size_t Offset, size_t Count = dynamic_extent>
+  constexpr span<T,
+                 (Count != dynamic_extent
+                      ? Count
+                      : (Extent != dynamic_extent ? Extent - Offset
+                                                  : dynamic_extent))>
+  subspan() const noexcept {
+    static_assert(Offset <= Extent, "Offset must not exceed Extent");
+    static_assert(Count == dynamic_extent || Count <= Extent - Offset,
+                  "Count must not exceed Extent - Offset");
+    GURL_CHECK(Extent != dynamic_extent || Offset <= size());
+    GURL_CHECK(Extent != dynamic_extent || Count == dynamic_extent ||
+          Count <= size() - Offset);
+    return {data() + Offset, Count != dynamic_extent ? Count : size() - Offset};
+  }
+
+  constexpr span<T, dynamic_extent> first(size_t count) const noexcept {
+    // Note: GURL_CHECK_LE is not constexpr, hence regular GURL_CHECK must be used.
+    GURL_CHECK(count <= size());
+    return {data(), count};
+  }
+
+  constexpr span<T, dynamic_extent> last(size_t count) const noexcept {
+    // Note: GURL_CHECK_LE is not constexpr, hence regular GURL_CHECK must be used.
+    GURL_CHECK(count <= size());
+    return {data() + (size() - count), count};
+  }
+
+  constexpr span<T, dynamic_extent> subspan(size_t offset,
+                                            size_t count = dynamic_extent) const
+      noexcept {
+    // Note: GURL_CHECK_LE is not constexpr, hence regular GURL_CHECK must be used.
+    GURL_CHECK(offset <= size());
+    GURL_CHECK(count == dynamic_extent || count <= size() - offset);
+    return {data() + offset, count != dynamic_extent ? count : size() - offset};
+  }
+
+  // [span.obs], span observers
+  constexpr size_t size() const noexcept { return ExtentStorage::size(); }
+  constexpr size_t size_bytes() const noexcept { return size() * sizeof(T); }
+  constexpr bool empty() const noexcept WARN_UNUSED_RESULT {
+    return size() == 0;
+  }
+
+  // [span.elem], span element access
+  constexpr T& operator[](size_t idx) const noexcept {
+    // Note: GURL_CHECK_LT is not constexpr, hence regular GURL_CHECK must be used.
+    GURL_CHECK(idx < size());
+    return *(data() + idx);
+  }
+
+  constexpr T& front() const noexcept {
+    static_assert(Extent == dynamic_extent || Extent > 0,
+                  "Extent must not be 0");
+    GURL_CHECK(Extent != dynamic_extent || !empty());
+    return *data();
+  }
+
+  constexpr T& back() const noexcept {
+    static_assert(Extent == dynamic_extent || Extent > 0,
+                  "Extent must not be 0");
+    GURL_CHECK(Extent != dynamic_extent || !empty());
+    return *(data() + size() - 1);
+  }
+
+  constexpr T* data() const noexcept { return data_; }
+
+  // [span.iter], span iterator support
+  constexpr iterator begin() const noexcept {
+    return iterator(data_, data_ + size());
+  }
+
+  constexpr iterator end() const noexcept {
+    return iterator(data_, data_ + size(), data_ + size());
+  }
+
+  constexpr reverse_iterator rbegin() const noexcept {
+    return reverse_iterator(end());
+  }
+
+  constexpr reverse_iterator rend() const noexcept {
+    return reverse_iterator(begin());
+  }
+
+ private:
+  T* data_;
+};
+
+// span<T, Extent>::extent can not be declared inline prior to C++17, hence this
+// definition is required.
+template <class T, size_t Extent>
+constexpr size_t span<T, Extent>::extent;
+
+// [span.objectrep], views of object representation
+template <typename T, size_t X>
+span<const uint8_t, (X == dynamic_extent ? dynamic_extent : sizeof(T) * X)>
+as_bytes(span<T, X> s) noexcept {
+  return {reinterpret_cast<const uint8_t*>(s.data()), s.size_bytes()};
+}
+
+template <typename T,
+          size_t X,
+          typename = std::enable_if_t<!std::is_const<T>::value>>
+span<uint8_t, (X == dynamic_extent ? dynamic_extent : sizeof(T) * X)>
+as_writable_bytes(span<T, X> s) noexcept {
+  return {reinterpret_cast<uint8_t*>(s.data()), s.size_bytes()};
+}
+
+// Type-deducing helpers for constructing a span.
+template <int&... ExplicitArgumentBarrier, typename T>
+constexpr span<T> make_span(T* data, size_t size) noexcept {
+  return {data, size};
+}
+
+template <int&... ExplicitArgumentBarrier, typename T>
+constexpr span<T> make_span(T* begin, T* end) noexcept {
+  return {begin, end};
+}
+
+// make_span utility function that deduces both the span's value_type and extent
+// from the passed in argument.
+//
+// Usage: auto span = gurl_base::make_span(...);
+template <int&... ExplicitArgumentBarrier, typename Container>
+constexpr auto make_span(Container&& container) noexcept {
+  using T =
+      std::remove_pointer_t<decltype(gurl_base::data(std::declval<Container>()))>;
+  using Extent = internal::Extent<Container>;
+  return span<T, Extent::value>(std::forward<Container>(container));
+}
+
+// make_span utility function that allows callers to explicit specify the span's
+// extent, the value_type is deduced automatically. This is useful when passing
+// a dynamically sized container to a method expecting static spans, when the
+// container is known to have the correct size.
+//
+// Note: This will GURL_CHECK that N indeed matches size(container).
+//
+// Usage: auto static_span = gurl_base::make_span<N>(...);
+template <size_t N, int&... ExplicitArgumentBarrier, typename Container>
+constexpr auto make_span(Container&& container) noexcept {
+  using T =
+      std::remove_pointer_t<decltype(gurl_base::data(std::declval<Container>()))>;
+  return span<T, N>(gurl_base::data(container), gurl_base::size(container));
+}
+
+}  // namespace gurl_base
+
+#endif  // BASE_CONTAINERS_SPAN_H_
diff --git a/base/containers/util.h b/base/containers/util.h new file mode 100644 index 0000000..14f012a --- /dev/null +++ b/base/containers/util.h
@@ -0,0 +1,24 @@
+// Copyright 2018 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CONTAINERS_UTIL_H_
+#define BASE_CONTAINERS_UTIL_H_
+
+#include <stdint.h>
+
+namespace gurl_base {
+
+// Returns the address held by |t| as an unsigned integer, so that addresses
+// can be compared with ordinary integer arithmetic.
+//
+// TODO(crbug.com/817982): What we really need is for checked_math.h to be
+// able to do checked arithmetic on pointers.
+template <typename T>
+inline uintptr_t get_uintptr(const T* t) {
+  return reinterpret_cast<uintptr_t>(t);
+}
+
+}  // namespace gurl_base
+
+#endif  // BASE_CONTAINERS_UTIL_H_
diff --git a/base/optional.h b/base/optional.h index a043122..f07cc66 100644 --- a/base/optional.h +++ b/base/optional.h
@@ -9,7 +9,7 @@ #include <type_traits> #include <utility> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "base/template_util.h" namespace gurl_base { @@ -359,27 +359,23 @@ // Helper to conditionally enable converting constructors and assign operators. template <typename T, typename U> -struct IsConvertibleFromOptional - : std::integral_constant< - bool, - std::is_constructible<T, Optional<U>&>::value || - std::is_constructible<T, const Optional<U>&>::value || - std::is_constructible<T, Optional<U>&&>::value || - std::is_constructible<T, const Optional<U>&&>::value || - std::is_convertible<Optional<U>&, T>::value || - std::is_convertible<const Optional<U>&, T>::value || - std::is_convertible<Optional<U>&&, T>::value || - std::is_convertible<const Optional<U>&&, T>::value> {}; +using IsConvertibleFromOptional = + disjunction<std::is_constructible<T, Optional<U>&>, + std::is_constructible<T, const Optional<U>&>, + std::is_constructible<T, Optional<U>&&>, + std::is_constructible<T, const Optional<U>&&>, + std::is_convertible<Optional<U>&, T>, + std::is_convertible<const Optional<U>&, T>, + std::is_convertible<Optional<U>&&, T>, + std::is_convertible<const Optional<U>&&, T>>; template <typename T, typename U> -struct IsAssignableFromOptional - : std::integral_constant< - bool, - IsConvertibleFromOptional<T, U>::value || - std::is_assignable<T&, Optional<U>&>::value || - std::is_assignable<T&, const Optional<U>&>::value || - std::is_assignable<T&, Optional<U>&&>::value || - std::is_assignable<T&, const Optional<U>&&>::value> {}; +using IsAssignableFromOptional = + disjunction<IsConvertibleFromOptional<T, U>, + std::is_assignable<T&, Optional<U>&>, + std::is_assignable<T&, const Optional<U>&>, + std::is_assignable<T&, Optional<U>&&>, + std::is_assignable<T&, const Optional<U>&&>>; // Forward compatibility for C++17. // Introduce one more deeper nested namespace to avoid leaking using std::swap.
diff --git a/base/stl_util.h b/base/stl_util.h index 7fc8108..14d2b6f 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -23,7 +23,7 @@ #include <utility> #include <vector> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "base/optional.h" #include "base/template_util.h" @@ -561,14 +561,6 @@ return removed; } -template <class T, class Allocator, class Value> -size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) { - // Unlike std::forward_list::remove, this function template accepts - // heterogeneous types and does not force a conversion to the container's - // value type before invoking the == operator. - return EraseIf(container, [&](const T& cur) { return cur == value; }); -} - template <class T, class Allocator, class Predicate> size_t EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) { // Note: std::forward_list does not have a size() API, thus we need to use the @@ -579,14 +571,6 @@ return old_size - std::distance(container.begin(), container.end()); } -template <class T, class Allocator, class Value> -size_t Erase(std::list<T, Allocator>& container, const Value& value) { - // Unlike std::list::remove, this function template accepts heterogeneous - // types and does not force a conversion to the container's value type before - // invoking the == operator. - return EraseIf(container, [&](const T& cur) { return cur == value; }); -} - template <class T, class Allocator, class Predicate> size_t EraseIf(std::list<T, Allocator>& container, Predicate pred) { size_t old_size = container.size(); @@ -661,6 +645,22 @@ return internal::IterateAndEraseIf(container, pred); } +template <class T, class Allocator, class Value> +size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) { + // Unlike std::forward_list::remove, this function template accepts + // heterogeneous types and does not force a conversion to the container's + // value type before invoking the == operator. 
+ return EraseIf(container, [&](const T& cur) { return cur == value; }); +} + +template <class T, class Allocator, class Value> +size_t Erase(std::list<T, Allocator>& container, const Value& value) { + // Unlike std::list::remove, this function template accepts heterogeneous + // types and does not force a conversion to the container's value type before + // invoking the == operator. + return EraseIf(container, [&](const T& cur) { return cur == value; }); +} + // A helper class to be used as the predicate with |EraseIf| to implement // in-place set intersection. Helps implement the algorithm of going through // each container an element at a time, erasing elements from the first
diff --git a/base/strings/BUILD b/base/strings/BUILD index 5043906..604cc81 100644 --- a/base/strings/BUILD +++ b/base/strings/BUILD
@@ -19,6 +19,7 @@ "string_piece.h", "string_piece_forward.h", "string_util.h", + "string_util_internal.h", "string_util_posix.h", "utf_string_conversion_utils.h", "utf_string_conversions.h",
diff --git a/base/strings/no_trigraphs_unittest.cc b/base/strings/no_trigraphs_unittest.cc new file mode 100644 index 0000000..91fbda5 --- /dev/null +++ b/base/strings/no_trigraphs_unittest.cc
@@ -0,0 +1,13 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/strcat.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// "??=" is the trigraph spelling of '#'. This test only passes when the
+// compiler leaves that sequence untouched inside a string literal, i.e. when
+// trigraph replacement is not applied to this translation unit.
+TEST(NoTrigraphs, Basic) {
+  EXPECT_EQ("??=", gurl_base::StrCat({"?", "?", "="}));
+}
diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc index ab6e112..726ccf9 100644 --- a/base/strings/safe_sprintf.cc +++ b/base/strings/safe_sprintf.cc
@@ -35,7 +35,7 @@ // errno = 13 (Access denied) // In most of the anticipated use cases, that's probably the preferred // behavior. -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #define DEBUG_CHECK RAW_CHECK #else #define DEBUG_CHECK(x) do { if (x) { } } while (0)
diff --git a/base/strings/safe_sprintf_unittest.cc b/base/strings/safe_sprintf_unittest.cc index b7a67fa..cde415d 100644 --- a/base/strings/safe_sprintf_unittest.cc +++ b/base/strings/safe_sprintf_unittest.cc
@@ -12,7 +12,7 @@ #include <limits> #include <memory> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/macros.h" #include "build/build_config.h" #include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/strcat.cc b/base/strings/strcat.cc index db41a69..6b007c7 100644 --- a/base/strings/strcat.cc +++ b/base/strings/strcat.cc
@@ -4,82 +4,42 @@ #include "base/strings/strcat.h" +#include <string> + +#include "base/strings/strcat_internal.h" + namespace gurl_base { -namespace { - -// Reserves an additional amount of capacity in the given string, growing by at -// least 2x if necessary. Used by StrAppendT(). -// -// The "at least 2x" growing rule duplicates the exponential growth of -// std::string. The problem is that most implementations of reserve() will grow -// exactly to the requested amount instead of exponentially growing like would -// happen when appending normally. If we didn't do this, an append after the -// call to StrAppend() would definitely cause a reallocation, and loops with -// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want -// StrAppend() to have the same semantics as std::string::append(). -template <typename String> -void ReserveAdditionalIfNeeded(String* str, - typename String::size_type additional) { - const size_t required = str->size() + additional; - // Check whether we need to reserve additional capacity at all. 
- if (required <= str->capacity()) - return; - - str->reserve(std::max(required, str->capacity() * 2)); -} - -template <typename DestString, typename InputString> -void StrAppendT(DestString* dest, span<const InputString> pieces) { - size_t additional_size = 0; - for (const auto& cur : pieces) - additional_size += cur.size(); - ReserveAdditionalIfNeeded(dest, additional_size); - - for (const auto& cur : pieces) - dest->append(cur.data(), cur.size()); -} - -} // namespace - std::string StrCat(span<const StringPiece> pieces) { - std::string result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } string16 StrCat(span<const StringPiece16> pieces) { - string16 result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } std::string StrCat(span<const std::string> pieces) { - std::string result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } string16 StrCat(span<const string16> pieces) { - string16 result; - StrAppendT(&result, pieces); - return result; + return internal::StrCatT(pieces); } void StrAppend(std::string* dest, span<const StringPiece> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(string16* dest, span<const StringPiece16> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(std::string* dest, span<const std::string> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } void StrAppend(string16* dest, span<const string16> pieces) { - StrAppendT(dest, pieces); + internal::StrAppendT(dest, pieces); } } // namespace base
diff --git a/base/strings/strcat.h b/base/strings/strcat.h index 220fa24..2d85304 100644 --- a/base/strings/strcat.h +++ b/base/strings/strcat.h
@@ -69,10 +69,11 @@ // Initializer list forwards to the array version. inline std::string StrCat(std::initializer_list<StringPiece> pieces) { - return StrCat(make_span(pieces.begin(), pieces.size())); + return StrCat(make_span(pieces)); } + inline string16 StrCat(std::initializer_list<StringPiece16> pieces) { - return StrCat(make_span(pieces.begin(), pieces.size())); + return StrCat(make_span(pieces)); } // StrAppend ------------------------------------------------------------------- @@ -91,13 +92,18 @@ // Initializer list forwards to the array version. inline void StrAppend(std::string* dest, std::initializer_list<StringPiece> pieces) { - return StrAppend(dest, make_span(pieces.begin(), pieces.size())); + StrAppend(dest, make_span(pieces)); } + inline void StrAppend(string16* dest, std::initializer_list<StringPiece16> pieces) { - return StrAppend(dest, make_span(pieces.begin(), pieces.size())); + StrAppend(dest, make_span(pieces)); } } // namespace base +#if defined(OS_WIN) +#include "base/strings/strcat_win.h" +#endif + #endif // BASE_STRINGS_STRCAT_H_
diff --git a/base/strings/strcat_internal.h b/base/strings/strcat_internal.h new file mode 100644 index 0000000..24387d6 --- /dev/null +++ b/base/strings/strcat_internal.h
@@ -0,0 +1,65 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_STRCAT_INTERNAL_H_
+#define BASE_STRINGS_STRCAT_INTERNAL_H_
+
+#include <algorithm>
+#include <string>
+
+#include "base/containers/span.h"
+
+namespace gurl_base {
+
+namespace internal {
+
+// Reserves an additional amount of capacity in the given string, growing by at
+// least 2x if necessary. Used by StrAppendT().
+//
+// The "at least 2x" growing rule duplicates the exponential growth of
+// std::string. The problem is that most implementations of reserve() will grow
+// exactly to the requested amount instead of exponentially growing like would
+// happen when appending normally. If we didn't do this, an append after the
+// call to StrAppend() would definitely cause a reallocation, and loops with
+// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
+// StrAppend() to have the same semantics as std::string::append().
+template <typename String>
+void ReserveAdditionalIfNeeded(String* str,
+                               typename String::size_type additional) {
+  const size_t required = str->size() + additional;
+  // Check whether we need to reserve additional capacity at all.
+  if (required <= str->capacity())
+    return;
+
+  str->reserve(std::max(required, str->capacity() * 2));
+}
+
+// Appends every element of |pieces| to |*dest|. The combined size is summed
+// first so that capacity is reserved once before any characters are copied.
+template <typename DestString, typename InputString>
+void StrAppendT(DestString* dest, span<const InputString> pieces) {
+  size_t additional_size = 0;
+  for (const auto& cur : pieces)
+    additional_size += cur.size();
+  ReserveAdditionalIfNeeded(dest, additional_size);
+
+  for (const auto& cur : pieces)
+    dest->append(cur.data(), cur.size());
+}
+
+// Returns a new std::basic_string, built from StringT's value_type and
+// traits_type, that holds the concatenation of |pieces|.
+template <typename StringT>
+auto StrCatT(span<const StringT> pieces) {
+  std::basic_string<typename StringT::value_type, typename StringT::traits_type>
+      result;
+  StrAppendT(&result, pieces);
+  return result;
+}
+
+}  // namespace internal
+
+}  // namespace gurl_base
+
+#endif  // BASE_STRINGS_STRCAT_INTERNAL_H_
diff --git a/base/strings/strcat_win.cc b/base/strings/strcat_win.cc new file mode 100644 index 0000000..60b22bc --- /dev/null +++ b/base/strings/strcat_win.cc
@@ -0,0 +1,37 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/strcat_win.h"
+
+#include <string>
+
+#include "base/containers/span.h"
+#include "base/strings/strcat_internal.h"
+#include "base/strings/string_piece.h"
+
+namespace gurl_base {
+
+// std::wstring / WStringPiece overloads of StrCat() and StrAppend(). Each one
+// forwards to the shared templates in the internal namespace.
+#if defined(BASE_STRING16_IS_STD_U16STRING)
+
+std::wstring StrCat(span<const WStringPiece> pieces) {
+  return internal::StrCatT(pieces);
+}
+
+std::wstring StrCat(span<const std::wstring> pieces) {
+  return internal::StrCatT(pieces);
+}
+
+void StrAppend(std::wstring* dest, span<const WStringPiece> pieces) {
+  internal::StrAppendT(dest, pieces);
+}
+
+void StrAppend(std::wstring* dest, span<const std::wstring> pieces) {
+  internal::StrAppendT(dest, pieces);
+}
+
+#endif  // defined(BASE_STRING16_IS_STD_U16STRING)
+
+}  // namespace gurl_base
diff --git a/base/strings/strcat_win.h b/base/strings/strcat_win.h new file mode 100644 index 0000000..68f8a58 --- /dev/null +++ b/base/strings/strcat_win.h
@@ -0,0 +1,45 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRCAT_WIN_H_ +#define BASE_STRINGS_STRCAT_WIN_H_ + +#include <initializer_list> +#include <string> + +#include "polyfills/base/base_export.h" +#include "base/compiler_specific.h" +#include "base/containers/span.h" +#include "base/strings/string_piece.h" + +namespace gurl_base { + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring +// and gurl_base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is +// std::u16string. +#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT void StrAppend(std::wstring* dest, span<const WStringPiece> pieces); +BASE_EXPORT void StrAppend(std::wstring* dest, span<const std::wstring> pieces); + +inline void StrAppend(std::wstring* dest, + std::initializer_list<WStringPiece> pieces) { + StrAppend(dest, make_span(pieces)); +} + +BASE_EXPORT std::wstring StrCat(span<const WStringPiece> pieces) + WARN_UNUSED_RESULT; +BASE_EXPORT std::wstring StrCat(span<const std::wstring> pieces) + WARN_UNUSED_RESULT; + +inline std::wstring StrCat(std::initializer_list<WStringPiece> pieces) { + return StrCat(make_span(pieces)); +} +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace gurl_base + +#endif // BASE_STRINGS_STRCAT_WIN_H_
diff --git a/base/strings/string16.cc b/base/strings/string16.cc index 6ac8b8b..426d5b6 100644 --- a/base/strings/string16.cc +++ b/base/strings/string16.cc
@@ -13,6 +13,8 @@ #elif defined(WCHAR_T_IS_UTF32) +#include <string.h> + #include <ostream> #include "base/strings/string_piece.h"
diff --git a/base/strings/string_number_conversions.cc b/base/strings/string_number_conversions.cc index 701d71c..d1886b1 100644 --- a/base/strings/string_number_conversions.cc +++ b/base/strings/string_number_conversions.cc
@@ -4,458 +4,120 @@ #include "base/strings/string_number_conversions.h" -#include <ctype.h> -#include <errno.h> -#include <stdlib.h> -#include <wctype.h> +#include <iterator> +#include <string> -#include <limits> -#include <type_traits> - +#include "base/containers/span.h" #include "polyfills/base/logging.h" -#include "base/no_destructor.h" -#include "base/numerics/safe_math.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "base/third_party/double_conversion/double-conversion/double-conversion.h" +#include "base/strings/string16.h" +#include "base/strings/string_number_conversions_internal.h" +#include "base/strings/string_piece.h" namespace gurl_base { -namespace { - -template <typename STR, typename INT> -struct IntToStringT { - static STR IntToString(INT value) { - // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. - // So round up to allocate 3 output characters per byte, plus 1 for '-'. - const size_t kOutputBufSize = - 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; - - // Create the string in a temporary buffer, write it back to front, and - // then return the substr of what we ended up using. - using CHR = typename STR::value_type; - CHR outbuf[kOutputBufSize]; - - // The ValueOrDie call below can never fail, because UnsignedAbs is valid - // for all valid inputs. 
- typename std::make_unsigned<INT>::type res = - CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); - - CHR* end = outbuf + kOutputBufSize; - CHR* i = end; - do { - --i; - GURL_DCHECK(i != outbuf); - *i = static_cast<CHR>((res % 10) + '0'); - res /= 10; - } while (res != 0); - if (IsValueNegative(value)) { - --i; - GURL_DCHECK(i != outbuf); - *i = static_cast<CHR>('-'); - } - return STR(i, end); - } -}; - -// Utility to convert a character to a digit in a given base -template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { -}; - -// Faster specialization for bases <= 10 -template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { - public: - static bool Convert(CHAR c, uint8_t* digit) { - if (c >= '0' && c < '0' + BASE) { - *digit = static_cast<uint8_t>(c - '0'); - return true; - } - return false; - } -}; - -// Specialization for bases where 10 < base <= 36 -template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { - public: - static bool Convert(CHAR c, uint8_t* digit) { - if (c >= '0' && c <= '9') { - *digit = c - '0'; - } else if (c >= 'a' && c < 'a' + BASE - 10) { - *digit = c - 'a' + 10; - } else if (c >= 'A' && c < 'A' + BASE - 10) { - *digit = c - 'A' + 10; - } else { - return false; - } - return true; - } -}; - -template <int BASE, typename CHAR> -bool CharToDigit(CHAR c, uint8_t* digit) { - return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); -} - -// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it -// is locale independent, whereas the functions we are replacing were -// locale-dependent. TBD what is desired, but for the moment let's not -// introduce a change in behaviour. 
-template<typename CHAR> class WhitespaceHelper { -}; - -template<> class WhitespaceHelper<char> { - public: - static bool Invoke(char c) { - return 0 != isspace(static_cast<unsigned char>(c)); - } -}; - -template<> class WhitespaceHelper<char16> { - public: - static bool Invoke(char16 c) { - return 0 != iswspace(c); - } -}; - -template<typename CHAR> bool LocalIsWhitespace(CHAR c) { - return WhitespaceHelper<CHAR>::Invoke(c); -} - -// IteratorRangeToNumberTraits should provide: -// - a typedef for iterator_type, the iterator type used as input. -// - a typedef for value_type, the target numeric type. -// - static functions min, max (returning the minimum and maximum permitted -// values) -// - constant kBase, the base in which to interpret the input -template<typename IteratorRangeToNumberTraits> -class IteratorRangeToNumber { - public: - typedef IteratorRangeToNumberTraits traits; - typedef typename traits::iterator_type const_iterator; - typedef typename traits::value_type value_type; - - // Generalized iterator-range-to-number conversion. - // - static bool Invoke(const_iterator begin, - const_iterator end, - value_type* output) { - bool valid = true; - - while (begin != end && LocalIsWhitespace(*begin)) { - valid = false; - ++begin; - } - - if (begin != end && *begin == '-') { - if (!std::numeric_limits<value_type>::is_signed) { - *output = 0; - valid = false; - } else if (!Negative::Invoke(begin + 1, end, output)) { - valid = false; - } - } else { - if (begin != end && *begin == '+') { - ++begin; - } - if (!Positive::Invoke(begin, end, output)) { - valid = false; - } - } - - return valid; - } - - private: - // Sign provides: - // - a static function, CheckBounds, that determines whether the next digit - // causes an overflow/underflow - // - a static function, Increment, that appends the next digit appropriately - // according to the sign of the number being parsed. 
- template<typename Sign> - class Base { - public: - static bool Invoke(const_iterator begin, const_iterator end, - typename traits::value_type* output) { - *output = 0; - - if (begin == end) { - return false; - } - - // Note: no performance difference was found when using template - // specialization to remove this check in bases other than 16 - if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && - (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { - begin += 2; - } - - for (const_iterator current = begin; current != end; ++current) { - uint8_t new_digit = 0; - - if (!CharToDigit<traits::kBase>(*current, &new_digit)) { - return false; - } - - if (current != begin) { - if (!Sign::CheckBounds(output, new_digit)) { - return false; - } - *output *= traits::kBase; - } - - Sign::Increment(new_digit, output); - } - return true; - } - }; - - class Positive : public Base<Positive> { - public: - static bool CheckBounds(value_type* output, uint8_t new_digit) { - if (*output > static_cast<value_type>(traits::max() / traits::kBase) || - (*output == static_cast<value_type>(traits::max() / traits::kBase) && - new_digit > traits::max() % traits::kBase)) { - *output = traits::max(); - return false; - } - return true; - } - static void Increment(uint8_t increment, value_type* output) { - *output += increment; - } - }; - - class Negative : public Base<Negative> { - public: - static bool CheckBounds(value_type* output, uint8_t new_digit) { - if (*output < traits::min() / traits::kBase || - (*output == traits::min() / traits::kBase && - new_digit > 0 - traits::min() % traits::kBase)) { - *output = traits::min(); - return false; - } - return true; - } - static void Increment(uint8_t increment, value_type* output) { - *output -= increment; - } - }; -}; - -template<typename ITERATOR, typename VALUE, int BASE> -class BaseIteratorRangeToNumberTraits { - public: - typedef ITERATOR iterator_type; - typedef VALUE value_type; - static value_type min() { - return 
std::numeric_limits<value_type>::min(); - } - static value_type max() { - return std::numeric_limits<value_type>::max(); - } - static const int kBase = BASE; -}; - -template<typename ITERATOR> -class BaseHexIteratorRangeToIntTraits - : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { -}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToUIntTraits - : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToInt64Traits - : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {}; - -template <typename ITERATOR> -class BaseHexIteratorRangeToUInt64Traits - : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {}; - -typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> - HexIteratorRangeToIntTraits; - -typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator> - HexIteratorRangeToUIntTraits; - -typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> - HexIteratorRangeToInt64Traits; - -typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator> - HexIteratorRangeToUInt64Traits; - -template <typename VALUE, int BASE> -class StringPieceToNumberTraits - : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, - VALUE, - BASE> { -}; - -template <typename VALUE> -bool StringToIntImpl(StringPiece input, VALUE* output) { - return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( - input.begin(), input.end(), output); -} - -template <typename VALUE, int BASE> -class StringPiece16ToNumberTraits - : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, - VALUE, - BASE> { -}; - -template <typename VALUE> -bool String16ToIntImpl(StringPiece16 input, VALUE* output) { - return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( - input.begin(), input.end(), output); -} - -} // namespace - std::string NumberToString(int value) { - return 
IntToStringT<std::string, int>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(int value) { - return IntToStringT<string16, int>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned value) { - return IntToStringT<std::string, unsigned>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned value) { - return IntToStringT<string16, unsigned>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(long value) { - return IntToStringT<std::string, long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(long value) { - return IntToStringT<string16, long>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned long value) { - return IntToStringT<std::string, unsigned long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned long value) { - return IntToStringT<string16, unsigned long>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(long long value) { - return IntToStringT<std::string, long long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(long long value) { - return IntToStringT<string16, long long>::IntToString(value); + return internal::IntToStringT<string16>(value); } std::string NumberToString(unsigned long long value) { - return IntToStringT<std::string, unsigned long long>::IntToString(value); + return internal::IntToStringT<std::string>(value); } string16 NumberToString16(unsigned long long value) { - return IntToStringT<string16, unsigned long long>::IntToString(value); -} - -static const double_conversion::DoubleToStringConverter* -GetDoubleToStringConverter() { - static 
NoDestructor<double_conversion::DoubleToStringConverter> converter( - double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, - nullptr, nullptr, 'e', -6, 12, 0, 0); - return converter.get(); + return internal::IntToStringT<string16>(value); } std::string NumberToString(double value) { - char buffer[32]; - double_conversion::StringBuilder builder(buffer, sizeof(buffer)); - GetDoubleToStringConverter()->ToShortest(value, &builder); - return std::string(buffer, builder.position()); + return internal::DoubleToStringT<std::string>(value); } -gurl_base::string16 NumberToString16(double value) { - char buffer[32]; - double_conversion::StringBuilder builder(buffer, sizeof(buffer)); - GetDoubleToStringConverter()->ToShortest(value, &builder); - - // The number will be ASCII. This creates the string using the "input - // iterator" variant which promotes from 8-bit to 16-bit via "=". - return gurl_base::string16(&buffer[0], &buffer[builder.position()]); +string16 NumberToString16(double value) { + return internal::DoubleToStringT<string16>(value); } bool StringToInt(StringPiece input, int* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt(StringPiece16 input, int* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint(StringPiece input, unsigned* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint(StringPiece16 input, unsigned* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt64(StringPiece input, int64_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToInt64(StringPiece16 input, int64_t* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool 
StringToUint64(StringPiece input, uint64_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToUint64(StringPiece16 input, uint64_t* output) { - return String16ToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToSizeT(StringPiece input, size_t* output) { - return StringToIntImpl(input, output); + return internal::StringToIntImpl(input, *output); } bool StringToSizeT(StringPiece16 input, size_t* output) { - return String16ToIntImpl(input, output); -} - -template <typename STRING, typename CHAR> -bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) { - static NoDestructor<double_conversion::StringToDoubleConverter> converter( - double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | - double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, - 0.0, 0, nullptr, nullptr); - - int processed_characters_count; - *output = converter->StringToDouble(data, input.size(), - &processed_characters_count); - - // Cases to return false: - // - If the input string is empty, there was nothing to parse. - // - If the value saturated to HUGE_VAL. - // - If the entire string was not processed, there are either characters - // remaining in the string after a parsed number, or the string does not - // begin with a parseable number. 
- // - If the first character is a space, there was leading whitespace - return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL && - static_cast<size_t>(processed_characters_count) == input.size() && - !IsUnicodeWhitespace(input[0]); + return internal::StringToIntImpl(input, *output); } bool StringToDouble(StringPiece input, double* output) { - return StringToDoubleImpl(input, input.data(), output); + return internal::StringToDoubleImpl(input, input.data(), *output); } bool StringToDouble(StringPiece16 input, double* output) { - return StringToDoubleImpl( - input, reinterpret_cast<const uint16_t*>(input.data()), output); + return internal::StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), *output); } std::string HexEncode(const void* bytes, size_t size) { @@ -477,69 +139,36 @@ } bool HexStringToInt(StringPiece input, int* output) { - return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( - input.begin(), input.end(), output); + return internal::HexStringToIntImpl(input, *output); } bool HexStringToUInt(StringPiece input, uint32_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke( - input.begin(), input.end(), output); + return internal::HexStringToIntImpl(input, *output); } bool HexStringToInt64(StringPiece input, int64_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( - input.begin(), input.end(), output); + return internal::HexStringToIntImpl(input, *output); } bool HexStringToUInt64(StringPiece input, uint64_t* output) { - return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke( - input.begin(), input.end(), output); -} - -template <typename Container> -static bool HexStringToByteContainer(StringPiece input, Container* output) { - GURL_DCHECK_EQ(output->size(), 0u); - size_t count = input.size(); - if (count == 0 || (count % 2) != 0) - return false; - for (uintptr_t i = 0; i < count / 2; ++i) { - uint8_t msb = 0; // most significant 
4 bits - uint8_t lsb = 0; // least significant 4 bits - if (!CharToDigit<16>(input[i * 2], &msb) || - !CharToDigit<16>(input[i * 2 + 1], &lsb)) { - return false; - } - output->push_back((msb << 4) | lsb); - } - return true; + return internal::HexStringToIntImpl(input, *output); } bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { - return HexStringToByteContainer(input, output); + GURL_DCHECK(output->empty()); + return internal::HexStringToByteContainer(input, std::back_inserter(*output)); } bool HexStringToString(StringPiece input, std::string* output) { - return HexStringToByteContainer(input, output); + GURL_DCHECK(output->empty()); + return internal::HexStringToByteContainer(input, std::back_inserter(*output)); } bool HexStringToSpan(StringPiece input, gurl_base::span<uint8_t> output) { - size_t count = input.size(); - if (count == 0 || (count % 2) != 0) + if (input.size() / 2 != output.size()) return false; - if (count / 2 != output.size()) - return false; - - for (uintptr_t i = 0; i < count / 2; ++i) { - uint8_t msb = 0; // most significant 4 bits - uint8_t lsb = 0; // least significant 4 bits - if (!CharToDigit<16>(input[i * 2], &msb) || - !CharToDigit<16>(input[i * 2 + 1], &lsb)) { - return false; - } - output[i] = (msb << 4) | lsb; - } - return true; + return internal::HexStringToByteContainer(input, output.begin()); } } // namespace base
diff --git a/base/strings/string_number_conversions.h b/base/strings/string_number_conversions.h index 872ead2..7bb51b6 100644 --- a/base/strings/string_number_conversions.h +++ b/base/strings/string_number_conversions.h
@@ -20,10 +20,6 @@ // ---------------------------------------------------------------------------- // IMPORTANT MESSAGE FROM YOUR SPONSOR // -// This file contains no "wstring" variants. New code should use string16. If -// you need to make old code work, use the UTF8 version and convert. Please do -// not add wstring variants. -// // Please do not add "convenience" functions for converting strings to integers // that return the value and ignore success/failure. That encourages people to // write code that doesn't properly handle the error conditions. @@ -154,4 +150,8 @@ } // namespace base +#if defined(OS_WIN) +#include "base/strings/string_number_conversions_win.h" +#endif + #endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h new file mode 100644 index 0000000..4f917f8 --- /dev/null +++ b/base/strings/string_number_conversions_internal.h
@@ -0,0 +1,303 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_ + +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <wctype.h> + +#include <limits> + +#include "polyfills/base/check_op.h" +#include "polyfills/base/logging.h" +#include "base/no_destructor.h" +#include "base/numerics/safe_math.h" +#include "base/strings/string_util.h" +#include "base/third_party/double_conversion/double-conversion/double-conversion.h" + +namespace gurl_base { + +namespace internal { + +template <typename STR, typename INT> +static STR IntToStringT(INT value) { + // Each bit needs log10(2) ~= 0.3 characters, so each byte needs ~2.4. + // So round up to allocate 3 output characters per byte, plus 1 for '-'. + const size_t kOutputBufSize = + 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; + + // Create the string in a temporary buffer, write it back to front, and + // then return the substr of what we ended up using. + using CHR = typename STR::value_type; + CHR outbuf[kOutputBufSize]; + + // The ValueOrDie call below can never fail, because UnsignedAbs is valid + // for all valid inputs.
+ std::make_unsigned_t<INT> res = + CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); + + CHR* end = outbuf + kOutputBufSize; + CHR* i = end; + do { + --i; + GURL_DCHECK(i != outbuf); + *i = static_cast<CHR>((res % 10) + '0'); + res /= 10; + } while (res != 0); + if (IsValueNegative(value)) { + --i; + GURL_DCHECK(i != outbuf); + *i = static_cast<CHR>('-'); + } + return STR(i, end); +} + +// Utility to convert a character to a digit in a given base +template <int BASE, typename CHAR> +Optional<uint8_t> CharToDigit(CHAR c) { + static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]"); + if (c >= '0' && c < '0' + std::min(BASE, 10)) + return c - '0'; + + if (c >= 'a' && c < 'a' + BASE - 10) + return c - 'a' + 10; + + if (c >= 'A' && c < 'A' + BASE - 10) + return c - 'A' + 10; + + return gurl_base::nullopt; +} + +// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it +// is locale independent, whereas the functions we are replacing were +// locale-dependent. TBD what is desired, but for the moment let's not +// introduce a change in behaviour. 
+template <typename CHAR> +class WhitespaceHelper {}; + +template <> +class WhitespaceHelper<char> { + public: + static bool Invoke(char c) { + return 0 != isspace(static_cast<unsigned char>(c)); + } +}; + +template <> +class WhitespaceHelper<char16> { + public: + static bool Invoke(char16 c) { return 0 != iswspace(c); } +}; + +template <typename CHAR> +bool LocalIsWhitespace(CHAR c) { + return WhitespaceHelper<CHAR>::Invoke(c); +} + +template <typename Number, int kBase> +class StringToNumberParser { + public: + struct Result { + Number value = 0; + bool valid = false; + }; + + static constexpr Number kMin = std::numeric_limits<Number>::min(); + static constexpr Number kMax = std::numeric_limits<Number>::max(); + + // Sign provides: + // - a static function, CheckBounds, that determines whether the next digit + // causes an overflow/underflow + // - a static function, Increment, that appends the next digit appropriately + // according to the sign of the number being parsed. + template <typename Sign> + class Base { + public: + template <typename Iter> + static Result Invoke(Iter begin, Iter end) { + Number value = 0; + + if (begin == end) { + return {value, false}; + } + + // Note: no performance difference was found when using template + // specialization to remove this check in bases other than 16 + if (kBase == 16 && end - begin > 2 && *begin == '0' && + (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { + begin += 2; + } + + for (Iter current = begin; current != end; ++current) { + Optional<uint8_t> new_digit = CharToDigit<kBase>(*current); + + if (!new_digit) { + return {value, false}; + } + + if (current != begin) { + Result result = Sign::CheckBounds(value, *new_digit); + if (!result.valid) + return result; + + value *= kBase; + } + + value = Sign::Increment(value, *new_digit); + } + return {value, true}; + } + }; + + class Positive : public Base<Positive> { + public: + static Result CheckBounds(Number value, uint8_t new_digit) { + if (value > 
static_cast<Number>(kMax / kBase) || + (value == static_cast<Number>(kMax / kBase) && + new_digit > kMax % kBase)) { + return {kMax, false}; + } + return {value, true}; + } + static Number Increment(Number lhs, uint8_t rhs) { return lhs + rhs; } + }; + + class Negative : public Base<Negative> { + public: + static Result CheckBounds(Number value, uint8_t new_digit) { + if (value < kMin / kBase || + (value == kMin / kBase && new_digit > 0 - kMin % kBase)) { + return {kMin, false}; + } + return {value, true}; + } + static Number Increment(Number lhs, uint8_t rhs) { return lhs - rhs; } + }; +}; + +template <typename Number, int kBase, typename Str> +auto StringToNumber(BasicStringPiece<Str> input) { + using Parser = StringToNumberParser<Number, kBase>; + using Result = typename Parser::Result; + + bool has_leading_whitespace = false; + auto begin = input.begin(); + auto end = input.end(); + + while (begin != end && LocalIsWhitespace(*begin)) { + has_leading_whitespace = true; + ++begin; + } + + if (begin != end && *begin == '-') { + if (!std::numeric_limits<Number>::is_signed) { + return Result{0, false}; + } + + Result result = Parser::Negative::Invoke(begin + 1, end); + result.valid &= !has_leading_whitespace; + return result; + } + + if (begin != end && *begin == '+') { + ++begin; + } + + Result result = Parser::Positive::Invoke(begin, end); + result.valid &= !has_leading_whitespace; + return result; +} + +template <typename STR, typename VALUE> +bool StringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { + auto result = StringToNumber<VALUE, 10>(input); + output = result.value; + return result.valid; +} + +template <typename STR, typename VALUE> +bool HexStringToIntImpl(BasicStringPiece<STR> input, VALUE& output) { + auto result = StringToNumber<VALUE, 16>(input); + output = result.value; + return result.valid; +} + +static const double_conversion::DoubleToStringConverter* +GetDoubleToStringConverter() { + static 
NoDestructor<double_conversion::DoubleToStringConverter> converter( + double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + nullptr, nullptr, 'e', -6, 12, 0, 0); + return converter.get(); +} + +// Converts a given (data, size) pair to a desired string type. For +// performance reasons, this dispatches to a different constructor if the +// passed-in data matches the string's value_type. +template <typename StringT> +StringT ToString(const typename StringT::value_type* data, size_t size) { + return StringT(data, size); +} + +template <typename StringT, typename CharT> +StringT ToString(const CharT* data, size_t size) { + return StringT(data, data + size); +} + +template <typename StringT> +StringT DoubleToStringT(double value) { + char buffer[32]; + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); + return ToString<StringT>(buffer, builder.position()); +} + +template <typename STRING, typename CHAR> +bool StringToDoubleImpl(STRING input, const CHAR* data, double& output) { + static NoDestructor<double_conversion::StringToDoubleConverter> converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, + 0.0, 0, nullptr, nullptr); + + int processed_characters_count; + output = converter->StringToDouble(data, input.size(), + &processed_characters_count); + + // Cases to return false: + // - If the input string is empty, there was nothing to parse. + // - If the value saturated to HUGE_VAL. + // - If the entire string was not processed, there are either characters + // remaining in the string after a parsed number, or the string does not + // begin with a parseable number. 
+ // - If the first character is a space, there was leading whitespace + return !input.empty() && output != HUGE_VAL && output != -HUGE_VAL && + static_cast<size_t>(processed_characters_count) == input.size() && + !IsUnicodeWhitespace(input[0]); +} + +template <typename OutIter> +static bool HexStringToByteContainer(StringPiece input, OutIter output) { + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + for (uintptr_t i = 0; i < count / 2; ++i) { + // most significant 4 bits + Optional<uint8_t> msb = CharToDigit<16>(input[i * 2]); + // least significant 4 bits + Optional<uint8_t> lsb = CharToDigit<16>(input[i * 2 + 1]); + if (!msb || !lsb) { + return false; + } + *(output++) = (*msb << 4) | *lsb; + } + return true; +} + +} // namespace internal + +} // namespace gurl_base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
diff --git a/base/strings/string_number_conversions_win.cc b/base/strings/string_number_conversions_win.cc new file mode 100644 index 0000000..abae437 --- /dev/null +++ b/base/strings/string_number_conversions_win.cc
@@ -0,0 +1,79 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_number_conversions_win.h" + +#include <string> + +#include "base/strings/string_number_conversions_internal.h" +#include "base/strings/string_piece.h" + +namespace gurl_base { + +std::wstring NumberToWString(int value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(long long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(unsigned long long value) { + return internal::IntToStringT<std::wstring>(value); +} + +std::wstring NumberToWString(double value) { + return internal::DoubleToStringT<std::wstring>(value); +} + +#if defined(BASE_STRING16_IS_STD_U16STRING) +namespace internal { + +template <> +class WhitespaceHelper<wchar_t> { + public: + static bool Invoke(wchar_t c) { return 0 != iswspace(c); } +}; + +} // namespace internal + +bool StringToInt(WStringPiece input, int* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToUint(WStringPiece input, unsigned* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToInt64(WStringPiece input, int64_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToUint64(WStringPiece input, uint64_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToSizeT(WStringPiece input, size_t* output) { + return internal::StringToIntImpl(input, *output); +} + +bool StringToDouble(WStringPiece input, double* 
output) { + return internal::StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), *output); +} +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace gurl_base
diff --git a/base/strings/string_number_conversions_win.h b/base/strings/string_number_conversions_win.h new file mode 100644 index 0000000..f8b645f --- /dev/null +++ b/base/strings/string_number_conversions_win.h
@@ -0,0 +1,40 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_ + +#include <string> + +#include "polyfills/base/base_export.h" +#include "base/strings/string_piece.h" + +namespace gurl_base { + +BASE_EXPORT std::wstring NumberToWString(int value); +BASE_EXPORT std::wstring NumberToWString(unsigned int value); +BASE_EXPORT std::wstring NumberToWString(long value); +BASE_EXPORT std::wstring NumberToWString(unsigned long value); +BASE_EXPORT std::wstring NumberToWString(long long value); +BASE_EXPORT std::wstring NumberToWString(unsigned long long value); +BASE_EXPORT std::wstring NumberToWString(double value); + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring +// and gurl_base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is +// std::u16string. +#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT bool StringToInt(WStringPiece input, int* output); +BASE_EXPORT bool StringToUint(WStringPiece input, unsigned* output); +BASE_EXPORT bool StringToInt64(WStringPiece input, int64_t* output); +BASE_EXPORT bool StringToUint64(WStringPiece input, uint64_t* output); +BASE_EXPORT bool StringToSizeT(WStringPiece input, size_t* output); +BASE_EXPORT bool StringToDouble(WStringPiece input, double* output); +#endif // defined(BASE_STRING16_IS_STD_U16STRING) + +} // namespace gurl_base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_WIN_H_
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc index 74f3335..ee043de 100644 --- a/base/strings/string_piece.cc +++ b/base/strings/string_piece.cc
@@ -6,11 +6,11 @@ #include "base/strings/string_piece.h" #include <limits.h> +#include <string.h> #include <algorithm> #include <ostream> -#include "polyfills/base/logging.h" #include "base/strings/utf_string_conversions.h" namespace gurl_base {
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h index 964ec67..d155496 100644 --- a/base/strings/string_piece.h +++ b/base/strings/string_piece.h
@@ -25,11 +25,12 @@ #include <stddef.h> #include <iosfwd> +#include <ostream> #include <string> #include <type_traits> #include "polyfills/base/base_export.h" -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/strings/char_traits.h" #include "base/strings/string16.h" #include "base/strings/string_piece_forward.h" @@ -148,6 +149,7 @@ public: // Standard STL container boilerplate. typedef size_t size_type; + typedef typename STRING_TYPE::traits_type traits_type; typedef typename STRING_TYPE::value_type value_type; typedef const value_type* pointer; typedef const value_type& reference; @@ -162,7 +164,7 @@ // We provide non-explicit singleton constructors so users can pass // in a "const char*" or a "string" wherever a "StringPiece" is // expected (likewise for char16, string16, StringPiece16). - constexpr BasicStringPiece() : ptr_(NULL), length_(0) {} + constexpr BasicStringPiece() : ptr_(nullptr), length_(0) {} // TODO(crbug.com/1049498): Construction from nullptr is not allowed for // std::basic_string_view, so remove the special handling for it. // Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc index 3816501..40dedb7 100644 --- a/base/strings/string_split.cc +++ b/base/strings/string_split.cc
@@ -7,6 +7,7 @@ #include <stddef.h> #include "polyfills/base/logging.h" +#include "base/strings/string_split_internal.h" #include "base/strings/string_util.h" #include "base/third_party/icu/icu_utf.h" @@ -14,56 +15,6 @@ namespace { -// Returns either the ASCII or UTF-16 whitespace. -template<typename Str> BasicStringPiece<Str> WhitespaceForType(); -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -template <> -WStringPiece WhitespaceForType<std::wstring>() { - return kWhitespaceWide; -} -#endif - -template<> StringPiece16 WhitespaceForType<string16>() { - return kWhitespaceUTF16; -} -template<> StringPiece WhitespaceForType<std::string>() { - return kWhitespaceASCII; -} - -// General string splitter template. Can take 8- or 16-bit input, can produce -// the corresponding string or StringPiece output. -template <typename OutputStringType, typename Str> -static std::vector<OutputStringType> SplitStringT( - BasicStringPiece<Str> str, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - std::vector<OutputStringType> result; - if (str.empty()) - return result; - - size_t start = 0; - while (start != Str::npos) { - size_t end = str.find_first_of(delimiter, start); - - BasicStringPiece<Str> piece; - if (end == Str::npos) { - piece = str.substr(start); - start = Str::npos; - } else { - piece = str.substr(start, end - start); - start = end + 1; - } - - if (whitespace == TRIM_WHITESPACE) - piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); - - if (result_type == SPLIT_WANT_ALL || !piece.empty()) - result.emplace_back(piece); - } - return result; -} - bool AppendStringKeyValue(StringPiece input, char delimiter, StringPairs* result) { @@ -94,62 +45,38 @@ return true; } -template <typename OutputStringType, typename Str> -std::vector<OutputStringType> SplitStringUsingSubstrT( - BasicStringPiece<Str> input, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - 
using Piece = BasicStringPiece<Str>; - using size_type = typename Piece::size_type; - - std::vector<OutputStringType> result; - for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; - begin_index = end_index + delimiter.size()) { - end_index = input.find(delimiter, begin_index); - Piece term = end_index == Piece::npos - ? input.substr(begin_index) - : input.substr(begin_index, end_index - begin_index); - - if (whitespace == TRIM_WHITESPACE) - term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); - - if (result_type == SPLIT_WANT_ALL || !term.empty()) - result.emplace_back(term); - } - - return result; -} - } // namespace std::vector<std::string> SplitString(StringPiece input, StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<std::string>(input, separators, whitespace, result_type); + return internal::SplitStringT<std::string>(input, separators, whitespace, + result_type); } std::vector<string16> SplitString(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<string16>(input, separators, whitespace, result_type); + return internal::SplitStringT<string16>(input, separators, whitespace, + result_type); } std::vector<StringPiece> SplitStringPiece(StringPiece input, StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<StringPiece>(input, separators, whitespace, result_type); + return internal::SplitStringT<StringPiece>(input, separators, whitespace, + result_type); } std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringT<StringPiece16>(input, separators, whitespace, - result_type); + return internal::SplitStringT<StringPiece16>(input, separators, whitespace, + result_type); } bool SplitStringIntoKeyValuePairs(StringPiece input, @@ -187,16 +114,16 @@ 
StringPiece16 delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<string16>(input, delimiter, + whitespace, result_type); } std::vector<std::string> SplitStringUsingSubstr(StringPiece input, StringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<std::string>( + input, delimiter, whitespace, result_type); } std::vector<StringPiece16> SplitStringPieceUsingSubstr( @@ -205,8 +132,8 @@ WhitespaceHandling whitespace, SplitResult result_type) { std::vector<StringPiece16> result; - return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<StringPiece16>( + input, delimiter, whitespace, result_type); } std::vector<StringPiece> SplitStringPieceUsingSubstr( @@ -214,41 +141,8 @@ StringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace, - result_type); + return internal::SplitStringUsingSubstrT<StringPiece>( + input, delimiter, whitespace, result_type); } -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -std::vector<std::wstring> SplitString(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringT<std::wstring>(input, separators, whitespace, result_type); -} - -std::vector<WStringPiece> SplitStringPiece(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringT<WStringPiece>(input, separators, whitespace, result_type); -} - -std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling 
whitespace, - SplitResult result_type) { - return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace, - result_type); -} - -std::vector<WStringPiece> SplitStringPieceUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) { - return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace, - result_type); -} -#endif - } // namespace base
diff --git a/base/strings/string_split.h b/base/strings/string_split.h index 02c2c59..d9676c7 100644 --- a/base/strings/string_split.h +++ b/base/strings/string_split.h
@@ -138,32 +138,10 @@ WhitespaceHandling whitespace, SplitResult result_type) WARN_UNUSED_RESULT; -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) - WARN_UNUSED_RESULT; - -BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( - WStringPiece input, - WStringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; - -BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; - -BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( - WStringPiece input, - WStringPiece delimiter, - WhitespaceHandling whitespace, - SplitResult result_type) WARN_UNUSED_RESULT; -#endif - } // namespace base +#if defined(OS_WIN) +#include "base/strings/string_split_win.h" +#endif + #endif // BASE_STRINGS_STRING_SPLIT_H_
diff --git a/base/strings/string_split_internal.h b/base/strings/string_split_internal.h new file mode 100644 index 0000000..9dc3763 --- /dev/null +++ b/base/strings/string_split_internal.h
@@ -0,0 +1,100 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_SPLIT_INTERNAL_H_ +#define BASE_STRINGS_STRING_SPLIT_INTERNAL_H_ + +#include <vector> + +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" + +namespace gurl_base { + +namespace internal { + +// Returns either the ASCII or UTF-16 whitespace. +template <typename Str> +BasicStringPiece<Str> WhitespaceForType(); + +template <> +inline StringPiece16 WhitespaceForType<string16>() { + return kWhitespaceUTF16; +} +template <> +inline StringPiece WhitespaceForType<std::string>() { + return kWhitespaceASCII; +} + +// General string splitter template. Can take 8- or 16-bit input, can produce +// the corresponding string or StringPiece output. +template <typename OutputStringType, typename Str> +static std::vector<OutputStringType> SplitStringT( + BasicStringPiece<Str> str, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<OutputStringType> result; + if (str.empty()) + return result; + + size_t start = 0; + while (start != Str::npos) { + size_t end = str.find_first_of(delimiter, start); + + BasicStringPiece<Str> piece; + if (end == Str::npos) { + piece = str.substr(start); + start = Str::npos; + } else { + piece = str.substr(start, end - start); + start = end + 1; + } + + if (whitespace == TRIM_WHITESPACE) + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !piece.empty()) + result.emplace_back(piece); + } + return result; +} + +template <typename OutputStringType, typename Str> +std::vector<OutputStringType> SplitStringUsingSubstrT( + BasicStringPiece<Str> input, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + using Piece = BasicStringPiece<Str>; + using size_type = 
typename Piece::size_type; + + std::vector<OutputStringType> result; + if (delimiter.size() == 0) { + result.emplace_back(input); + return result; + } + + for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; + begin_index = end_index + delimiter.size()) { + end_index = input.find(delimiter, begin_index); + Piece term = end_index == Piece::npos + ? input.substr(begin_index) + : input.substr(begin_index, end_index - begin_index); + + if (whitespace == TRIM_WHITESPACE) + term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !term.empty()) + result.emplace_back(term); + } + + return result; +} + +} // namespace internal + +} // namespace gurl_base + +#endif // BASE_STRINGS_STRING_SPLIT_INTERNAL_H_
diff --git a/base/strings/string_split_unittest.cc b/base/strings/string_split_unittest.cc index f84d4b8..a3e13fa 100644 --- a/base/strings/string_split_unittest.cc +++ b/base/strings/string_split_unittest.cc
@@ -218,6 +218,13 @@ EXPECT_THAT(results, ElementsAre("")); } +TEST(SplitStringUsingSubstrTest, EmptyDelimiter) { + std::vector<std::string> results = SplitStringUsingSubstr( + "TEST", std::string(), TRIM_WHITESPACE, SPLIT_WANT_ALL); + ASSERT_EQ(1u, results.size()); + EXPECT_THAT(results, ElementsAre("TEST")); +} + TEST(StringUtilTest, SplitString_Basics) { std::vector<std::string> r;
diff --git a/base/strings/string_split_win.cc b/base/strings/string_split_win.cc new file mode 100644 index 0000000..297853c --- /dev/null +++ b/base/strings/string_split_win.cc
@@ -0,0 +1,59 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_split_win.h" + +#include <string> +#include <vector> + +#include "base/strings/string_piece.h" +#include "base/strings/string_split_internal.h" + +namespace gurl_base { + +#if defined(BASE_STRING16_IS_STD_U16STRING) +namespace internal { + +template <> +inline WStringPiece WhitespaceForType<std::wstring>() { + return kWhitespaceWide; +} + +} // namespace internal + +std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringT<std::wstring>(input, separators, whitespace, + result_type); +} + +std::vector<WStringPiece> SplitStringPiece(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringT<WStringPiece>(input, separators, whitespace, + result_type); +} + +std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringUsingSubstrT<std::wstring>( + input, delimiter, whitespace, result_type); +} + +std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return internal::SplitStringUsingSubstrT<WStringPiece>( + input, delimiter, whitespace, result_type); +} +#endif + +} // namespace gurl_base
diff --git a/base/strings/string_split_win.h b/base/strings/string_split_win.h new file mode 100644 index 0000000..080641c --- /dev/null +++ b/base/strings/string_split_win.h
@@ -0,0 +1,53 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_SPLIT_WIN_H_ +#define BASE_STRINGS_STRING_SPLIT_WIN_H_ + +#include <string> +#include <vector> + +#include "polyfills/base/base_export.h" +#include "base/compiler_specific.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_split.h" + +namespace gurl_base { + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring +// and gurl_base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is +// std::u16string. +#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( + WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +#endif + +} // namespace gurl_base + +#endif // BASE_STRINGS_STRING_SPLIT_WIN_H_
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index 742319a..c2e440f 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc
@@ -18,11 +18,13 @@ #include <algorithm> #include <limits> +#include <type_traits> #include <vector> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/no_destructor.h" #include "base/stl_util.h" +#include "base/strings/string_util_internal.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h" #include "base/third_party/icu/icu_utf.h" @@ -30,60 +32,6 @@ namespace gurl_base { -namespace { - -// Used by ReplaceStringPlaceholders to track the position in the string of -// replaced parameters. -struct ReplacementOffset { - ReplacementOffset(uintptr_t parameter, size_t offset) - : parameter(parameter), - offset(offset) {} - - // Index of the parameter. - uintptr_t parameter; - - // Starting position in the string. - size_t offset; -}; - -static bool CompareParameter(const ReplacementOffset& elem1, - const ReplacementOffset& elem2) { - return elem1.parameter < elem2.parameter; -} - -// Assuming that a pointer is the size of a "machine word", then -// uintptr_t is an integer type that is also a machine word. 
-using MachineWord = uintptr_t; - -inline bool IsMachineWordAligned(const void* pointer) { - return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); -} - -template <typename CharacterType> -struct NonASCIIMask; -template <> -struct NonASCIIMask<char> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0x8080808080808080ULL); - } -}; -template <> -struct NonASCIIMask<char16> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0xFF80FF80FF80FF80ULL); - } -}; -#if defined(WCHAR_T_IS_UTF32) -template <> -struct NonASCIIMask<wchar_t> { - static constexpr MachineWord value() { - return static_cast<MachineWord>(0xFFFFFF80FFFFFF80ULL); - } -}; -#endif // WCHAR_T_IS_UTF32 - -} // namespace - bool IsWprintfFormatPortable(const wchar_t* format) { for (const wchar_t* position = format; *position != '\0'; ++position) { if (*position == '%') { @@ -119,89 +67,38 @@ return true; } -namespace { - -template<typename StringType> -StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; - ret.reserve(str.size()); - for (size_t i = 0; i < str.size(); i++) - ret.push_back(ToLowerASCII(str[i])); - return ret; -} - -template<typename StringType> -StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { - StringType ret; - ret.reserve(str.size()); - for (size_t i = 0; i < str.size(); i++) - ret.push_back(ToUpperASCII(str[i])); - return ret; -} - -} // namespace - std::string ToLowerASCII(StringPiece str) { - return ToLowerASCIIImpl<std::string>(str); + return internal::ToLowerASCIIImpl(str); } string16 ToLowerASCII(StringPiece16 str) { - return ToLowerASCIIImpl<string16>(str); + return internal::ToLowerASCIIImpl(str); } std::string ToUpperASCII(StringPiece str) { - return ToUpperASCIIImpl<std::string>(str); + return internal::ToUpperASCIIImpl(str); } string16 ToUpperASCII(StringPiece16 str) { - return ToUpperASCIIImpl<string16>(str); -} - -template<class StringType> -int 
CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, - BasicStringPiece<StringType> b) { - // Find the first characters that aren't equal and compare them. If the end - // of one of the strings is found before a nonequal character, the lengths - // of the strings are compared. - size_t i = 0; - while (i < a.length() && i < b.length()) { - typename StringType::value_type lower_a = ToLowerASCII(a[i]); - typename StringType::value_type lower_b = ToLowerASCII(b[i]); - if (lower_a < lower_b) - return -1; - if (lower_a > lower_b) - return 1; - i++; - } - - // End of one string hit before finding a different character. Expect the - // common case to be "strings equal" at this point so check that first. - if (a.length() == b.length()) - return 0; - - if (a.length() < b.length()) - return -1; - return 1; + return internal::ToUpperASCIIImpl(str); } int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { - return CompareCaseInsensitiveASCIIT<std::string>(a, b); + return internal::CompareCaseInsensitiveASCIIT(a, b); } int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { - return CompareCaseInsensitiveASCIIT<string16>(a, b); + return internal::CompareCaseInsensitiveASCIIT(a, b); } bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { - if (a.length() != b.length()) - return false; - return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; } bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { - if (a.length() != b.length()) - return false; - return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; } const std::string& EmptyString() { @@ -214,107 +111,56 @@ return *s16; } -template <class StringType> -bool ReplaceCharsT(const StringType& input, - BasicStringPiece<StringType> find_any_of_these, - BasicStringPiece<StringType> replace_with, - 
StringType* output); - -bool ReplaceChars(const string16& input, +bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, string16* output) { - return ReplaceCharsT(input, replace_chars, replace_with, output); + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); } -bool ReplaceChars(const std::string& input, +bool ReplaceChars(StringPiece input, StringPiece replace_chars, StringPiece replace_with, std::string* output) { - return ReplaceCharsT(input, replace_chars, replace_with, output); + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); } -bool RemoveChars(const string16& input, +bool RemoveChars(StringPiece16 input, StringPiece16 remove_chars, string16* output) { - return ReplaceCharsT(input, remove_chars, StringPiece16(), output); + return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output); } -bool RemoveChars(const std::string& input, +bool RemoveChars(StringPiece input, StringPiece remove_chars, std::string* output) { - return ReplaceCharsT(input, remove_chars, StringPiece(), output); -} - -template <typename Str> -TrimPositions TrimStringT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, - TrimPositions positions, - Str* output) { - // Find the edges of leading/trailing whitespace as desired. Need to use - // a StringPiece version of input to be able to call find* on it with the - // StringPiece version of trim_chars (normally the trim_chars will be a - // constant so avoid making a copy). - const size_t last_char = input.length() - 1; - const size_t first_good_char = - (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; - const size_t last_good_char = (positions & TRIM_TRAILING) - ? input.find_last_not_of(trim_chars) - : last_char; - - // When the string was all trimmed, report that we stripped off characters - // from whichever position the caller was interested in. 
For empty input, we - // stripped no characters, but we still need to clear |output|. - if (input.empty() || first_good_char == Str::npos || - last_good_char == Str::npos) { - bool input_was_empty = input.empty(); // in case output == &input - output->clear(); - return input_was_empty ? TRIM_NONE : positions; - } - - // Trim. - output->assign(input.data() + first_good_char, - last_good_char - first_good_char + 1); - - // Return where we trimmed from. - return static_cast<TrimPositions>( - (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | - (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING)); + return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output); } bool TrimString(StringPiece16 input, StringPiece16 trim_chars, string16* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; } bool TrimString(StringPiece input, StringPiece trim_chars, std::string* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; -} - -template<typename Str> -BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, - BasicStringPiece<Str> trim_chars, - TrimPositions positions) { - size_t begin = (positions & TRIM_LEADING) ? - input.find_first_not_of(trim_chars) : 0; - size_t end = (positions & TRIM_TRAILING) ? 
- input.find_last_not_of(trim_chars) + 1 : input.size(); - return input.substr(begin, end - begin); + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; } StringPiece16 TrimString(StringPiece16 input, StringPiece16 trim_chars, TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); + return internal::TrimStringPieceT(input, trim_chars, positions); } StringPiece TrimString(StringPiece input, StringPiece trim_chars, TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); + return internal::TrimStringPieceT(input, trim_chars, positions); } void TruncateUTF8ToByteSize(const std::string& input, @@ -357,74 +203,36 @@ TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, string16* output) { - return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); + return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16), + positions, output); } StringPiece16 TrimWhitespace(StringPiece16 input, TrimPositions positions) { - return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); + return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), + positions); } TrimPositions TrimWhitespaceASCII(StringPiece input, TrimPositions positions, std::string* output) { - return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); + return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions, + output); } StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { - return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); + return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII), + positions); } -template<typename STR> -STR CollapseWhitespaceT(const STR& text, - bool trim_sequences_with_line_breaks) { - STR result; - result.resize(text.size()); - - // Set flags to pretend we're already in a trimmed whitespace sequence, so we - // will trim any 
leading whitespace. - bool in_whitespace = true; - bool already_trimmed = true; - - int chars_written = 0; - for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { - if (IsUnicodeWhitespace(*i)) { - if (!in_whitespace) { - // Reduce all whitespace sequences to a single space. - in_whitespace = true; - result[chars_written++] = L' '; - } - if (trim_sequences_with_line_breaks && !already_trimmed && - ((*i == '\n') || (*i == '\r'))) { - // Whitespace sequences containing CR or LF are eliminated entirely. - already_trimmed = true; - --chars_written; - } - } else { - // Non-whitespace chracters are copied straight across. - in_whitespace = false; - already_trimmed = false; - result[chars_written++] = *i; - } - } - - if (in_whitespace && !already_trimmed) { - // Any trailing whitespace is eliminated. - --chars_written; - } - - result.resize(chars_written); - return result; -} - -string16 CollapseWhitespace(const string16& text, +string16 CollapseWhitespace(StringPiece16 text, bool trim_sequences_with_line_breaks) { - return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); + return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); } -std::string CollapseWhitespaceASCII(const std::string& text, +std::string CollapseWhitespaceASCII(StringPiece text, bool trim_sequences_with_line_breaks) { - return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); + return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); } bool ContainsOnlyChars(StringPiece input, StringPiece characters) { @@ -435,198 +243,63 @@ return input.find_first_not_of(characters) == StringPiece16::npos; } -template <class Char> -inline bool DoIsStringASCII(const Char* characters, size_t length) { - if (!length) - return true; - constexpr MachineWord non_ascii_bit_mask = NonASCIIMask<Char>::value(); - MachineWord all_char_bits = 0; - const Char* end = characters + length; - - // Prologue: align the input. 
- while (!IsMachineWordAligned(characters) && characters < end) - all_char_bits |= *characters++; - if (all_char_bits & non_ascii_bit_mask) - return false; - - // Compare the values of CPU word size. - constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char); - constexpr int batch_count = 16; - while (characters <= end - batch_count * chars_per_word) { - all_char_bits = 0; - for (int i = 0; i < batch_count; ++i) { - all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); - characters += chars_per_word; - } - if (all_char_bits & non_ascii_bit_mask) - return false; - } - - // Process the remaining words. - all_char_bits = 0; - while (characters <= end - chars_per_word) { - all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); - characters += chars_per_word; - } - - // Process the remaining bytes. - while (characters < end) - all_char_bits |= *characters++; - - return !(all_char_bits & non_ascii_bit_mask); -} bool IsStringASCII(StringPiece str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } bool IsStringASCII(StringPiece16 str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } #if defined(WCHAR_T_IS_UTF32) bool IsStringASCII(WStringPiece str) { - return DoIsStringASCII(str.data(), str.length()); + return internal::DoIsStringASCII(str.data(), str.length()); } #endif -template <bool (*Validator)(uint32_t)> -inline static bool DoIsStringUTF8(StringPiece str) { - const char* src = str.data(); - int32_t src_len = static_cast<int32_t>(str.length()); - int32_t char_index = 0; - - while (char_index < src_len) { - int32_t code_point; - CBU8_NEXT(src, char_index, src_len, code_point); - if (!Validator(code_point)) - return false; - } - return true; -} - bool IsStringUTF8(StringPiece str) { - return DoIsStringUTF8<IsValidCharacter>(str); + return internal::DoIsStringUTF8<IsValidCharacter>(str); } bool 
IsStringUTF8AllowingNoncharacters(StringPiece str) { - return DoIsStringUTF8<IsValidCodepoint>(str); -} - -// Implementation note: Normally this function will be called with a hardcoded -// constant for the lowercase_ascii parameter. Constructing a StringPiece from -// a C constant requires running strlen, so the result will be two passes -// through the buffers, one to file the length of lowercase_ascii, and one to -// compare each letter. -// -// This function could have taken a const char* to avoid this and only do one -// pass through the string. But the strlen is faster than the case-insensitive -// compares and lets us early-exit in the case that the strings are different -// lengths (will often be the case for non-matches). So whether one approach or -// the other will be faster depends on the case. -// -// The hardcoded strings are typically very short so it doesn't matter, and the -// string piece gives additional flexibility for the caller (doesn't have to be -// null terminated) so we choose the StringPiece route. 
-template<typename Str> -static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, - StringPiece lowercase_ascii) { - if (str.size() != lowercase_ascii.size()) - return false; - for (size_t i = 0; i < str.size(); i++) { - if (ToLowerASCII(str[i]) != lowercase_ascii[i]) - return false; - } - return true; + return internal::DoIsStringUTF8<IsValidCodepoint>(str); } bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { - return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); } bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { - return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); } bool EqualsASCII(StringPiece16 str, StringPiece ascii) { - if (str.length() != ascii.length()) - return false; - return std::equal(ascii.begin(), ascii.end(), str.begin()); -} - -template<typename Str> -bool StartsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { - if (search_for.size() > str.size()) - return false; - - BasicStringPiece<Str> source = str.substr(0, search_for.size()); - - switch (case_sensitivity) { - case CompareCase::SENSITIVE: - return source == search_for; - - case CompareCase::INSENSITIVE_ASCII: - return std::equal( - search_for.begin(), search_for.end(), - source.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); - - default: - GURL_NOTREACHED(); - return false; - } + return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end()); } bool StartsWith(StringPiece str, StringPiece search_for, CompareCase case_sensitivity) { - return StartsWithT<std::string>(str, search_for, case_sensitivity); + return internal::StartsWithT(str, search_for, case_sensitivity); } bool StartsWith(StringPiece16 str, StringPiece16 search_for, CompareCase case_sensitivity) { - return StartsWithT<string16>(str, 
search_for, case_sensitivity); -} - -template <typename Str> -bool EndsWithT(BasicStringPiece<Str> str, - BasicStringPiece<Str> search_for, - CompareCase case_sensitivity) { - if (search_for.size() > str.size()) - return false; - - BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), - search_for.size()); - - switch (case_sensitivity) { - case CompareCase::SENSITIVE: - return source == search_for; - - case CompareCase::INSENSITIVE_ASCII: - return std::equal( - source.begin(), source.end(), - search_for.begin(), - CaseInsensitiveCompareASCII<typename Str::value_type>()); - - default: - GURL_NOTREACHED(); - return false; - } + return internal::StartsWithT(str, search_for, case_sensitivity); } bool EndsWith(StringPiece str, StringPiece search_for, CompareCase case_sensitivity) { - return EndsWithT<std::string>(str, search_for, case_sensitivity); + return internal::EndsWithT(str, search_for, case_sensitivity); } bool EndsWith(StringPiece16 str, StringPiece16 search_for, CompareCase case_sensitivity) { - return EndsWithT<string16>(str, search_for, case_sensitivity); + return internal::EndsWithT(str, search_for, case_sensitivity); } char HexDigitToInt(wchar_t c) { @@ -680,384 +353,93 @@ return ASCIIToUTF16(buf); } -// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. -template <class StringType> -struct SubstringMatcher { - BasicStringPiece<StringType> find_this; - - size_t Find(const StringType& input, size_t pos) { - return input.find(find_this.data(), pos, find_this.length()); - } - size_t MatchSize() { return find_this.length(); } -}; - -// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. 
-template <class StringType> -struct CharacterMatcher { - BasicStringPiece<StringType> find_any_of_these; - - size_t Find(const StringType& input, size_t pos) { - return input.find_first_of(find_any_of_these.data(), pos, - find_any_of_these.length()); - } - constexpr size_t MatchSize() { return 1; } -}; - -enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; - -// Runs in O(n) time in the length of |str|, and transforms the string without -// reallocating when possible. Returns |true| if any matches were found. -// -// This is parameterized on a |Matcher| traits type, so that it can be the -// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). -template <class StringType, class Matcher> -bool DoReplaceMatchesAfterOffset(StringType* str, - size_t initial_offset, - Matcher matcher, - BasicStringPiece<StringType> replace_with, - ReplaceType replace_type) { - using CharTraits = typename StringType::traits_type; - - const size_t find_length = matcher.MatchSize(); - if (!find_length) - return false; - - // If the find string doesn't appear, there's nothing to do. - size_t first_match = matcher.Find(*str, initial_offset); - if (first_match == StringType::npos) - return false; - - // If we're only replacing one instance, there's no need to do anything - // complicated. - const size_t replace_length = replace_with.length(); - if (replace_type == ReplaceType::REPLACE_FIRST) { - str->replace(first_match, find_length, replace_with.data(), replace_length); - return true; - } - - // If the find and replace strings are the same length, we can simply use - // replace() on each instance, and finish the entire operation in O(n) time. 
- if (find_length == replace_length) { - auto* buffer = &((*str)[0]); - for (size_t offset = first_match; offset != StringType::npos; - offset = matcher.Find(*str, offset + replace_length)) { - CharTraits::copy(buffer + offset, replace_with.data(), replace_length); - } - return true; - } - - // Since the find and replace strings aren't the same length, a loop like the - // one above would be O(n^2) in the worst case, as replace() will shift the - // entire remaining string each time. We need to be more clever to keep things - // O(n). - // - // When the string is being shortened, it's possible to just shift the matches - // down in one pass while finding, and truncate the length at the end of the - // search. - // - // If the string is being lengthened, more work is required. The strategy used - // here is to make two find() passes through the string. The first pass counts - // the number of matches to determine the new size. The second pass will - // either construct the new string into a new buffer (if the existing buffer - // lacked capacity), or else -- if there is room -- create a region of scratch - // space after |first_match| by shifting the tail of the string to a higher - // index, and doing in-place moves from the tail to lower indices thereafter. - size_t str_length = str->length(); - size_t expansion = 0; - if (replace_length > find_length) { - // This operation lengthens the string; determine the new length by counting - // matches. - const size_t expansion_per_match = (replace_length - find_length); - size_t num_matches = 0; - for (size_t match = first_match; match != StringType::npos; - match = matcher.Find(*str, match + find_length)) { - expansion += expansion_per_match; - ++num_matches; - } - const size_t final_length = str_length + expansion; - - if (str->capacity() < final_length) { - // If we'd have to allocate a new buffer to grow the string, build the - // result directly into the new allocation via append(). 
- StringType src(str->get_allocator()); - str->swap(src); - str->reserve(final_length); - - size_t pos = 0; - for (size_t match = first_match;; match = matcher.Find(src, pos)) { - str->append(src, pos, match - pos); - str->append(replace_with.data(), replace_length); - pos = match + find_length; - - // A mid-loop test/break enables skipping the final Find() call; the - // number of matches is known, so don't search past the last one. - if (!--num_matches) - break; - } - - // Handle substring after the final match. - str->append(src, pos, str_length - pos); - return true; - } - - // Prepare for the copy/move loop below -- expand the string to its final - // size by shifting the data after the first match to the end of the resized - // string. - size_t shift_src = first_match + find_length; - size_t shift_dst = shift_src + expansion; - - // Big |expansion| factors (relative to |str_length|) require padding up to - // |shift_dst|. - if (shift_dst > str_length) - str->resize(shift_dst); - - str->replace(shift_dst, str_length - shift_src, *str, shift_src, - str_length - shift_src); - str_length = final_length; - } - - // We can alternate replacement and move operations. This won't overwrite the - // unsearched region of the string so long as |write_offset| <= |read_offset|; - // that condition is always satisfied because: - // - // (a) If the string is being shortened, |expansion| is zero and - // |write_offset| grows slower than |read_offset|. - // - // (b) If the string is being lengthened, |write_offset| grows faster than - // |read_offset|, but |expansion| is big enough so that |write_offset| - // will only catch up to |read_offset| at the point of the last match. 
- auto* buffer = &((*str)[0]); - size_t write_offset = first_match; - size_t read_offset = first_match + expansion; - do { - if (replace_length) { - CharTraits::copy(buffer + write_offset, replace_with.data(), - replace_length); - write_offset += replace_length; - } - read_offset += find_length; - - // min() clamps StringType::npos (the largest unsigned value) to str_length. - size_t match = std::min(matcher.Find(*str, read_offset), str_length); - - size_t length = match - read_offset; - if (length) { - CharTraits::move(buffer + write_offset, buffer + read_offset, length); - write_offset += length; - read_offset += length; - } - } while (read_offset < str_length); - - // If we're shortening the string, truncate it now. - str->resize(write_offset); - return true; -} - -template <class StringType> -bool ReplaceCharsT(const StringType& input, - BasicStringPiece<StringType> find_any_of_these, - BasicStringPiece<StringType> replace_with, - StringType* output) { - // Commonly, this is called with output and input being the same string; in - // that case, this assignment is inexpensive. 
- *output = input; - - return DoReplaceMatchesAfterOffset( - output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, - ReplaceType::REPLACE_ALL); -} - void ReplaceFirstSubstringAfterOffset(string16* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<string16>{find_this}, - replace_with, ReplaceType::REPLACE_FIRST); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<string16>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); } void ReplaceFirstSubstringAfterOffset(std::string* str, size_t start_offset, StringPiece find_this, StringPiece replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<std::string>{find_this}, - replace_with, ReplaceType::REPLACE_FIRST); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); } void ReplaceSubstringsAfterOffset(string16* str, size_t start_offset, StringPiece16 find_this, StringPiece16 replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<string16>{find_this}, - replace_with, ReplaceType::REPLACE_ALL); + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<string16>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); } void ReplaceSubstringsAfterOffset(std::string* str, size_t start_offset, StringPiece find_this, StringPiece replace_with) { - DoReplaceMatchesAfterOffset(str, start_offset, - SubstringMatcher<std::string>{find_this}, - replace_with, ReplaceType::REPLACE_ALL); -} - -template <class string_type> -inline typename string_type::value_type* WriteIntoT(string_type* str, - size_t length_with_null) { - GURL_DCHECK_GE(length_with_null, 1u); - str->reserve(length_with_null); - str->resize(length_with_null - 1); - return &((*str)[0]); + 
internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::string>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); } char* WriteInto(std::string* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); + return internal::WriteIntoT(str, length_with_null); } char16* WriteInto(string16* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); + return internal::WriteIntoT(str, length_with_null); } -// Generic version for all JoinString overloads. |list_type| must be a sequence -// (std::vector or std::initializer_list) of strings/StringPieces (std::string, -// string16, StringPiece or StringPiece16). |string_type| is either std::string -// or string16. -template <typename list_type, typename string_type> -static string_type JoinStringT(const list_type& parts, - BasicStringPiece<string_type> sep) { - if (gurl_base::empty(parts)) - return string_type(); - - // Pre-allocate the eventual size of the string. Start with the size of all of - // the separators (note that this *assumes* parts.size() > 0). - size_t total_size = (parts.size() - 1) * sep.size(); - for (const auto& part : parts) - total_size += part.size(); - string_type result; - result.reserve(total_size); - - auto iter = parts.begin(); - GURL_DCHECK(iter != parts.end()); - result.append(iter->data(), iter->size()); - ++iter; - - for (; iter != parts.end(); ++iter) { - result.append(sep.data(), sep.size()); - result.append(iter->data(), iter->size()); - } - - // Sanity-check that we pre-allocated correctly. 
- GURL_DCHECK_EQ(total_size, result.size()); - - return result; +std::string JoinString(span<const std::string> parts, StringPiece separator) { + return internal::JoinStringT(parts, separator); } -std::string JoinString(const std::vector<std::string>& parts, - StringPiece separator) { - return JoinStringT(parts, separator); +string16 JoinString(span<const string16> parts, StringPiece16 separator) { + return internal::JoinStringT(parts, separator); } -string16 JoinString(const std::vector<string16>& parts, - StringPiece16 separator) { - return JoinStringT(parts, separator); +std::string JoinString(span<const StringPiece> parts, StringPiece separator) { + return internal::JoinStringT(parts, separator); } -std::string JoinString(const std::vector<StringPiece>& parts, - StringPiece separator) { - return JoinStringT(parts, separator); -} - -string16 JoinString(const std::vector<StringPiece16>& parts, - StringPiece16 separator) { - return JoinStringT(parts, separator); +string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator) { + return internal::JoinStringT(parts, separator); } std::string JoinString(std::initializer_list<StringPiece> parts, StringPiece separator) { - return JoinStringT(parts, separator); + return internal::JoinStringT(parts, separator); } string16 JoinString(std::initializer_list<StringPiece16> parts, StringPiece16 separator) { - return JoinStringT(parts, separator); + return internal::JoinStringT(parts, separator); } -template<class FormatStringType, class OutStringType> -OutStringType DoReplaceStringPlaceholders( - const FormatStringType& format_string, - const std::vector<OutStringType>& subst, - std::vector<size_t>* offsets) { - size_t substitutions = subst.size(); - GURL_DCHECK_LT(substitutions, 10U); - - size_t sub_length = 0; - for (const auto& cur : subst) - sub_length += cur.length(); - - OutStringType formatted; - formatted.reserve(format_string.length() + sub_length); - - std::vector<ReplacementOffset> r_offsets; - for 
(auto i = format_string.begin(); i != format_string.end(); ++i) { - if ('$' == *i) { - if (i + 1 != format_string.end()) { - ++i; - if ('$' == *i) { - while (i != format_string.end() && '$' == *i) { - formatted.push_back('$'); - ++i; - } - --i; - } else { - if (*i < '1' || *i > '9') { - GURL_DLOG(ERROR) << "Invalid placeholder: $" << *i; - continue; - } - uintptr_t index = *i - '1'; - if (offsets) { - ReplacementOffset r_offset(index, - static_cast<int>(formatted.size())); - r_offsets.insert( - std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, - &CompareParameter), - r_offset); - } - if (index < substitutions) - formatted.append(subst.at(index)); - } - } - } else { - formatted.push_back(*i); - } - } - if (offsets) { - for (const auto& cur : r_offsets) - offsets->push_back(cur.offset); - } - return formatted; -} - -string16 ReplaceStringPlaceholders(const string16& format_string, +string16 ReplaceStringPlaceholders(StringPiece16 format_string, const std::vector<string16>& subst, std::vector<size_t>* offsets) { - return DoReplaceStringPlaceholders(format_string, subst, offsets); + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } std::string ReplaceStringPlaceholders(StringPiece format_string, const std::vector<std::string>& subst, std::vector<size_t>* offsets) { - return DoReplaceStringPlaceholders(format_string, subst, offsets); + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); } string16 ReplaceStringPlaceholders(const string16& format_string, const string16& a, size_t* offset) { std::vector<size_t> offsets; - std::vector<string16> subst; - subst.push_back(a); - string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); + string16 result = ReplaceStringPlaceholders(format_string, {a}, &offsets); GURL_DCHECK_EQ(1U, offsets.size()); if (offset) @@ -1065,65 +447,11 @@ return result; } -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) - -TrimPositions 
TrimWhitespace(WStringPiece input, - TrimPositions positions, - std::wstring* output) { - return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output); -} - -WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { - return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions); -} - -bool TrimString(WStringPiece input, - WStringPiece trim_chars, - std::wstring* output) { - return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; -} - -WStringPiece TrimString(WStringPiece input, - WStringPiece trim_chars, - TrimPositions positions) { - return TrimStringPieceT(input, trim_chars, positions); -} - -wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { - return WriteIntoT(str, length_with_null); -} - -#endif - -// The following code is compatible with the OpenBSD lcpy interface. See: -// http://www.gratisoft.us/todd/papers/strlcpy.html -// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c - -namespace { - -template <typename CHAR> -size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { - for (size_t i = 0; i < dst_size; ++i) { - if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. - return i; - } - - // We were left off at dst_size. We over copied 1 byte. Null terminate. - if (dst_size != 0) - dst[dst_size - 1] = 0; - - // Count the rest of the |src|, and return it's length in characters. - while (src[dst_size]) ++dst_size; - return dst_size; -} - -} // namespace - size_t strlcpy(char* dst, const char* src, size_t dst_size) { - return lcpyT<char>(dst, src, dst_size); + return internal::lcpyT(dst, src, dst_size); } size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { - return lcpyT<wchar_t>(dst, src, dst_size); + return internal::lcpyT(dst, src, dst_size); } } // namespace base
diff --git a/base/strings/string_util.h b/base/strings/string_util.h index ed3118d..1445283 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h
@@ -14,10 +14,12 @@ #include <initializer_list> #include <string> +#include <type_traits> #include <vector> #include "polyfills/base/base_export.h" #include "base/compiler_specific.h" +#include "base/containers/span.h" #include "base/stl_util.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" // For implicit conversions. @@ -84,19 +86,17 @@ // ASCII-specific tolower. The standard library's tolower is locale sensitive, // so we don't want to use it here. -inline char ToLowerASCII(char c) { - return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; -} -inline char16 ToLowerASCII(char16 c) { +template <typename CharT, + typename = std::enable_if_t<std::is_integral<CharT>::value>> +CharT ToLowerASCII(CharT c) { return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; } // ASCII-specific toupper. The standard library's toupper is locale sensitive, // so we don't want to use it here. -inline char ToUpperASCII(char c) { - return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; -} -inline char16 ToUpperASCII(char16 c) { +template <typename CharT, + typename = std::enable_if_t<std::is_integral<CharT>::value>> +CharT ToUpperASCII(CharT c) { return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; } @@ -170,10 +170,10 @@ // Removes characters in |remove_chars| from anywhere in |input|. Returns true // if any characters were removed. |remove_chars| must be null-terminated. // NOTE: Safe to use the same variable for both |input| and |output|. -BASE_EXPORT bool RemoveChars(const string16& input, +BASE_EXPORT bool RemoveChars(StringPiece16 input, StringPiece16 remove_chars, string16* output); -BASE_EXPORT bool RemoveChars(const std::string& input, +BASE_EXPORT bool RemoveChars(StringPiece input, StringPiece remove_chars, std::string* output); @@ -182,11 +182,11 @@ // the |replace_with| string. Returns true if any characters were replaced. // |replace_chars| must be null-terminated. // NOTE: Safe to use the same variable for both |input| and |output|. 
-BASE_EXPORT bool ReplaceChars(const string16& input, +BASE_EXPORT bool ReplaceChars(StringPiece16 input, StringPiece16 replace_chars, StringPiece16 replace_with, string16* output); -BASE_EXPORT bool ReplaceChars(const std::string& input, +BASE_EXPORT bool ReplaceChars(StringPiece input, StringPiece replace_chars, StringPiece replace_with, std::string* output); @@ -226,69 +226,6 @@ const size_t byte_size, std::string* output); -#if defined(WCHAR_T_IS_UTF16) -// Utility functions to access the underlying string buffer as a wide char -// pointer. -// -// Note: These functions violate strict aliasing when char16 and wchar_t are -// unrelated types. We thus pass -fno-strict-aliasing to the compiler on -// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. -// -// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 -// [2] -// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 -inline wchar_t* as_writable_wcstr(char16* str) { - return reinterpret_cast<wchar_t*>(str); -} - -inline wchar_t* as_writable_wcstr(string16& str) { - return reinterpret_cast<wchar_t*>(data(str)); -} - -inline const wchar_t* as_wcstr(const char16* str) { - return reinterpret_cast<const wchar_t*>(str); -} - -inline const wchar_t* as_wcstr(StringPiece16 str) { - return reinterpret_cast<const wchar_t*>(str.data()); -} - -// Utility functions to access the underlying string buffer as a char16 pointer. -inline char16* as_writable_u16cstr(wchar_t* str) { - return reinterpret_cast<char16*>(str); -} - -inline char16* as_writable_u16cstr(std::wstring& str) { - return reinterpret_cast<char16*>(data(str)); -} - -inline const char16* as_u16cstr(const wchar_t* str) { - return reinterpret_cast<const char16*>(str); -} - -inline const char16* as_u16cstr(WStringPiece str) { - return reinterpret_cast<const char16*>(str.data()); -} - -// Utility functions to convert between gurl_base::WStringPiece and -// gurl_base::StringPiece16. 
-inline WStringPiece AsWStringPiece(StringPiece16 str) { - return WStringPiece(as_wcstr(str.data()), str.size()); -} - -inline StringPiece16 AsStringPiece16(WStringPiece str) { - return StringPiece16(as_u16cstr(str.data()), str.size()); -} - -inline std::wstring AsWString(StringPiece16 str) { - return std::wstring(as_wcstr(str.data()), str.size()); -} - -inline string16 AsString16(WStringPiece str) { - return string16(as_u16cstr(str.data()), str.size()); -} -#endif // defined(WCHAR_T_IS_UTF16) - // Trims any whitespace from either end of the input string. // // The StringPiece versions return a substring referencing the input buffer. @@ -315,11 +252,10 @@ // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace // sequences containing a CR or LF are trimmed. // (3) All other whitespace sequences are converted to single spaces. -BASE_EXPORT string16 CollapseWhitespace( - const string16& text, - bool trim_sequences_with_line_breaks); +BASE_EXPORT string16 CollapseWhitespace(StringPiece16 text, + bool trim_sequences_with_line_breaks); BASE_EXPORT std::string CollapseWhitespaceASCII( - const std::string& text, + StringPiece text, bool trim_sequences_with_line_breaks); // Returns true if |input| is empty or contains only characters found in @@ -347,6 +283,7 @@ // does not leave early if it is not the case. BASE_EXPORT bool IsStringASCII(StringPiece str); BASE_EXPORT bool IsStringASCII(StringPiece16 str); + #if defined(WCHAR_T_IS_UTF32) BASE_EXPORT bool IsStringASCII(WStringPiece str); #endif @@ -488,8 +425,8 @@ BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); -// Joins a vector or list of strings into a single string, inserting |separator| -// (which may be empty) in between all elements. +// Joins a list of strings into a single string, inserting |separator| (which +// may be empty) in between all elements. 
// // Note this is inverse of SplitString()/SplitStringPiece() defined in // string_split.h. @@ -501,13 +438,13 @@ // copies of those strings are created until the final join operation. // // Use StrCat (in base/strings/strcat.h) if you don't need a separator. -BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts, +BASE_EXPORT std::string JoinString(span<const std::string> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, +BASE_EXPORT string16 JoinString(span<const string16> parts, StringPiece16 separator); -BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts, +BASE_EXPORT std::string JoinString(span<const StringPiece> parts, StringPiece separator); -BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts, +BASE_EXPORT string16 JoinString(span<const StringPiece16> parts, StringPiece16 separator); // Explicit initializer_list overloads are required to break ambiguity when used // with a literal initializer list (otherwise the compiler would not be able to @@ -521,10 +458,10 @@ // Additionally, any number of consecutive '$' characters is replaced by that // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be // NULL. This only allows you to use up to nine replacements. 
-BASE_EXPORT string16 ReplaceStringPlaceholders( - const string16& format_string, - const std::vector<string16>& subst, - std::vector<size_t>* offsets); +BASE_EXPORT string16 +ReplaceStringPlaceholders(StringPiece16 format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets); BASE_EXPORT std::string ReplaceStringPlaceholders( StringPiece format_string, @@ -536,25 +473,6 @@ const string16& a, size_t* offset); -#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) -BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input, - TrimPositions positions, - std::wstring* output); - -BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input, - TrimPositions positions); - -BASE_EXPORT bool TrimString(WStringPiece input, - WStringPiece trim_chars, - std::wstring* output); - -BASE_EXPORT WStringPiece TrimString(WStringPiece input, - WStringPiece trim_chars, - TrimPositions positions); - -BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); -#endif - } // namespace base #if defined(OS_WIN)
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h new file mode 100644 index 0000000..ace0665 --- /dev/null +++ b/base/strings/string_util_internal.h
@@ -0,0 +1,625 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_INTERNAL_H_ +#define BASE_STRINGS_STRING_UTIL_INTERNAL_H_ + +#include "polyfills/base/logging.h" +#include "polyfills/base/notreached.h" +#include "base/strings/string_piece.h" +#include "base/third_party/icu/icu_utf.h" + +namespace gurl_base { + +namespace internal { + +// Used by ReplaceStringPlaceholders to track the position in the string of +// replaced parameters. +struct ReplacementOffset { + ReplacementOffset(uintptr_t parameter, size_t offset) + : parameter(parameter), offset(offset) {} + + // Index of the parameter. + uintptr_t parameter; + + // Starting position in the string. + size_t offset; +}; + +static bool CompareParameter(const ReplacementOffset& elem1, + const ReplacementOffset& elem2) { + return elem1.parameter < elem2.parameter; +} + +// Assuming that a pointer is the size of a "machine word", then +// uintptr_t is an integer type that is also a machine word. +using MachineWord = uintptr_t; + +inline bool IsMachineWordAligned(const void* pointer) { + return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); +} + +template <typename StringType> +StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToLowerASCII(str[i])); + return ret; +} + +template <typename StringType> +StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToUpperASCII(str[i])); + return ret; +} + +template <class StringType> +int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, + BasicStringPiece<StringType> b) { + // Find the first characters that aren't equal and compare them. 
If the end + // of one of the strings is found before a nonequal character, the lengths + // of the strings are compared. + size_t i = 0; + while (i < a.length() && i < b.length()) { + typename StringType::value_type lower_a = ToLowerASCII(a[i]); + typename StringType::value_type lower_b = ToLowerASCII(b[i]); + if (lower_a < lower_b) + return -1; + if (lower_a > lower_b) + return 1; + i++; + } + + // End of one string hit before finding a different character. Expect the + // common case to be "strings equal" at this point so check that first. + if (a.length() == b.length()) + return 0; + + if (a.length() < b.length()) + return -1; + return 1; +} + +template <typename Str> +TrimPositions TrimStringT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions, + Str* output) { + // Find the edges of leading/trailing whitespace as desired. Need to use + // a StringPiece version of input to be able to call find* on it with the + // StringPiece version of trim_chars (normally the trim_chars will be a + // constant so avoid making a copy). + const size_t last_char = input.length() - 1; + const size_t first_good_char = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + const size_t last_good_char = (positions & TRIM_TRAILING) + ? input.find_last_not_of(trim_chars) + : last_char; + + // When the string was all trimmed, report that we stripped off characters + // from whichever position the caller was interested in. For empty input, we + // stripped no characters, but we still need to clear |output|. + if (input.empty() || first_good_char == Str::npos || + last_good_char == Str::npos) { + bool input_was_empty = input.empty(); // in case output == &input + output->clear(); + return input_was_empty ? TRIM_NONE : positions; + } + + // Trim. + output->assign(input.data() + first_good_char, + last_good_char - first_good_char + 1); + + // Return where we trimmed from. 
+ return static_cast<TrimPositions>( + (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | + (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING)); +} + +template <typename Str> +BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions) { + size_t begin = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + size_t end = (positions & TRIM_TRAILING) + ? input.find_last_not_of(trim_chars) + 1 + : input.size(); + return input.substr(begin, end - begin); +} + +template <typename STR> +STR CollapseWhitespaceT(BasicStringPiece<STR> text, + bool trim_sequences_with_line_breaks) { + STR result; + result.resize(text.size()); + + // Set flags to pretend we're already in a trimmed whitespace sequence, so we + // will trim any leading whitespace. + bool in_whitespace = true; + bool already_trimmed = true; + + int chars_written = 0; + for (auto c : text) { + if (IsUnicodeWhitespace(c)) { + if (!in_whitespace) { + // Reduce all whitespace sequences to a single space. + in_whitespace = true; + result[chars_written++] = L' '; + } + if (trim_sequences_with_line_breaks && !already_trimmed && + ((c == '\n') || (c == '\r'))) { + // Whitespace sequences containing CR or LF are eliminated entirely. + already_trimmed = true; + --chars_written; + } + } else { + // Non-whitespace characters are copied straight across. + in_whitespace = false; + already_trimmed = false; + result[chars_written++] = c; + } + } + + if (in_whitespace && !already_trimmed) { + // Any trailing whitespace is eliminated. + --chars_written; + } + + result.resize(chars_written); + return result; +} + +template <class Char> +bool DoIsStringASCII(const Char* characters, size_t length) { + // Bitmasks to detect non ASCII characters for character sizes of 8, 16 and 32 + // bits. 
+ constexpr MachineWord NonASCIIMasks[] = { + 0, MachineWord(0x8080808080808080ULL), MachineWord(0xFF80FF80FF80FF80ULL), + 0, MachineWord(0xFFFFFF80FFFFFF80ULL), + }; + + if (!length) + return true; + constexpr MachineWord non_ascii_bit_mask = NonASCIIMasks[sizeof(Char)]; + static_assert(non_ascii_bit_mask, "Error: Invalid Mask"); + MachineWord all_char_bits = 0; + const Char* end = characters + length; + + // Prologue: align the input. + while (!IsMachineWordAligned(characters) && characters < end) + all_char_bits |= *characters++; + if (all_char_bits & non_ascii_bit_mask) + return false; + + // Compare the values of CPU word size. + constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char); + constexpr int batch_count = 16; + while (characters <= end - batch_count * chars_per_word) { + all_char_bits = 0; + for (int i = 0; i < batch_count; ++i) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + if (all_char_bits & non_ascii_bit_mask) + return false; + } + + // Process the remaining words. + all_char_bits = 0; + while (characters <= end - chars_per_word) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + + // Process the remaining bytes. + while (characters < end) + all_char_bits |= *characters++; + + return !(all_char_bits & non_ascii_bit_mask); +} + +template <bool (*Validator)(uint32_t)> +inline static bool DoIsStringUTF8(StringPiece str) { + const char* src = str.data(); + int32_t src_len = static_cast<int32_t>(str.length()); + int32_t char_index = 0; + + while (char_index < src_len) { + int32_t code_point; + CBU8_NEXT(src, char_index, src_len, code_point); + if (!Validator(code_point)) + return false; + } + return true; +} + +// Implementation note: Normally this function will be called with a hardcoded +// constant for the lowercase_ascii parameter. 
Constructing a StringPiece from +// a C constant requires running strlen, so the result will be two passes +// through the buffers, one to find the length of lowercase_ascii, and one to +// compare each letter. +// +// This function could have taken a const char* to avoid this and only do one +// pass through the string. But the strlen is faster than the case-insensitive +// compares and lets us early-exit in the case that the strings are different +// lengths (will often be the case for non-matches). So whether one approach or +// the other will be faster depends on the case. +// +// The hardcoded strings are typically very short so it doesn't matter, and the +// string piece gives additional flexibility for the caller (doesn't have to be +// null terminated) so we choose the StringPiece route. +template <typename Str> +static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, + StringPiece lowercase_ascii) { + return std::equal( + str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(), + [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; }); +} + +template <typename Str> +bool StartsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(0, search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + search_for.begin(), search_for.end(), source.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + GURL_NOTREACHED(); + return false; + } +} + +template <typename Str> +bool EndsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = + str.substr(str.size() - search_for.size(), search_for.size()); + + switch
(case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + source.begin(), source.end(), search_for.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + GURL_NOTREACHED(); + return false; + } +} + +// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. +template <class StringType> +struct SubstringMatcher { + BasicStringPiece<StringType> find_this; + + size_t Find(const StringType& input, size_t pos) { + return input.find(find_this.data(), pos, find_this.length()); + } + size_t MatchSize() { return find_this.length(); } +}; + +// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. +template <class StringType> +struct CharacterMatcher { + BasicStringPiece<StringType> find_any_of_these; + + size_t Find(const StringType& input, size_t pos) { + return input.find_first_of(find_any_of_these.data(), pos, + find_any_of_these.length()); + } + constexpr size_t MatchSize() { return 1; } +}; + +enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; + +// Runs in O(n) time in the length of |str|, and transforms the string without +// reallocating when possible. Returns |true| if any matches were found. +// +// This is parameterized on a |Matcher| traits type, so that it can be the +// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). +template <class StringType, class Matcher> +bool DoReplaceMatchesAfterOffset(StringType* str, + size_t initial_offset, + Matcher matcher, + BasicStringPiece<StringType> replace_with, + ReplaceType replace_type) { + using CharTraits = typename StringType::traits_type; + + const size_t find_length = matcher.MatchSize(); + if (!find_length) + return false; + + // If the find string doesn't appear, there's nothing to do. 
+ size_t first_match = matcher.Find(*str, initial_offset); + if (first_match == StringType::npos) + return false; + + // If we're only replacing one instance, there's no need to do anything + // complicated. + const size_t replace_length = replace_with.length(); + if (replace_type == ReplaceType::REPLACE_FIRST) { + str->replace(first_match, find_length, replace_with.data(), replace_length); + return true; + } + + // If the find and replace strings are the same length, we can simply use + // replace() on each instance, and finish the entire operation in O(n) time. + if (find_length == replace_length) { + auto* buffer = &((*str)[0]); + for (size_t offset = first_match; offset != StringType::npos; + offset = matcher.Find(*str, offset + replace_length)) { + CharTraits::copy(buffer + offset, replace_with.data(), replace_length); + } + return true; + } + + // Since the find and replace strings aren't the same length, a loop like the + // one above would be O(n^2) in the worst case, as replace() will shift the + // entire remaining string each time. We need to be more clever to keep things + // O(n). + // + // When the string is being shortened, it's possible to just shift the matches + // down in one pass while finding, and truncate the length at the end of the + // search. + // + // If the string is being lengthened, more work is required. The strategy used + // here is to make two find() passes through the string. The first pass counts + // the number of matches to determine the new size. The second pass will + // either construct the new string into a new buffer (if the existing buffer + // lacked capacity), or else -- if there is room -- create a region of scratch + // space after |first_match| by shifting the tail of the string to a higher + // index, and doing in-place moves from the tail to lower indices thereafter. 
+ size_t str_length = str->length(); + size_t expansion = 0; + if (replace_length > find_length) { + // This operation lengthens the string; determine the new length by counting + // matches. + const size_t expansion_per_match = (replace_length - find_length); + size_t num_matches = 0; + for (size_t match = first_match; match != StringType::npos; + match = matcher.Find(*str, match + find_length)) { + expansion += expansion_per_match; + ++num_matches; + } + const size_t final_length = str_length + expansion; + + if (str->capacity() < final_length) { + // If we'd have to allocate a new buffer to grow the string, build the + // result directly into the new allocation via append(). + StringType src(str->get_allocator()); + str->swap(src); + str->reserve(final_length); + + size_t pos = 0; + for (size_t match = first_match;; match = matcher.Find(src, pos)) { + str->append(src, pos, match - pos); + str->append(replace_with.data(), replace_length); + pos = match + find_length; + + // A mid-loop test/break enables skipping the final Find() call; the + // number of matches is known, so don't search past the last one. + if (!--num_matches) + break; + } + + // Handle substring after the final match. + str->append(src, pos, str_length - pos); + return true; + } + + // Prepare for the copy/move loop below -- expand the string to its final + // size by shifting the data after the first match to the end of the resized + // string. + size_t shift_src = first_match + find_length; + size_t shift_dst = shift_src + expansion; + + // Big |expansion| factors (relative to |str_length|) require padding up to + // |shift_dst|. + if (shift_dst > str_length) + str->resize(shift_dst); + + str->replace(shift_dst, str_length - shift_src, *str, shift_src, + str_length - shift_src); + str_length = final_length; + } + + // We can alternate replacement and move operations. 
This won't overwrite the + // unsearched region of the string so long as |write_offset| <= |read_offset|; + // that condition is always satisfied because: + // + // (a) If the string is being shortened, |expansion| is zero and + // |write_offset| grows slower than |read_offset|. + // + // (b) If the string is being lengthened, |write_offset| grows faster than + // |read_offset|, but |expansion| is big enough so that |write_offset| + // will only catch up to |read_offset| at the point of the last match. + auto* buffer = &((*str)[0]); + size_t write_offset = first_match; + size_t read_offset = first_match + expansion; + do { + if (replace_length) { + CharTraits::copy(buffer + write_offset, replace_with.data(), + replace_length); + write_offset += replace_length; + } + read_offset += find_length; + + // min() clamps StringType::npos (the largest unsigned value) to str_length. + size_t match = std::min(matcher.Find(*str, read_offset), str_length); + + size_t length = match - read_offset; + if (length) { + CharTraits::move(buffer + write_offset, buffer + read_offset, length); + write_offset += length; + read_offset += length; + } + } while (read_offset < str_length); + + // If we're shortening the string, truncate it now. + str->resize(write_offset); + return true; +} + +template <class StringType> +bool ReplaceCharsT(BasicStringPiece<StringType> input, + BasicStringPiece<StringType> find_any_of_these, + BasicStringPiece<StringType> replace_with, + StringType* output) { + // Commonly, this is called with output and input being the same string; in + // that case, skip the copy. 
+ if (input.data() != output->data() || input.size() != output->size()) + output->assign(input.data(), input.size()); + + return DoReplaceMatchesAfterOffset( + output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, + ReplaceType::REPLACE_ALL); +} + +template <class string_type> +inline typename string_type::value_type* WriteIntoT(string_type* str, + size_t length_with_null) { + GURL_DCHECK_GE(length_with_null, 1u); + str->reserve(length_with_null); + str->resize(length_with_null - 1); + return &((*str)[0]); +} + +// Generic version for all JoinString overloads. |list_type| must be a sequence +// (gurl_base::span or std::initializer_list) of strings/StringPieces (std::string, +// string16, StringPiece or StringPiece16). |string_type| is either std::string +// or string16. +template <typename list_type, typename string_type> +static string_type JoinStringT(list_type parts, + BasicStringPiece<string_type> sep) { + if (gurl_base::empty(parts)) + return string_type(); + + // Pre-allocate the eventual size of the string. Start with the size of all of + // the separators (note that this *assumes* parts.size() > 0). + size_t total_size = (parts.size() - 1) * sep.size(); + for (const auto& part : parts) + total_size += part.size(); + string_type result; + result.reserve(total_size); + + auto iter = parts.begin(); + GURL_DCHECK(iter != parts.end()); + result.append(iter->data(), iter->size()); + ++iter; + + for (; iter != parts.end(); ++iter) { + result.append(sep.data(), sep.size()); + result.append(iter->data(), iter->size()); + } + + // Sanity-check that we pre-allocated correctly. 
+ GURL_DCHECK_EQ(total_size, result.size()); + + return result; +} + +template <class StringType> +StringType DoReplaceStringPlaceholders( + BasicStringPiece<StringType> format_string, + const std::vector<StringType>& subst, + std::vector<size_t>* offsets) { + size_t substitutions = subst.size(); + GURL_DCHECK_LT(substitutions, 10U); + + size_t sub_length = 0; + for (const auto& cur : subst) + sub_length += cur.length(); + + StringType formatted; + formatted.reserve(format_string.length() + sub_length); + + std::vector<ReplacementOffset> r_offsets; + for (auto i = format_string.begin(); i != format_string.end(); ++i) { + if ('$' == *i) { + if (i + 1 != format_string.end()) { + ++i; + if ('$' == *i) { + while (i != format_string.end() && '$' == *i) { + formatted.push_back('$'); + ++i; + } + --i; + } else { + if (*i < '1' || *i > '9') { + GURL_DLOG(ERROR) << "Invalid placeholder: $" << *i; + continue; + } + uintptr_t index = *i - '1'; + if (offsets) { + ReplacementOffset r_offset(index, + static_cast<int>(formatted.size())); + r_offsets.insert( + std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, + &CompareParameter), + r_offset); + } + if (index < substitutions) + formatted.append(subst.at(index)); + } + } + } else { + formatted.push_back(*i); + } + } + if (offsets) { + for (const auto& cur : r_offsets) + offsets->push_back(cur.offset); + } + return formatted; +} + +// The following code is compatible with the OpenBSD lcpy interface. See: +// http://www.gratisoft.us/todd/papers/strlcpy.html +// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. 
+ if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return its length in characters. + while (src[dst_size]) + ++dst_size; + return dst_size; +} + +} // namespace internal + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_INTERNAL_H_
diff --git a/base/strings/string_util_posix.h b/base/strings/string_util_posix.h index e1ba7c3..457e258 100644 --- a/base/strings/string_util_posix.h +++ b/base/strings/string_util_posix.h
@@ -11,7 +11,7 @@ #include <string.h> #include <wchar.h> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" namespace gurl_base {
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc index a2aab42..f73b895 100644 --- a/base/strings/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc
@@ -12,6 +12,7 @@ #include <algorithm> #include <type_traits> +#include "base/bits.h" #include "base/stl_util.h" #include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" @@ -678,28 +679,28 @@ int64_t bytes; const char* expected; } cases[] = { - // Expected behavior: we show one post-decimal digit when we have - // under two pre-decimal digits, except in cases where it makes no - // sense (zero or bytes). - // Since we switch units once we cross the 1000 mark, this keeps - // the display of file sizes or bytes consistently around three - // digits. - {0, "0 B"}, - {512, "512 B"}, - {1024*1024, "1.0 MB"}, - {1024*1024*1024, "1.0 GB"}, - {10LL*1024*1024*1024, "10.0 GB"}, - {99LL*1024*1024*1024, "99.0 GB"}, - {105LL*1024*1024*1024, "105 GB"}, - {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, - {~(1LL << 63), "8192 PB"}, + // Expected behavior: we show one post-decimal digit when we have + // under two pre-decimal digits, except in cases where it makes no + // sense (zero or bytes). + // Since we switch units once we cross the 1000 mark, this keeps + // the display of file sizes or bytes consistently around three + // digits. + {0, "0 B"}, + {512, "512 B"}, + {1024 * 1024, "1.0 MB"}, + {1024 * 1024 * 1024, "1.0 GB"}, + {10LL * 1024 * 1024 * 1024, "10.0 GB"}, + {99LL * 1024 * 1024 * 1024, "99.0 GB"}, + {105LL * 1024 * 1024 * 1024, "105 GB"}, + {105LL * 1024 * 1024 * 1024 + 500LL * 1024 * 1024, "105 GB"}, + {~(bits::LeftmostBit<int64_t>()), "8192 PB"}, - {99*1024 + 103, "99.1 kB"}, - {1024*1024 + 103, "1.0 MB"}, - {1024*1024 + 205 * 1024, "1.2 MB"}, - {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, - {10LL*1024*1024*1024, "10.0 GB"}, - {100LL*1024*1024*1024, "100 GB"}, + {99 * 1024 + 103, "99.1 kB"}, + {1024 * 1024 + 103, "1.0 MB"}, + {1024 * 1024 + 205 * 1024, "1.2 MB"}, + {1024 * 1024 * 1024 + (927 * 1024 * 1024), "1.9 GB"}, + {10LL * 1024 * 1024 * 1024, "10.0 GB"}, + {100LL * 1024 * 1024 * 1024, "100 GB"}, }; for (const auto& i : cases) {
diff --git a/base/strings/string_util_win.cc b/base/strings/string_util_win.cc new file mode 100644 index 0000000..1a98101 --- /dev/null +++ b/base/strings/string_util_win.cc
@@ -0,0 +1,145 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_util_win.h" + +#include "base/strings/string_util_internal.h" + +namespace gurl_base { + +#if defined(BASE_STRING16_IS_STD_U16STRING) +bool IsStringASCII(WStringPiece str) { + return internal::DoIsStringASCII(str.data(), str.length()); +} + +std::wstring ToLowerASCII(WStringPiece str) { + return internal::ToLowerASCIIImpl(str); +} + +std::wstring ToUpperASCII(WStringPiece str) { + return internal::ToUpperASCIIImpl(str); +} + +int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b) { + return internal::CompareCaseInsensitiveASCIIT(a, b); +} + +bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b) { + return a.size() == b.size() && + internal::CompareCaseInsensitiveASCIIT(a, b) == 0; +} + +bool RemoveChars(WStringPiece input, + WStringPiece remove_chars, + std::wstring* output) { + return internal::ReplaceCharsT(input, remove_chars, WStringPiece(), output); +} + +bool ReplaceChars(WStringPiece input, + WStringPiece replace_chars, + WStringPiece replace_with, + std::wstring* output) { + return internal::ReplaceCharsT(input, replace_chars, replace_with, output); +} + +bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output) { + return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) != + TRIM_NONE; +} + +WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions) { + return internal::TrimStringPieceT(input, trim_chars, positions); +} + +TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output) { + return internal::TrimStringT(input, WStringPiece(kWhitespaceWide), positions, + output); +} + +WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { + return internal::TrimStringPieceT(input, 
WStringPiece(kWhitespaceWide), + positions); +} + +std::wstring CollapseWhitespace(WStringPiece text, + bool trim_sequences_with_line_breaks) { + return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks); +} + +bool ContainsOnlyChars(WStringPiece input, WStringPiece characters) { + return input.find_first_not_of(characters) == WStringPiece::npos; +} + +bool LowerCaseEqualsASCII(WStringPiece str, StringPiece lowercase_ascii) { + return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii); +} + +bool EqualsASCII(WStringPiece str, StringPiece ascii) { + return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end()); +} + +bool StartsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity) { + return internal::StartsWithT(str, search_for, case_sensitivity); +} + +bool EndsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity) { + return internal::EndsWithT(str, search_for, case_sensitivity); +} + +void ReplaceFirstSubstringAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with) { + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + replace_with, internal::ReplaceType::REPLACE_FIRST); +} + +void ReplaceSubstringsAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with) { + internal::DoReplaceMatchesAfterOffset( + str, start_offset, internal::SubstringMatcher<std::wstring>{find_this}, + replace_with, internal::ReplaceType::REPLACE_ALL); +} + +wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { + return internal::WriteIntoT(str, length_with_null); +} + +std::wstring JoinString(span<const std::wstring> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, separator); +} + +std::wstring JoinString(span<const WStringPiece> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, 
separator); +} + +std::wstring JoinString(std::initializer_list<WStringPiece> parts, + WStringPiece separator) { + return internal::JoinStringT(parts, separator); +} + +std::wstring ReplaceStringPlaceholders(WStringPiece format_string, + const std::vector<std::wstring>& subst, + std::vector<size_t>* offsets) { + return internal::DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +#endif + +} // namespace base
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h index 710d574..2765748 100644 --- a/base/strings/string_util_win.h +++ b/base/strings/string_util_win.h
@@ -11,7 +11,14 @@ #include <string.h> #include <wchar.h> -#include "polyfills/base/logging.h" +#include <string> +#include <vector> + +#include "polyfills/base/check.h" +#include "base/containers/span.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" namespace gurl_base { @@ -39,6 +46,154 @@ return length; } +// Utility functions to access the underlying string buffer as a wide char +// pointer. +// +// Note: These functions violate strict aliasing when char16 and wchar_t are +// unrelated types. We thus pass -fno-strict-aliasing to the compiler on +// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. +// +// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 +// [2] +// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 +inline wchar_t* as_writable_wcstr(char16* str) { + return reinterpret_cast<wchar_t*>(str); +} + +inline wchar_t* as_writable_wcstr(string16& str) { + return reinterpret_cast<wchar_t*>(data(str)); +} + +inline const wchar_t* as_wcstr(const char16* str) { + return reinterpret_cast<const wchar_t*>(str); +} + +inline const wchar_t* as_wcstr(StringPiece16 str) { + return reinterpret_cast<const wchar_t*>(str.data()); +} + +// Utility functions to access the underlying string buffer as a char16 pointer. +inline char16* as_writable_u16cstr(wchar_t* str) { + return reinterpret_cast<char16*>(str); +} + +inline char16* as_writable_u16cstr(std::wstring& str) { + return reinterpret_cast<char16*>(data(str)); +} + +inline const char16* as_u16cstr(const wchar_t* str) { + return reinterpret_cast<const char16*>(str); +} + +inline const char16* as_u16cstr(WStringPiece str) { + return reinterpret_cast<const char16*>(str.data()); +} + +// Utility functions to convert between gurl_base::WStringPiece and +// gurl_base::StringPiece16. 
+inline WStringPiece AsWStringPiece(StringPiece16 str) { + return WStringPiece(as_wcstr(str.data()), str.size()); +} + +inline StringPiece16 AsStringPiece16(WStringPiece str) { + return StringPiece16(as_u16cstr(str.data()), str.size()); +} + +inline std::wstring AsWString(StringPiece16 str) { + return std::wstring(as_wcstr(str.data()), str.size()); +} + +inline string16 AsString16(WStringPiece str) { + return string16(as_u16cstr(str.data()), str.size()); +} + +// The following section contains overloads of the cross-platform APIs for +// std::wstring and gurl_base::WStringPiece. These are only enabled if std::wstring +// and gurl_base::string16 are distinct types, as otherwise this would result in an +// ODR violation. +// TODO(crbug.com/911896): Remove those guards once gurl_base::string16 is +// std::u16string. +#if defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT bool IsStringASCII(WStringPiece str); + +BASE_EXPORT std::wstring ToLowerASCII(WStringPiece str); + +BASE_EXPORT std::wstring ToUpperASCII(WStringPiece str); + +BASE_EXPORT int CompareCaseInsensitiveASCII(WStringPiece a, WStringPiece b); + +BASE_EXPORT bool EqualsCaseInsensitiveASCII(WStringPiece a, WStringPiece b); + +BASE_EXPORT bool RemoveChars(WStringPiece input, + WStringPiece remove_chars, + std::wstring* output); + +BASE_EXPORT bool ReplaceChars(WStringPiece input, + WStringPiece replace_chars, + WStringPiece replace_with, + std::wstring* output); + +BASE_EXPORT bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions); + +BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input, + TrimPositions positions); + +BASE_EXPORT std::wstring CollapseWhitespace( + WStringPiece text, + bool trim_sequences_with_line_breaks); + +BASE_EXPORT bool 
ContainsOnlyChars(WStringPiece input, WStringPiece characters); + +BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str, + StringPiece lowercase_ascii); + +BASE_EXPORT bool EqualsASCII(WStringPiece str, StringPiece ascii); + +BASE_EXPORT bool StartsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity); + +BASE_EXPORT bool EndsWith(WStringPiece str, + WStringPiece search_for, + CompareCase case_sensitivity); + +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with); + +BASE_EXPORT void ReplaceSubstringsAfterOffset(std::wstring* str, + size_t start_offset, + WStringPiece find_this, + WStringPiece replace_with); + +BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); + +BASE_EXPORT std::wstring JoinString(span<const std::wstring> parts, + WStringPiece separator); + +BASE_EXPORT std::wstring JoinString(span<const WStringPiece> parts, + WStringPiece separator); + +BASE_EXPORT std::wstring JoinString(std::initializer_list<WStringPiece> parts, + WStringPiece separator); + +BASE_EXPORT std::wstring ReplaceStringPlaceholders( + WStringPiece format_string, + const std::vector<std::wstring>& subst, + std::vector<size_t>* offsets); +#endif + } // namespace base #endif // BASE_STRINGS_STRING_UTIL_WIN_H_
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc index cc58087..e1a18c9 100644 --- a/base/strings/stringprintf.cc +++ b/base/strings/stringprintf.cc
@@ -9,6 +9,7 @@ #include <vector> +#include "polyfills/base/logging.h" #include "base/scoped_clear_last_error.h" #include "base/stl_util.h" #include "base/strings/string_util.h" @@ -62,7 +63,7 @@ va_list ap_copy; va_copy(ap_copy, ap); - gurl_base::internal::ScopedClearLastError last_error; + gurl_base::ScopedClearLastError last_error; int result = vsnprintfT(stack_buf, gurl_base::size(stack_buf), format, ap_copy); va_end(ap_copy);
diff --git a/base/strings/sys_string_conversions.h b/base/strings/sys_string_conversions.h index 7c3c575..a2b4cce 100644 --- a/base/strings/sys_string_conversions.h +++ b/base/strings/sys_string_conversions.h
@@ -20,6 +20,9 @@ #if defined(OS_MACOSX) #include <CoreFoundation/CoreFoundation.h> + +#include "base/mac/scoped_cftyperef.h" + #ifdef __OBJC__ @class NSString; #else @@ -66,10 +69,10 @@ // Creates a string, and returns it with a refcount of 1. You are responsible // for releasing it. Returns NULL on failure. -BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8) +BASE_EXPORT ScopedCFTypeRef<CFStringRef> SysUTF8ToCFStringRef(StringPiece utf8) WARN_UNUSED_RESULT; -BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16) - WARN_UNUSED_RESULT; +BASE_EXPORT ScopedCFTypeRef<CFStringRef> SysUTF16ToCFStringRef( + StringPiece16 utf16) WARN_UNUSED_RESULT; // Same, but returns an autoreleased NSString. BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8) WARN_UNUSED_RESULT;
diff --git a/base/strings/sys_string_conversions_posix.cc b/base/strings/sys_string_conversions_posix.cc index 80f01e6..f9f7312 100644 --- a/base/strings/sys_string_conversions_posix.cc +++ b/base/strings/sys_string_conversions_posix.cc
@@ -5,6 +5,7 @@ #include "base/strings/sys_string_conversions.h" #include <stddef.h> +#include <string.h> #include <wchar.h> #include "base/strings/string_piece.h" @@ -57,7 +58,6 @@ // Handle any errors and return an empty string. case static_cast<size_t>(-1): return std::string(); - break; case 0: // We hit an embedded null byte, keep going. ++num_out_chars; @@ -85,7 +85,6 @@ // Handle any errors and return an empty string. case static_cast<size_t>(-1): return std::string(); - break; case 0: // We hit an embedded null byte, keep going. ++j; // Output is already zeroed. @@ -114,7 +113,6 @@ case static_cast<size_t>(-2): case static_cast<size_t>(-1): return std::wstring(); - break; case 0: // We hit an embedded null byte, keep going. i += 1; @@ -144,7 +142,6 @@ case static_cast<size_t>(-2): case static_cast<size_t>(-1): return std::wstring(); - break; case 0: i += 1; // Skip null byte. break;
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc index 7d00bb4..5a492d6 100644 --- a/base/strings/utf_offset_string_conversions.cc +++ b/base/strings/utf_offset_string_conversions.cc
@@ -9,7 +9,7 @@ #include <algorithm> #include <memory> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/strings/string_piece.h" #include "base/strings/utf_string_conversion_utils.h"
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc index 4691cb3..8416524 100644 --- a/base/strings/utf_offset_string_conversions_unittest.cc +++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -6,7 +6,6 @@ #include <algorithm> -#include "polyfills/base/logging.h" #include "base/stl_util.h" #include "base/strings/string_piece.h" #include "base/strings/utf_offset_string_conversions.h"
diff --git a/base/strings/utf_string_conversions.cc b/base/strings/utf_string_conversions.cc index aaf4a40..92333e2 100644 --- a/base/strings/utf_string_conversions.cc +++ b/base/strings/utf_string_conversions.cc
@@ -339,4 +339,16 @@ return std::string(utf16.begin(), utf16.end()); } +#if defined(WCHAR_T_IS_UTF16) +std::wstring ASCIIToWide(StringPiece ascii) { + GURL_DCHECK(IsStringASCII(ascii)) << ascii; + return std::wstring(ascii.begin(), ascii.end()); +} + +std::string WideToASCII(WStringPiece wide) { + GURL_DCHECK(IsStringASCII(wide)) << wide; + return std::string(wide.begin(), wide.end()); +} +#endif // defined(WCHAR_T_IS_UTF16) + } // namespace base
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h index 745372c..043b6ae 100644 --- a/base/strings/utf_string_conversions.h +++ b/base/strings/utf_string_conversions.h
@@ -12,6 +12,7 @@ #include "polyfills/base/base_export.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" +#include "build/build_config.h" namespace gurl_base { @@ -49,6 +50,16 @@ // beforehand. BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT; +#if defined(WCHAR_T_IS_UTF16) +// This converts an ASCII string, typically a hardcoded constant, to a wide +// string. +BASE_EXPORT std::wstring ASCIIToWide(StringPiece ascii) WARN_UNUSED_RESULT; + +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII +// beforehand. +BASE_EXPORT std::string WideToASCII(WStringPiece wide) WARN_UNUSED_RESULT; +#endif // defined(WCHAR_T_IS_UTF16) + } // namespace base #endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
diff --git a/base/strings/utf_string_conversions_unittest.cc b/base/strings/utf_string_conversions_unittest.cc index 22b167b..6cffe99 100644 --- a/base/strings/utf_string_conversions_unittest.cc +++ b/base/strings/utf_string_conversions_unittest.cc
@@ -4,7 +4,6 @@ #include <stddef.h> -#include "polyfills/base/logging.h" #include "base/stl_util.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h"
diff --git a/base/template_util.h b/base/template_util.h index 5384355..8c2b185 100644 --- a/base/template_util.h +++ b/base/template_util.h
@@ -180,6 +180,50 @@ static constexpr bool value = true; }; +// C++14 implementation of C++17's std::bool_constant. +// +// Reference: https://en.cppreference.com/w/cpp/types/integral_constant +// Specification: https://wg21.link/meta.type.synop +template <bool B> +using bool_constant = std::integral_constant<bool, B>; + +// C++14 implementation of C++17's std::conjunction. +// +// Reference: https://en.cppreference.com/w/cpp/types/conjunction +// Specification: https://wg21.link/meta.logical#1.itemdecl:1 +template <typename...> +struct conjunction : std::true_type {}; + +template <typename B1> +struct conjunction<B1> : B1 {}; + +template <typename B1, typename... Bn> +struct conjunction<B1, Bn...> + : std::conditional_t<static_cast<bool>(B1::value), conjunction<Bn...>, B1> { +}; + +// C++14 implementation of C++17's std::disjunction. +// +// Reference: https://en.cppreference.com/w/cpp/types/disjunction +// Specification: https://wg21.link/meta.logical#itemdecl:2 +template <typename...> +struct disjunction : std::false_type {}; + +template <typename B1> +struct disjunction<B1> : B1 {}; + +template <typename B1, typename... Bn> +struct disjunction<B1, Bn...> + : std::conditional_t<static_cast<bool>(B1::value), B1, disjunction<Bn...>> { +}; + +// C++14 implementation of C++17's std::negation. +// +// Reference: https://en.cppreference.com/w/cpp/types/negation +// Specification: https://wg21.link/meta.logical#itemdecl:3 +template <typename B> +struct negation : bool_constant<!static_cast<bool>(B::value)> {}; + } // namespace base #undef CR_USE_FALLBACKS_FOR_GCC_WITH_LIBCXX
diff --git a/build/build_config.h b/build/build_config.h index 688b779..6a6028d 100644 --- a/build/build_config.h +++ b/build/build_config.h
@@ -64,7 +64,7 @@ #elif defined(_AIX) #define OS_AIX 1 #elif defined(__asmjs__) || defined(__wasm__) -#define OS_ASMJS +#define OS_ASMJS 1 #else #error Please add support for your platform in build/build_config.h #endif @@ -86,7 +86,8 @@ #define OS_POSIX 1 #endif -// Compiler detection. +// Compiler detection. Note: clang masquerades as GCC on POSIX and as MSVC on +// Windows. #if defined(__GNUC__) #define COMPILER_GCC 1 #elif defined(_MSC_VER)
diff --git a/copy.bara.sky b/copy.bara.sky index 02a615d..328fbab 100644 --- a/copy.bara.sky +++ b/copy.bara.sky
@@ -13,6 +13,9 @@ "AUTHORS", "LICENSE", "base/compiler_specific.h", + "base/containers/checked_iterators.h", + "base/containers/span.h", + "base/containers/util.h", "base/macros.h", "base/debug/leak_annotations.h", "base/no_destructor.h", @@ -49,10 +52,13 @@ # Should be in sync with //polyfill/BUILD. polyfilled_headers = [ "base/base_export.h", + "base/check.h", + "base/check_op.h", "base/component_export.h", "base/debug/alias.h", "base/export_template.h", "base/logging.h", + "base/notreached.h", "base/trace_event/memory_usage_estimator.h", ]
diff --git a/polyfills/BUILD b/polyfills/BUILD index beeac1a..820c63d 100644 --- a/polyfills/BUILD +++ b/polyfills/BUILD
@@ -7,10 +7,13 @@ name = "polyfills", hdrs = [ "base/base_export.h", + "base/check.h", + "base/check_op.h", "base/component_export.h", "base/debug/alias.h", "base/export_template.h", "base/logging.h", + "base/notreached.h", "base/trace_event/memory_usage_estimator.h", ], copts = build_config.default_copts,
diff --git a/polyfills/base/check.h b/polyfills/base/check.h new file mode 100644 index 0000000..15de08b --- /dev/null +++ b/polyfills/base/check.h
@@ -0,0 +1,10 @@ +// Copyright (c) 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef POLYFILLS_BASE_CHECK_H_ +#define POLYFILLS_BASE_CHECK_H_ + +#include "polyfills/base/logging.h" + +#endif /* POLYFILLS_BASE_CHECK_H_ */
diff --git a/polyfills/base/check_op.h b/polyfills/base/check_op.h new file mode 100644 index 0000000..ecc127a --- /dev/null +++ b/polyfills/base/check_op.h
@@ -0,0 +1,10 @@ +// Copyright (c) 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef POLYFILLS_BASE_CHECK_OP_H_ +#define POLYFILLS_BASE_CHECK_OP_H_ + +#include "polyfills/base/logging.h" + +#endif /* POLYFILLS_BASE_CHECK_OP_H_ */
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h index 41ddacd..afe296e 100644 --- a/polyfills/base/logging.h +++ b/polyfills/base/logging.h
@@ -20,8 +20,10 @@ GurlFakeLogSink& operator<<(const T&) { return *this; } }; +#define GURL_CHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK_LE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2}) +#define GURL_CHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK(statement) GurlFakeLogSink({statement}) #define GURL_DCHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_DCHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2})
diff --git a/polyfills/base/notreached.h b/polyfills/base/notreached.h new file mode 100644 index 0000000..564d64c --- /dev/null +++ b/polyfills/base/notreached.h
@@ -0,0 +1,10 @@ +// Copyright (c) 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef POLYFILLS_BASE_NOTREACHED_H_ +#define POLYFILLS_BASE_NOTREACHED_H_ + +#include "polyfills/base/logging.h" + +#endif /* POLYFILLS_BASE_NOTREACHED_H_ */
diff --git a/url/gurl.cc b/url/gurl.cc index 36c5ee2..5ac3ea1 100644 --- a/url/gurl.cc +++ b/url/gurl.cc
@@ -10,7 +10,7 @@ #include <ostream> #include <utility> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/no_destructor.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" @@ -288,7 +288,7 @@ } GURL GURL::GetAsReferrer() const { - if (!SchemeIsValidForReferrer()) + if (!is_valid() || !IsReferrerScheme(spec_.data(), parsed_.scheme)) return GURL(); if (!has_ref() && !has_username() && !has_password()) @@ -354,10 +354,6 @@ return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme); } -bool GURL::SchemeIsValidForReferrer() const { - return is_valid_ && IsReferrerScheme(spec_.data(), parsed_.scheme); -} - bool GURL::SchemeIsWSOrWSS() const { return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme); } @@ -519,7 +515,9 @@ } bool operator==(const GURL& x, const gurl_base::StringPiece& spec) { - GURL_DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec); + GURL_DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec) + << "Comparisons of GURLs and strings must ensure as a precondition that " + "the string is fully canonicalized."; return x.possibly_invalid_spec() == spec; }
diff --git a/url/gurl.h b/url/gurl.h index 73d2b43..aa33094 100644 --- a/url/gurl.h +++ b/url/gurl.h
@@ -229,9 +229,6 @@ // Returns true if the scheme is "http" or "https". bool SchemeIsHTTPOrHTTPS() const; - // Returns true if the scheme is valid for use as a referrer. - bool SchemeIsValidForReferrer() const; - // Returns true is the scheme is "ws" or "wss". bool SchemeIsWSOrWSS() const;
diff --git a/url/gurl_fuzzer.cc b/url/gurl_fuzzer.cc index 71f3540..a07c195 100644 --- a/url/gurl_fuzzer.cc +++ b/url/gurl_fuzzer.cc
@@ -15,6 +15,9 @@ TestCase* test_case = new TestCase(); +// Empty replacements cause no change when applied. +GURL::Replacements* no_op = new GURL::Replacements(); + // Entry point for LibFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { if (size < 1) @@ -23,6 +26,13 @@ gurl_base::StringPiece string_piece_input(reinterpret_cast<const char*>(data), size); GURL url_from_string_piece(string_piece_input); + // Copying by applying empty replacements exercises interesting code paths. + // This can help discover issues like https://crbug.com/1075515. + GURL copy = url_from_string_piece.ReplaceComponents(*no_op); + GURL_CHECK_EQ(url_from_string_piece.is_valid(), copy.is_valid()); + if (url_from_string_piece.is_valid()) { + GURL_CHECK_EQ(url_from_string_piece.spec(), copy.spec()); + } // Test for StringPiece16 if size is even. if (size % 2 == 0) {
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index b114920..1030416 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -5,9 +5,11 @@ #include <stddef.h> #include "base/stl_util.h" +#include "base/strings/string_number_conversions.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" +#include "url/origin.h" #include "url/url_canon.h" #include "url/url_test_utils.h" @@ -223,16 +225,17 @@ TEST(GURLTest, IsValid) { const char* valid_cases[] = { - "http://google.com", - "unknown://google.com", - "http://user:pass@google.com", - "http://google.com:12345", - "http://google.com/path", - "http://google.com//path", - "http://google.com?k=v#fragment", - "http://user:pass@google.com:12345/path?k=v#fragment", - "http:/path", - "http:path", + "http://google.com", + "unknown://google.com", + "http://user:pass@google.com", + "http://google.com:12345", + "http://google.com:0", // 0 is a valid port + "http://google.com/path", + "http://google.com//path", + "http://google.com?k=v#fragment", + "http://user:pass@google.com:12345/path?k=v#fragment", + "http:/path", + "http:path", }; for (size_t i = 0; i < gurl_base::size(valid_cases); i++) { EXPECT_TRUE(GURL(valid_cases[i]).is_valid()) @@ -240,12 +243,14 @@ } const char* invalid_cases[] = { - "http://?k=v", - "http:://google.com", - "http//google.com", - "http://google.com:12three45", - "://google.com", - "path", + "http://?k=v", + "http:://google.com", + "http//google.com", + "http://google.com:12three45", + "file://server:123", // file: URLs cannot have a port + "file://server:0", + "://google.com", + "path", }; for (size_t i = 0; i < gurl_base::size(invalid_cases); i++) { EXPECT_FALSE(GURL(invalid_cases[i]).is_valid()) @@ -979,4 +984,43 @@ EXPECT_STREQ("https://foo.com/bar", url_debug_alias); } +TEST(GURLTest, PortZero) { + GURL port_zero_url("http://127.0.0.1:0/blah"); + + // https://url.spec.whatwg.org/#port-state says that the port 1) consists of + // ASCII digits (this excludes negative numbers) and 2) cannot be greater than + // 2^16-1. This means that port=0 should be valid. 
+ EXPECT_TRUE(port_zero_url.is_valid()); + EXPECT_EQ("0", port_zero_url.port()); + EXPECT_EQ("127.0.0.1", port_zero_url.host()); + EXPECT_EQ("http", port_zero_url.scheme()); + + // https://crbug.com/1065532: SchemeHostPort would previously incorrectly + // consider port=0 to be invalid. + SchemeHostPort scheme_host_port(port_zero_url); + EXPECT_TRUE(scheme_host_port.IsValid()); + EXPECT_EQ(port_zero_url.scheme(), scheme_host_port.scheme()); + EXPECT_EQ(port_zero_url.host(), scheme_host_port.host()); + EXPECT_EQ(port_zero_url.port(), + gurl_base::NumberToString(scheme_host_port.port())); + + // https://crbug.com/1065532: The SchemeHostPort problem above would lead to + // bizarre results below - resolved origin would incorrectly be returned as an + // opaque origin derived from |another_origin|. + url::Origin another_origin = url::Origin::Create(GURL("http://other.com")); + url::Origin resolved_origin = + url::Origin::Resolve(port_zero_url, another_origin); + EXPECT_FALSE(resolved_origin.opaque()); + EXPECT_EQ(port_zero_url.scheme(), resolved_origin.scheme()); + EXPECT_EQ(port_zero_url.host(), resolved_origin.host()); + EXPECT_EQ(port_zero_url.port(), gurl_base::NumberToString(resolved_origin.port())); + + // port=0 and default HTTP port are different. + GURL default_port("http://127.0.0.1/foo"); + EXPECT_EQ(0, SchemeHostPort(port_zero_url).port()); + EXPECT_EQ(80, SchemeHostPort(default_port).port()); + url::Origin default_port_origin = url::Origin::Create(default_port); + EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin)); +} + } // namespace url
diff --git a/url/origin.cc b/url/origin.cc index 16e93b0..574c512 100644 --- a/url/origin.cc +++ b/url/origin.cc
@@ -10,8 +10,8 @@ #include <vector> #include "base/base64.h" +#include "polyfills/base/check_op.h" #include "base/containers/span.h" -#include "polyfills/base/logging.h" #include "base/pickle.h" #include "base/stl_util.h" #include "base/strings/strcat.h" @@ -65,10 +65,10 @@ return base_origin.DeriveNewOpaqueOrigin(); } -Origin::Origin(const Origin& other) = default; -Origin& Origin::operator=(const Origin& other) = default; -Origin::Origin(Origin&& other) = default; -Origin& Origin::operator=(Origin&& other) = default; +Origin::Origin(const Origin&) = default; +Origin& Origin::operator=(const Origin&) = default; +Origin::Origin(Origin&&) noexcept = default; +Origin& Origin::operator=(Origin&&) noexcept = default; Origin::~Origin() = default; // static @@ -376,7 +376,7 @@ return Origin::Create(a).IsSameOriginWith(Origin::Create(b)); } -Origin::Nonce::Nonce() {} +Origin::Nonce::Nonce() = default; Origin::Nonce::Nonce(const gurl_base::UnguessableToken& token) : token_(token) { GURL_CHECK(!token_.is_empty()); }
diff --git a/url/origin.h b/url/origin.h index 351c482..2aef330 100644 --- a/url/origin.h +++ b/url/origin.h
@@ -172,8 +172,8 @@ // Copyable and movable. Origin(const Origin&); Origin& operator=(const Origin&); - Origin(Origin&&); - Origin& operator=(Origin&&); + Origin(Origin&&) noexcept; + Origin& operator=(Origin&&) noexcept; // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters // must be valid and canonicalized. Returns nullopt if any parameter is not @@ -338,8 +338,8 @@ // moving it does not. Nonce(const Nonce&); Nonce& operator=(const Nonce&); - Nonce(Nonce&&); - Nonce& operator=(Nonce&&); + Nonce(Nonce&&) noexcept; + Nonce& operator=(Nonce&&) noexcept; // Note that operator<, used by maps type containers, will trigger |token_| // lazy-initialization. Equality comparisons do not.
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc index 514169b..8fe1cef 100644 --- a/url/origin_unittest.cc +++ b/url/origin_unittest.cc
@@ -5,7 +5,6 @@ #include <stddef.h> #include <stdint.h> -#include "polyfills/base/logging.h" #include "base/macros.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" @@ -483,7 +482,8 @@ {"http", "example.com", 123}, {"https", "example.com", 443}, {"https", "example.com", 123}, - {"file", "", 0}, + {"http", "example.com", 0}, // 0 is a valid port for http. + {"file", "", 0}, // 0 indicates "no port" for file: scheme. {"file", "example.com", 0}, }; @@ -539,12 +539,10 @@ {"http", "example.com\rnot-example.com"}, {"http", "example.com\n"}, {"http", "example.com\r"}, - {"http", "example.com", 0}, {"unknown-scheme", "example.com"}, {"host-only", "\r", 0}, {"host-only", "example.com", 22}, - {"host-port-only", "example.com", 0}, - {"file", ""}}; + {"file", "", 123}}; // file: shouldn't have a port. for (const auto& test : cases) { SCOPED_TRACE(testing::Message()
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index c90e0a7..c541cae 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc
@@ -9,7 +9,8 @@ #include <tuple> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" +#include "polyfills/base/notreached.h" #include "base/numerics/safe_conversions.h" #include "base/stl_util.h" #include "base/strings/string_number_conversions.h" @@ -79,10 +80,12 @@ switch (scheme_type) { case SCHEME_WITH_HOST_AND_PORT: case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION: - // A URL with |scheme| is required to have the host and port (may be - // omitted in a serialization if it's the same as the default value). - // Return an invalid instance if either of them is not given. - if (host.empty() || port == 0) + // A URL with |scheme| is required to have the host and port, so return an + // invalid instance if host is not given. Note that a valid port is + // always provided by SchemeHostPort(const GURL&) constructor (a missing + // port is replaced with a default port if needed by + // GURL::EffectiveIntPort()). + if (host.empty()) return false; // Don't do an expensive canonicalization if the host is already @@ -125,14 +128,12 @@ } // namespace -SchemeHostPort::SchemeHostPort() : port_(0) { -} +SchemeHostPort::SchemeHostPort() = default; SchemeHostPort::SchemeHostPort(std::string scheme, std::string host, uint16_t port, - ConstructPolicy policy) - : port_(0) { + ConstructPolicy policy) { if (!IsValidInput(scheme, host, port, policy)) { GURL_DCHECK(!IsValid()); return; @@ -153,7 +154,7 @@ port, ConstructPolicy::CHECK_CANONICALIZATION) {} -SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) { +SchemeHostPort::SchemeHostPort(const GURL& url) { if (!url.is_valid()) return;
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h index 903a398..8d5acd9 100644 --- a/url/scheme_host_port.h +++ b/url/scheme_host_port.h
@@ -80,8 +80,8 @@ // Creates a (scheme, host, port) tuple. |host| must be a canonicalized // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme| - // must be a standard scheme. |port| must not be 0, unless |scheme| does not - // support ports (e.g. 'file'). In that case, |port| must be 0. + // must be a standard scheme. |port| must be 0 if |scheme| does not support + // ports (e.g. 'file'). // // Copies the data in |scheme| and |host|. SchemeHostPort(gurl_base::StringPiece scheme, @@ -111,8 +111,8 @@ // Copyable and movable. SchemeHostPort(const SchemeHostPort&) = default; SchemeHostPort& operator=(const SchemeHostPort&) = default; - SchemeHostPort(SchemeHostPort&&) = default; - SchemeHostPort& operator=(SchemeHostPort&&) = default; + SchemeHostPort(SchemeHostPort&&) noexcept = default; + SchemeHostPort& operator=(SchemeHostPort&&) noexcept = default; ~SchemeHostPort(); @@ -158,7 +158,7 @@ std::string scheme_; std::string host_; - uint16_t port_; + uint16_t port_ = 0; }; COMPONENT_EXPORT(URL)
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc index e3bcade..3e7bf65 100644 --- a/url/scheme_host_port_unittest.cc +++ b/url/scheme_host_port_unittest.cc
@@ -91,9 +91,10 @@ } cases[] = { {"http", "example.com", 80}, {"http", "example.com", 123}, + {"http", "example.com", 0}, // 0 is a valid port for http. {"https", "example.com", 443}, {"https", "example.com", 123}, - {"file", "", 0}, + {"file", "", 0}, // 0 indicates "no port" for file: scheme. {"file", "example.com", 0}, }; @@ -130,8 +131,7 @@ {"http", "example.com\rnot-example.com", 80}, {"http", "example.com\n", 80}, {"http", "example.com\r", 80}, - {"http", "example.com", 0}, - {"file", "", 80}}; + {"file", "", 80}}; // Can't have a port for file: scheme. for (const auto& test : cases) { SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc index 8756cf7..4fd3a8e 100644 --- a/url/third_party/mozilla/url_parse.cc +++ b/url/third_party/mozilla/url_parse.cc
@@ -38,7 +38,7 @@ #include <stdlib.h> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "url/url_parse_internal.h" #include "url/url_util.h" #include "url/url_util_internal.h"
diff --git a/url/url_canon.h b/url/url_canon.h index 7e4a0ee..c50cceb 100644 --- a/url/url_canon.h +++ b/url/url_canon.h
@@ -28,8 +28,7 @@ template<typename T> class CanonOutputT { public: - CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) { - } + CanonOutputT() : buffer_(nullptr), buffer_len_(0), cur_len_(0) {} virtual ~CanonOutputT() { } @@ -698,15 +697,14 @@ // will make them all NULL, which is no replacement. The caller would then // override the components they want to replace. URLComponentSource() - : scheme(NULL), - username(NULL), - password(NULL), - host(NULL), - port(NULL), - path(NULL), - query(NULL), - ref(NULL) { - } + : scheme(nullptr), + username(nullptr), + password(nullptr), + host(nullptr), + port(nullptr), + path(nullptr), + query(nullptr), + ref(nullptr) {} // Constructor normally used internally to initialize all the components to // point to the same spec.
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc index 9def892..9a642cd 100644 --- a/url/url_canon_filesystemurl.cc +++ b/url/url_canon_filesystemurl.cc
@@ -39,7 +39,7 @@ output->Append("filesystem:", 11); new_parsed->scheme.len = 10; - if (!parsed.inner_parsed() || !parsed.inner_parsed()->scheme.is_valid()) + if (!inner_parsed || !inner_parsed->scheme.is_valid()) return false; bool success = true; @@ -57,8 +57,8 @@ inner_scheme_type = SCHEME_WITH_HOST_AND_PORT; } success = CanonicalizeStandardURL( - spec, parsed.inner_parsed()->Length(), *parsed.inner_parsed(), - inner_scheme_type, charset_converter, output, &new_inner_parsed); + spec, inner_parsed->Length(), *inner_parsed, inner_scheme_type, + charset_converter, output, &new_inner_parsed); } else { // TODO(ericu): The URL is wrong, but should we try to output more of what // we were given? Echoing back filesystem:mailto etc. doesn't seem all that @@ -66,7 +66,7 @@ return false; } // The filesystem type must be more than just a leading slash for validity. - success &= parsed.inner_parsed()->path.len > 1; + success &= new_inner_parsed.path.len > 1; success &= CanonicalizePath(source.path, parsed.path, output, &new_parsed->path);
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc index ef654c7..4622c6e 100644 --- a/url/url_canon_fileurl.cc +++ b/url/url_canon_fileurl.cc
@@ -76,8 +76,8 @@ Component sub_path = MakeRange(after_drive, path.end()); Component fake_output_path; success = CanonicalizePath(spec, sub_path, output, &fake_output_path); - } else { - // No input path, canonicalize to a slash. + } else if (after_drive == path.begin) { + // No input path and no drive spec, canonicalize to a slash. output->push_back('/'); }
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc index f83dacb..28a7c38 100644 --- a/url/url_canon_host.cc +++ b/url/url_canon_host.cc
@@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "url/url_canon.h" #include "url/url_canon_internal.h"
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc index a9a32fd..614e338 100644 --- a/url/url_canon_icu.cc +++ b/url/url_canon_icu.cc
@@ -8,7 +8,7 @@ #include <stdlib.h> #include <string.h> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include <unicode/ucnv.h> #include <unicode/ucnv_cb.h> #include <unicode/utypes.h>
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc index 55fd58f..3f3025b 100644 --- a/url/url_canon_icu_unittest.cc +++ b/url/url_canon_icu_unittest.cc
@@ -4,6 +4,7 @@ #include <stddef.h> +#include "polyfills/base/logging.h" #include "base/stl_util.h" #include "testing/gtest/include/gtest/gtest.h" #include <unicode/ucnv.h> @@ -22,6 +23,10 @@ explicit UConvScoper(const char* charset_name) { UErrorCode err = U_ZERO_ERROR; converter_ = ucnv_open(charset_name, &err); + if (!converter_) { + GURL_LOG(ERROR) << "Failed to open charset " << charset_name << ": " + << u_errorName(err); + } } ~UConvScoper() {
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h index e0c7567..4a282b1 100644 --- a/url/url_canon_internal.h +++ b/url/url_canon_internal.h
@@ -14,7 +14,7 @@ #include <stdlib.h> #include "polyfills/base/component_export.h" -#include "polyfills/base/logging.h" +#include "polyfills/base/notreached.h" #include "url/url_canon.h" namespace url {
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc index f7c5700..c214217 100644 --- a/url/url_canon_ip.cc +++ b/url/url_canon_ip.cc
@@ -8,7 +8,7 @@ #include <stdlib.h> #include <limits> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "url/url_canon_internal.h" namespace url {
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc index ee18aa2..22002b5 100644 --- a/url/url_canon_path.cc +++ b/url/url_canon_path.cc
@@ -4,7 +4,7 @@ #include <limits.h> -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" #include "url/url_parse_internal.h"
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc index a5ec808..e148128 100644 --- a/url/url_canon_relative.cc +++ b/url/url_canon_relative.cc
@@ -5,8 +5,9 @@ // Canonicalizer functions for working with and resolving relative URLs. #include <algorithm> +#include <ostream> -#include "polyfills/base/logging.h" +#include "polyfills/base/check_op.h" #include "base/strings/string_util.h" #include "url/url_canon.h" #include "url/url_canon_internal.h"
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index c3f02fb..8f709be 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -1517,6 +1517,10 @@ {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, // Replace nothing {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, + {"file:///Y:", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:"}, + {"file:///Y:/", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:/"}, + {"file:///./Y", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y"}, + {"file:///./Y:", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///Y:"}, // Clear non-path components (common) {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"}, // Replace path with something that doesn't begin with a slash and make @@ -1532,6 +1536,7 @@ for (size_t i = 0; i < gurl_base::size(replace_cases); i++) { const ReplaceCase& cur = replace_cases[i]; + SCOPED_TRACE(cur.base); int base_len = static_cast<int>(strlen(cur.base)); Parsed parsed; ParseFileURL(cur.base, base_len, &parsed); @@ -1824,13 +1829,19 @@ const char* expected; bool expected_success; } cases[] = { - {"Filesystem:htTp://www.Foo.com:80/tempoRary", "filesystem:http://www.foo.com/tempoRary/", true}, - {"filesystem:httpS://www.foo.com/temporary/", "filesystem:https://www.foo.com/temporary/", true}, - {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", false}, - {"filesystem:http://www.foo.com/persistent/bob?query#ref", "filesystem:http://www.foo.com/persistent/bob?query#ref", true}, - {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true}, - {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true}, - {"filesystem:File:///temporary/Bob?qUery#reF", "filesystem:file:///temporary/Bob?qUery#reF", true}, + {"Filesystem:htTp://www.Foo.com:80/tempoRary", + "filesystem:http://www.foo.com/tempoRary/", true}, + {"filesystem:httpS://www.foo.com/temporary/", + 
"filesystem:https://www.foo.com/temporary/", true}, + {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", + false}, + {"filesystem:http://www.foo.com/persistent/bob?query#ref", + "filesystem:http://www.foo.com/persistent/bob?query#ref", true}, + {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true}, + {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true}, + {"filesystem:File:///temporary/Bob?qUery#reF", + "filesystem:file:///temporary/Bob?qUery#reF", true}, + {"FilEsysteM:htTp:E=/.", "filesystem:http://e%3D//", false}, }; for (size_t i = 0; i < gurl_base::size(cases); i++) {
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc index b0f91a1..18e1895 100644 --- a/url/url_idna_icu.cc +++ b/url/url_idna_icu.cc
@@ -8,7 +8,9 @@ #include <stdlib.h> #include <string.h> -#include "polyfills/base/logging.h" +#include <ostream> + +#include "polyfills/base/check_op.h" #include "base/no_destructor.h" #include <unicode/uidna.h> #include <unicode/utypes.h> @@ -52,7 +54,7 @@ << ". If you see this error message in a test environment " << "your test environment likely lacks the required data " << "tables for libicu. See https://crbug.com/778929."; - value = NULL; + value = nullptr; } } @@ -84,7 +86,7 @@ GURL_DCHECK(output->length() == 0); // Output buffer is assumed empty. UIDNA* uidna = GetUIDNA(); - GURL_DCHECK(uidna != NULL); + GURL_DCHECK(uidna != nullptr); while (true) { UErrorCode err = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER;
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc index b666d0b..c1c878a 100644 --- a/url/url_parse_file.cc +++ b/url/url_parse_file.cc
@@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "polyfills/base/logging.h" +#include "polyfills/base/check.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_file.h" #include "url/url_parse_internal.h"
diff --git a/url/url_util.cc b/url/url_util.cc index 7c72bfc..13c30b3 100644 --- a/url/url_util.cc +++ b/url/url_util.cc
@@ -8,8 +8,8 @@ #include <string.h> #include <atomic> +#include "polyfills/base/check_op.h" #include "base/compiler_specific.h" -#include "polyfills/base/logging.h" #include "base/no_destructor.h" #include "base/stl_util.h" #include "base/strings/string_util.h" @@ -49,6 +49,12 @@ }; // Schemes that are allowed for referrers. + // + // WARNING: Adding (1) a non-"standard" scheme or (2) a scheme whose URLs have + // opaque origins could lead to surprising behavior in some of the referrer + // generation logic. In order to avoid surprises, be sure to have adequate + // test coverage in each of the multiple code locations that compute + // referrers. std::vector<SchemeWithType> referrer_schemes = { {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},