Sync googleurl with the Chromium upstream
Updates googleurl to Chromium revision 62ea163d27b1472e3d4fb01cbef18529917c17da
(from Thu Jul 6 04:19:31 2023 +0000).
Change-Id: I39a8b29812eff97ddc23525f261013a8882df1dd
diff --git a/AUTHORS b/AUTHORS
index d4f7c9d..365f616 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -41,6 +41,7 @@
Adrià Vilanova Martínez <me@avm99963.com>
Ahmed Elwasefi <a.m.elwasefi@gmail.com>
Ahmet Emir Ercin <ahmetemiremir@gmail.com>
+Aiden Grossman <aidengrossmanpso@gmail.com>
Ajay Berwal <a.berwal@samsung.com>
Ajay Berwal <ajay.berwal@samsung.com>
Ajith Kumar V <ajith.v@samsung.com>
@@ -116,6 +117,7 @@
Andrew Tulloch <andrew@tullo.ch>
Andriy Rysin <arysin@gmail.com>
Anish Patankar <anish.p@samsung.com>
+Ankit Kiran <sahuankit453@gmail.com>
Ankit Kumar <ankit2.kumar@samsung.com>
Ankur Verma <ankur1.verma@samsung.com>
Anna Henningsen <anna@addaleax.net>
@@ -150,6 +152,7 @@
Arunprasad Rajkumar <ararunprasad@gmail.com>
Arunprasad Rajkumar <arurajku@cisco.com>
Arup Barua <arup.barua@samsung.com>
+Aryan Kaushik <aryankaushik2023@gmail.com>
Asami Doi <d0iasm.pub@gmail.com>
Ashish Kumar Gupta <guptaag@amazon.com>
Ashlin Joseph <ashlin.j@samsung.com>
@@ -213,6 +216,7 @@
Calvin Watford <watfordcalvin@gmail.com>
Cameron Gutman <aicommander@gmail.com>
Camille Viot <viot.camille@outlook.com>
+Can Liu <peter.can.liu@gmail.com>
Carlos Santa <carlos.santa@intel.com>
Catalin Badea <badea@adobe.com>
Cathie Chen <cathiechen@tencent.com>
@@ -249,7 +253,7 @@
Chris Tserng <tserng@amazon.com>
Chris Vasselli <clindsay@gmail.com>
Chris Ye <hawkoyates@gmail.com>
-Christoph Durschang <christoph142@gmx.com>
+Christoph Staengle <christoph142@gmx.com>
Christophe Dumez <ch.dumez@samsung.com>
Christopher Dale <chrelad@gmail.com>
Chunbo Hua <chunbo.hua@intel.com>
@@ -308,6 +312,7 @@
David Spellman <dspell@amazon.com>
David Valachovic <adenflorian@gmail.com>
Dax Kelson <dkelson@gurulabs.com>
+Dean Leitersdorf <dean.leitersdorf@gmail.com>
Debadree Chatterjee <debadree333@gmail.com>
Debashish Samantaray <d.samantaray@samsung.com>
Debug Wang <debugwang@tencent.com>
@@ -322,7 +327,6 @@
Devlin Cronin <rdevlin.cronin@gmail.com>
Dhi Aurrahman <dio@rockybars.com>
Di Wu <meetwudi@gmail.com>
-Di Zhang <dizhangg@chromium.org>
Diana Suvorova <diana.suvorova@gmail.com>
Diego Fernández Santos <agujaydedal@gmail.com>
Diego Ferreiro Val <elfogris@gmail.com>
@@ -474,6 +478,7 @@
Horia Olaru <horia.olaru@gmail.com>
Horia Olaru <olaru@adobe.com>
Hosung You <hosung.you@samsung.com>
+Huai Wang <gkvjwa@gmail.com>
Huapeng Li <huapengl@amazon.com>
Huayong Xu <huayong.xu@samsung.com>
Hung Ngo <ngotienhung195@gmail.com>
@@ -481,6 +486,7 @@
Hui Wang <wanghui07050707@gmail.com>
Hui Wang <wanghui210@huawei.com>
Huiwon Jo <jhwon0415@gmail.com>
+Hunseop Jeong <hs85jeong@gmail.com>
Huy Duong <huy.duongdinh@gmail.com>
Hwanseung Lee <hs1217.lee@gmail.com>
Hwanseung Lee <hs1217.lee@samsung.com>
@@ -504,6 +510,7 @@
Ibrar Ahmed <ibrar.ahmad@gmail.com>
Ilia Demianenko <ilia.demianenko@gmail.com>
Ilia K <ki.stfu@gmail.com>
+Ilwoo Lee <ilwoo905.lee@samsung.com>
Ilya Konstantinov <ilya.konstantinov@gmail.com>
Imam Mohammad Bokhary <imam.bokhary@samsung.com>
Imranur Rahman <i.rahman@samsung.com>
@@ -675,6 +682,7 @@
Justin Ribeiro <justin@justinribeiro.com>
Jüri Valdmann <juri.valdmann@qt.io>
Juyoung Kim <chattank05@gmail.com>
+Jing Peiyang <jingpeiyang@eswincomputing.com>
Kai Jiang <jiangkai@gmail.com>
Kai Köhne <kai.koehne@qt.io>
Kai Uwe Broulik <kde@privat.broulik.de>
@@ -700,6 +708,7 @@
Keita Suzuki <keitasuzuki.park@gmail.com>
Keita Yoshimoto <y073k3@gmail.com>
Keith Chen <keitchen@amazon.com>
+Keith Cirkel <chromium@keithcirkel.co.uk>
Kenneth Rohde Christiansen <kenneth.r.christiansen@intel.com>
Kenneth Strickland <ken.strickland@gmail.com>
Kenneth Zhou <knthzh@gmail.com>
@@ -710,6 +719,7 @@
Kevin Gibbons <bakkot@gmail.com>
Kevin Lee Helpingstine <sig11@reprehensible.net>
Kevin M. McCormick <mckev@amazon.com>
+Keyou <qqkillyou@gmail.com>
Khasim Syed Mohammed <khasim.mohammed@linaro.org>
Khem Raj <raj.khem@gmail.com>
Kihong Kwon <kihong.kwon@samsung.com>
@@ -853,6 +863,7 @@
Max Vujovic <mvujovic@adobe.com>
Mayank Gupta <mayank.g1@samsung.com>
Mayur Kankanwadi <mayurk.vk@samsung.com>
+Mc Zeng <zengmcong@gmail.com>
Md Abdullah Al Alamin <a.alamin.cse@gmail.com>
Md. Hasanur Rashid <hasanur.r@samsung.com>
Md Jobed Hossain <jobed.h@samsung.com>
@@ -917,6 +928,7 @@
Nagarjuna Atluri <nagarjuna.a@samsung.com>
Naiem Shaik <naiem.shaik@gmail.com>
Naoki Takano <takano.naoki@gmail.com>
+Naoto Ono <onoto1998@gmail.com>
Nathan Mitchell <nathaniel.v.mitchell@gmail.com>
Naveen Bobbili <naveenbobbili@motorola.com>
Naveen Bobbili <qghc36@motorola.com>
@@ -954,6 +966,7 @@
Parag Radke <nrqv63@motorola.com>
Paritosh Kumar <paritosh.in@samsung.com>
Patrasciuc Sorin Cristian <cristian.patrasciuc@gmail.com>
+Patricija Cerkaite <cer.patricija@gmail.com>
Patrick Chan <chanpatorikku@gmail.com>
Patrick Kettner <patrickkettner@gmail.com>
Patrick Riordan <patrickriordan177@gmail.com>
@@ -1008,6 +1021,7 @@
Prashant Hiremath <prashhir@cisco.com>
Prashant Nevase <prashant.n@samsung.com>
Prashant Patil <prashant.patil@imgtec.com>
+Pratham <prathamIN@proton.me>
Praveen Akkiraju <praveen.anp@samsung.com>
Preeti Nayak <preeti.nayak@samsung.com>
Pritam Nikam <pritam.nikam@samsung.com>
@@ -1053,6 +1067,7 @@
Richard Smith <happyercat@gmail.com>
Rijubrata Bhaumik <rijubrata.bhaumik@intel.com>
Riku Voipio <riku.voipio@linaro.org>
+Ritesh Saharan <r.saharan@samsung.com>
Rob Buis <rob.buis@samsung.com>
Rob Wu <rob@robwu.nl>
Robert Bear Travis <bear.travis@gmail.com>
@@ -1091,6 +1106,7 @@
Ryuan Choi <ryuan.choi@samsung.com>
Saikrishna Arcot <saiarcot895@gmail.com>
Sajal Khandelwal <skhandelwa22@bloomberg.net>
+Sajeesh Sidharthan <sajeesh.sidharthan@amd.corp-partner.google.com>
Saksham Mittal <gotlouemail@gmail.com>
Salvatore Iovene <salvatore.iovene@intel.com>
Sam James <sam@gentoo.org>
@@ -1182,7 +1198,6 @@
Simon La Macchia <smacchia@amazon.com>
Siva Kumar Gunturi <siva.gunturi@samsung.com>
Slava Aseev <nullptrnine@gmail.com>
-Sohan Jyoti Ghosh <sohanjg@chromium.org>
Sohom Datta <sohom.datta@learner.manipal.edu>
Sohom Datta <dattasohom1@gmail.com>
Song Fangzhen <songfangzhen@bytedance.com>
@@ -1217,6 +1232,7 @@
Sundoo Kim <0xd00d00b@gmail.com>
Suneel Kota <suneel.kota@samsung.com>
Sungguk Lim <limasdf@gmail.com>
+Sunghyeok Kang <sh0528.kang@samsung.com>
Sungmann Cho <sungmann.cho@gmail.com>
Sungmann Cho <sungmann.cho@navercorp.com>
Sunil Ratnu <sunil.ratnu@samsung.com>
@@ -1252,6 +1268,7 @@
Takeshi Kurosawa <taken.spc@gmail.com>
Tanay Chowdhury <tanay.c@samsung.com>
Tanvir Rizvi <tanvir.rizvi@samsung.com>
+Tao Wang <tao.wang.2261@gmail.com>
Tapu Kumar Ghose <ghose.tapu@gmail.com>
Taylor Price <trprice@gmail.com>
Ted Kim <neot0000@gmail.com>
@@ -1267,6 +1284,7 @@
Tiago Vignatti <tiago.vignatti@intel.com>
Tibor Dusnoki <tibor.dusnoki.91@gmail.com>
Tibor Dusnoki <tdusnoki@inf.u-szeged.hu>
+Tien Hock Loh <tienhock.loh@starfivetech.com>
Tim Ansell <mithro@mithis.com>
Tim Niederhausen <tim@rnc-ag.de>
Tim Steiner <twsteiner@gmail.com>
@@ -1281,6 +1299,7 @@
Tom Callaway <tcallawa@redhat.com>
Tom Harwood <tfh@skip.org>
Tomas Popela <tomas.popela@gmail.com>
+Tony Shen <legendmastertony@gmail.com>
Torsten Kurbad <google@tk-webart.de>
Toshihito Kikuchi <leamovret@gmail.com>
Toshiaki Tanaka <zokutyou2@gmail.com>
@@ -1322,6 +1341,8 @@
Waihung Fu <fufranci@amazon.com>
wafuwafu13 <mariobaske@i.softbank.jp>
Wojciech Bielawski <wojciech.bielawski@gmail.com>
+Wang Chen <wangchen20@iscas.ac.cn>
+Wang Chen <unicornxw@gmail.com>
Wang Weiwei <wangww@dingdao.com>
Wangyang Dai <jludwy@gmail.com>
Wanming Lin <wanming.lin@intel.com>
@@ -1332,6 +1353,7 @@
Wesley Lancel <wesleylancel@gmail.com>
Wei Wang <wei4.wang@intel.com>
Wei Wen <wenwei.wenwei@bytedance.com>
+Weidong Liu <liuwd8@gmail.com>
Wesley Wigham <wwigham@gmail.com>
Will Cohen <wwcohen@gmail.com>
Will Hirsch <chromium@willhirsch.co.uk>
@@ -1355,6 +1377,7 @@
Xu Samuel <samuel.xu@intel.com>
Xu Xing <xing.xu@intel.com>
Xuefei Ren <xrenishere@gmail.com>
+Xuefu Zhang <xuefu.sh@gmail.com>
Xuehui Xie <xuehui.xxh@alibaba-inc.com>
Xueqing Huang <huangxueqing@xiaomi.com>
Xun Sun <xun.sun@intel.com>
@@ -1364,6 +1387,7 @@
Yan Wang <yan0422.wang@samsung.com>
Yang Gu <yang.gu@intel.com>
Yang Liu <jd9668954@gmail.com>
+Yannay Hammer <yannayha@gmail.com>
Yannic Bonenberger <yannic.bonenberger@gmail.com>
Yarin Kaul <yarin.kaul@gmail.com>
Yash Joshi <yashjoshimail@gmail.com>
@@ -1417,6 +1441,7 @@
Zachary Capalbo <zach.geek@gmail.com>
Zeno Albisser <zeno.albisser@digia.com>
Zeqin Chen <talonchen@tencent.com>
+Zhanbang He <hezhanbang@gmail.com>
Zhang Hao <zhanghao.m@bytedance.com>
Zhang Hao <15686357310a@gmail.com>
Zhaoming Jiang <zhaoming.jiang@intel.com>
@@ -1437,6 +1462,7 @@
Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com>
Zoltan Kuscsik <zoltan.kuscsik@linaro.org>
Zoru Lee <donzoru@gmail.com>
+Zuckjet <zuckjet@gmail.com>
Zsolt Borbely <zsborbely.u-szeged@partner.samsung.com>
方觉 (Fang Jue) <fangjue23303@gmail.com>
迷渡 <justjavac@gmail.com>
@@ -1457,18 +1483,16 @@
Code Aurora Forum <*@codeaurora.org>
CodeWeavers, Inc. <*@codeweavers.com>
Collabora Limited <*@collabora.com>
Comodo CA Limited
CoSMo Software pvt ltd <*@cosmosoftware.io>
Cosium <*@cosium.com>
Dell Technologies Inc. <*@dell.corp-partner.google.com>
Ding (Beijing) Intelligent Technology Co. Ltd <*@dingdao.com>
+Dropbox, Inc. <*@dropbox.com>
Duck Duck Go, Inc. <*@duckduckgo.com>
Endless Mobile, Inc. <*@endlessm.com>
EngFlow, Inc. <*@engflow.com>
Estimote, Inc. <*@estimote.com>
-Meta Platforms, Inc. <*@fb.com>
-Meta Platforms, Inc. <*@meta.com>
-Meta Platforms, Inc. <*@oculus.com>
Google Inc. <*@google.com>
Grammarly, Inc. <*@grammarly.com>
Hewlett-Packard Development Company, L.P. <*@hp.com>
@@ -1479,13 +1502,16 @@
Imagination Technologies Limited <*@imagination.corp-partner.google.com>
Impossible Dreams Network <*@impossibledreams.net>
Intel Corporation <*@intel.com>
+Island Technology, Inc. <*@island.io>
LG Electronics, Inc. <*@lge.com>
Loongson Technology Corporation Limited. <*@loongson.cn>
Macadamian <*@macadamian.com>
Mail.ru Group <*@corp.mail.ru>
Make Positive Provar Limited <*@provartesting.com>
-Mc Zeng <zengmcong@gmail.com>
Mediatek <*@mediatek.com>
+Meta Platforms, Inc. <*@fb.com>
+Meta Platforms, Inc. <*@meta.com>
+Meta Platforms, Inc. <*@oculus.com>
Microsoft <*@microsoft.com>
MIPS Technologies, Inc. <*@mips.com>
Mobica Limited <*@mobica.com>
@@ -1502,7 +1528,6 @@
Rakuten Kobo Inc. <*@kobo.com>
Rakuten Kobo Inc. <*@rakuten.com>
Red Hat Inc. <*@redhat.com>
-Sajeesh Sidharthan <sajeesh.sidharthan@amd.corp-partner.google.com>
Semihalf <*@semihalf.com>
Seznam.cz, a.s. <*@firma.seznam.cz>
Slack Technologies Inc. <*@slack-corp.com>
@@ -1515,9 +1540,8 @@
The Chromium Authors <*@chromium.org>
The MathWorks, Inc. <binod.pant@mathworks.com>
THEO Technologies <*@theoplayer.com>
-Tien Hock Loh <tienhock.loh@starfivetech.com>
Torchmobile Inc.
Upwork <*@cloud.upwork.com>
Venture 3 Systems LLC <*@venture3systems.com>
Vewd Software AS <*@vewd.com>
Vivaldi Technologies AS <*@vivaldi.com>
@@ -1525,6 +1548,5 @@
Whist Technologies <*@whist.com>
Xperi Corporation <*@xperi.com>
Yandex LLC <*@yandex-team.ru>
-Zuckjet <zuckjet@gmail.com>
# Please DO NOT APPEND here. See comments at the top of the file.
# END organizations section.
diff --git a/base/BUILD b/base/BUILD
index 5f36123..1d1efd2 100644
--- a/base/BUILD
+++ b/base/BUILD
@@ -8,11 +8,11 @@
name = "base",
srcs = [
"debug/crash_logging.cc",
- "strings/string_piece.cc",
"strings/string_util.cc",
"strings/string_util_constants.cc",
"strings/utf_string_conversion_utils.cc",
"strings/utf_string_conversions.cc",
+ "strings/utf_ostream_operators.cc",
] + select({
"//build_config:windows_x86_64": ["strings/string_util_win.cc"],
"//conditions:default": [],
@@ -25,7 +25,6 @@
"containers/contiguous_iterator.h",
"containers/span.h",
"containers/util.h",
- "cxx17_backports.h",
"cxx20_is_constant_evaluated.h",
"cxx20_to_address.h",
"debug/crash_logging.h",
@@ -51,6 +50,7 @@
"ranges/ranges.h",
"stl_util.h",
"template_util.h",
+ "types/always_false.h",
"strings/string_piece_forward.h",
"strings/string_piece.h",
"strings/string_util.h",
@@ -58,6 +58,7 @@
"strings/string_util_internal.h",
"strings/string_number_conversions.h",
"strings/utf_string_conversions.h",
+ "strings/utf_ostream_operators.h",
"strings/utf_string_conversion_utils.h",
"win/win_handle_types.h",
] + build_config.strings_hdrs,
diff --git a/base/bits.h b/base/bits.h
index ea011ad..3f1541a 100644
--- a/base/bits.h
+++ b/base/bits.h
@@ -7,7 +7,6 @@
#ifndef BASE_BITS_H_
#define BASE_BITS_H_
-#include <limits.h>
#include <stddef.h>
#include <stdint.h>
@@ -134,7 +133,7 @@
static_assert(std::is_integral<T>::value,
"This function can only be used with integral types.");
T one(1u);
- return one << ((CHAR_BIT * sizeof(T) - 1));
+ return one << (8 * sizeof(T) - 1);
}
} // namespace bits
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index b2d4024..e85da5c 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -414,4 +414,25 @@
#define LOGICALLY_CONST
#endif
+// preserve_most clang's calling convention. Reduces register pressure for the
+// caller and as such can be used for cold calls. Support for the
+// "preserve_most" attribute is limited:
+// - 32-bit platforms do not implement it,
+// - component builds fail because _dl_runtime_resolve() clobbers registers,
+// - there are crashes on arm64 on Windows (https://crbug.com/v8/14065), which
+// can hopefully be fixed in the future.
+// Additionally, the initial implementation in clang <= 16 overwrote the return
+// register(s) in the epilogue of a preserve_most function, so we only use
+// preserve_most in clang >= 17 (see https://reviews.llvm.org/D143425).
+// See https://clang.llvm.org/docs/AttributeReference.html#preserve-most for
+// more details.
+#if defined(ARCH_CPU_64_BITS) && \
+ !(BUILDFLAG(IS_WIN) && defined(ARCH_CPU_ARM64)) && \
+ !defined(COMPONENT_BUILD) && defined(__clang__) && \
+ __clang_major__ >= 17 && HAS_ATTRIBUTE(preserve_most)
+#define PRESERVE_MOST __attribute__((preserve_most))
+#else
+#define PRESERVE_MOST
+#endif
+
#endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/containers/checked_iterators.h b/base/containers/checked_iterators.h
index 0cd2832..18f1006 100644
--- a/base/containers/checked_iterators.h
+++ b/base/containers/checked_iterators.h
@@ -24,6 +24,9 @@
using pointer = T*;
using reference = T&;
using iterator_category = std::random_access_iterator_tag;
+#if defined(__cpp_lib_ranges)
+ using iterator_concept = std::contiguous_iterator_tag;
+#endif
// Required for converting constructor below.
template <typename U>
@@ -31,10 +34,8 @@
// Required for certain libc++ algorithm optimizations that are not available
// for NaCl.
-#if defined(_LIBCPP_VERSION) && !BUILDFLAG(IS_NACL)
template <typename Ptr>
friend struct std::pointer_traits;
-#endif
constexpr CheckedContiguousIterator() = default;
@@ -147,6 +148,12 @@
return it;
}
+ constexpr friend CheckedContiguousIterator operator+(
+ difference_type lhs,
+ const CheckedContiguousIterator& rhs) {
+ return rhs + lhs;
+ }
+
constexpr CheckedContiguousIterator& operator-=(difference_type rhs) {
if (rhs < 0) {
GURL_CHECK_LE(-rhs, end_ - current_);
@@ -224,7 +231,6 @@
} // namespace base
-#if defined(_LIBCPP_VERSION) && !BUILDFLAG(IS_NACL)
// Specialize both std::__is_cpp17_contiguous_iterator and std::pointer_traits
// for CCI in case we compile with libc++ outside of NaCl. The former is
// required to enable certain algorithm optimizations (e.g. std::copy can be a
@@ -242,13 +248,35 @@
// [1] https://wg21.link/iterator.concept.contiguous
// [2] https://wg21.link/std.iterator.tags
// [3] https://wg21.link/pointer.traits.optmem
-namespace std {
+#if defined(_LIBCPP_VERSION)
+
+// TODO(crbug.com/1284275): Remove when C++20 is on by default, as the use
+// of `iterator_concept` above should suffice.
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// TODO(crbug.com/1449299): https://reviews.llvm.org/D150801 renamed this from
+// `__is_cpp17_contiguous_iterator` to `__libcpp_is_contiguous_iterator`. Clean
+// up the old spelling after libc++ rolls.
+template <typename T>
+struct __is_cpp17_contiguous_iterator;
template <typename T>
struct __is_cpp17_contiguous_iterator<::gurl_base::CheckedContiguousIterator<T>>
: true_type {};
template <typename T>
+struct __libcpp_is_contiguous_iterator;
+template <typename T>
+struct __libcpp_is_contiguous_iterator<::gurl_base::CheckedContiguousIterator<T>>
+ : true_type {};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif
+
+namespace std {
+
+template <typename T>
struct pointer_traits<::gurl_base::CheckedContiguousIterator<T>> {
using pointer = ::gurl_base::CheckedContiguousIterator<T>;
using element_type = T;
@@ -267,6 +295,5 @@
};
} // namespace std
-#endif
#endif // BASE_CONTAINERS_CHECKED_ITERATORS_H_
diff --git a/base/containers/span.h b/base/containers/span.h
index 578f3b6..a218045 100644
--- a/base/containers/span.h
+++ b/base/containers/span.h
@@ -19,6 +19,7 @@
#include "base/containers/checked_iterators.h"
#include "base/containers/contiguous_iterator.h"
#include "base/cxx20_to_address.h"
+#include "polyfills/base/memory/raw_ptr_exclusion.h"
#include "base/numerics/safe_math.h"
namespace gurl_base {
@@ -452,7 +453,9 @@
}
private:
- T* data_;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter
+ // for: #constexpr-ctor-field-initializer, #global-scope, #union
+ RAW_PTR_EXCLUSION T* data_;
};
// span<T, Extent>::extent can not be declared inline prior to C++17, hence this
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h
deleted file mode 100644
index a784f84..0000000
--- a/base/cxx17_backports.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2021 The Chromium Authors
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef BASE_CXX17_BACKPORTS_H_
-#define BASE_CXX17_BACKPORTS_H_
-
-#include <algorithm>
-
-namespace gurl_base {
-
-// TODO(crbug.com/1373621): Rewrite all uses of gurl_base::clamp as std::clamp and
-// remove this file.
-using std::clamp;
-
-} // namespace base
-
-#endif // BASE_CXX17_BACKPORTS_H_
diff --git a/base/numerics/math_constants.h b/base/numerics/math_constants.h
index c406ab9..2819606 100644
--- a/base/numerics/math_constants.h
+++ b/base/numerics/math_constants.h
@@ -10,6 +10,18 @@
constexpr double kPiDouble = 3.14159265358979323846;
constexpr float kPiFloat = 3.14159265358979323846f;
+// pi/180 and 180/pi. These are correctly rounded from the true
+// mathematical value, unlike what you'd get from e.g.
+// 180.0f / kPiFloat.
+constexpr double kDegToRadDouble = 0.017453292519943295769;
+constexpr float kDegToRadFloat = 0.017453292519943295769f;
+constexpr double kRadToDegDouble = 57.295779513082320876798;
+constexpr float kRadToDegFloat = 57.295779513082320876798f;
+
+// sqrt(1/2) = 1/sqrt(2).
+constexpr double kSqrtHalfDouble = 0.70710678118654752440;
+constexpr float kSqrtHalfFloat = 0.70710678118654752440f;
+
// The mean acceleration due to gravity on Earth in m/s^2.
constexpr double kMeanGravityDouble = 9.80665;
constexpr float kMeanGravityFloat = 9.80665f;
diff --git a/base/ranges/algorithm.h b/base/ranges/algorithm.h
index 70932a8..fe09c74 100644
--- a/base/ranges/algorithm.h
+++ b/base/ranges/algorithm.h
@@ -16,6 +16,7 @@
#include "base/cxx20_is_constant_evaluated.h"
#include "base/functional/identity.h"
#include "base/functional/invoke.h"
+#include "polyfills/base/memory/raw_ptr_exclusion.h"
#include "base/ranges/functional.h"
#include "base/ranges/ranges.h"
@@ -108,9 +109,15 @@
}
private:
- Pred& pred_;
- Proj1& proj1_;
- Proj2& proj2_;
+ // This field is not a raw_ref<> because it was filtered by the rewriter for:
+ // #constexpr-ctor-field-initializer
+ RAW_PTR_EXCLUSION Pred& pred_;
+ // This field is not a raw_ref<> because it was filtered by the rewriter for:
+ // #constexpr-ctor-field-initializer
+ RAW_PTR_EXCLUSION Proj1& proj1_;
+ // This field is not a raw_ref<> because it was filtered by the rewriter for:
+ // #constexpr-ctor-field-initializer
+ RAW_PTR_EXCLUSION Proj2& proj2_;
};
// Small wrappers around BinaryPredicateProjector to make the calling side more
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h
index e505a81..6649046 100644
--- a/base/strings/string_number_conversions_internal.h
+++ b/base/strings/string_number_conversions_internal.h
@@ -5,10 +5,8 @@
#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_INTERNAL_H_
-#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
-#include <wctype.h>
#include <limits>
@@ -72,32 +70,6 @@
return absl::nullopt;
}
-// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
-// is locale independent, whereas the functions we are replacing were
-// locale-dependent. TBD what is desired, but for the moment let's not
-// introduce a change in behaviour.
-template <typename CHAR>
-class WhitespaceHelper {};
-
-template <>
-class WhitespaceHelper<char> {
- public:
- static bool Invoke(char c) {
- return 0 != isspace(static_cast<unsigned char>(c));
- }
-};
-
-template <>
-class WhitespaceHelper<char16_t> {
- public:
- static bool Invoke(char16_t c) { return 0 != iswspace(c); }
-};
-
-template <typename CHAR>
-bool LocalIsWhitespace(CHAR c) {
- return WhitespaceHelper<CHAR>::Invoke(c);
-}
-
template <typename Number, int kBase>
class StringToNumberParser {
public:
@@ -188,7 +160,7 @@
auto begin = input.begin();
auto end = input.end();
- while (begin != end && LocalIsWhitespace(*begin)) {
+ while (begin != end && IsAsciiWhitespace(*begin)) {
has_leading_whitespace = true;
++begin;
}
@@ -272,7 +244,11 @@
// - If the entire string was not processed, there are either characters
// remaining in the string after a parsed number, or the string does not
// begin with a parseable number.
- // - If the first character is a space, there was leading whitespace
+ // - If the first character is a space, there was leading whitespace. Note
+ // that this checks using IsWhitespace(), which behaves differently for
+ // wide and narrow characters -- that is intentional and matches the
+ // behavior of the double_conversion library's whitespace-skipping
+ // algorithm.
return !input.empty() && output != HUGE_VAL && output != -HUGE_VAL &&
static_cast<size_t>(processed_characters_count) == input.size() &&
!IsWhitespace(input[0]);
diff --git a/base/strings/string_number_conversions_win.cc b/base/strings/string_number_conversions_win.cc
index 6e81b2f..9857dd4 100644
--- a/base/strings/string_number_conversions_win.cc
+++ b/base/strings/string_number_conversions_win.cc
@@ -39,16 +39,6 @@
return internal::DoubleToStringT<std::wstring>(value);
}
-namespace internal {
-
-template <>
-class WhitespaceHelper<wchar_t> {
- public:
- static bool Invoke(wchar_t c) { return 0 != iswspace(c); }
-};
-
-} // namespace internal
-
bool StringToInt(WStringPiece input, int* output) {
return internal::StringToIntImpl(input, *output);
}
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
deleted file mode 100644
index e67aa17..0000000
--- a/base/strings/string_piece.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-// Copyright 2012 The Chromium Authors
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/strings/string_piece.h"
-
-#include <algorithm>
-#include <climits>
-#include <limits>
-#include <ostream>
-#include <string>
-
-#include "base/strings/utf_string_conversions.h"
-#include "build/build_config.h"
-
-namespace gurl_base {
-namespace {
-
-// For each character in characters_wanted, sets the index corresponding
-// to the ASCII code of that character to 1 in table. This is used by
-// the find_.*_of methods below to tell whether or not a character is in
-// the lookup table in constant time.
-// The argument `table' must be an array that is large enough to hold all
-// the possible values of an unsigned char. Thus it should be be declared
-// as follows:
-// bool table[UCHAR_MAX + 1]
-inline void BuildLookupTable(StringPiece characters_wanted, bool* table) {
- const size_t length = characters_wanted.length();
- const char* const data = characters_wanted.data();
- for (size_t i = 0; i < length; ++i) {
- table[static_cast<unsigned char>(data[i])] = true;
- }
-}
-
-} // namespace
-
-// MSVC doesn't like complex extern templates and DLLs.
-#if !defined(COMPILER_MSVC)
-template class BasicStringPiece<char>;
-template class BasicStringPiece<char16_t>;
-template class BasicStringPiece<wchar_t>;
-#endif
-
-std::ostream& operator<<(std::ostream& o, StringPiece piece) {
- o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
- return o;
-}
-
-std::ostream& operator<<(std::ostream& o, StringPiece16 piece) {
- return o << UTF16ToUTF8(piece);
-}
-
-std::ostream& operator<<(std::ostream& o, WStringPiece piece) {
- return o << WideToUTF8(piece);
-}
-
-namespace internal {
-
-template <typename T, typename CharT = typename T::value_type>
-size_t findT(T self, T s, size_t pos) {
- if (pos > self.size())
- return BasicStringPiece<CharT>::npos;
-
- typename BasicStringPiece<CharT>::const_iterator result =
- std::search(self.begin() + pos, self.end(), s.begin(), s.end());
- const size_t xpos =
- static_cast<size_t>(result - self.begin());
- return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<CharT>::npos;
-}
-
-size_t find(StringPiece self, StringPiece s, size_t pos) {
- return findT(self, s, pos);
-}
-
-size_t find(StringPiece16 self, StringPiece16 s, size_t pos) {
- return findT(self, s, pos);
-}
-
-template <typename T, typename CharT = typename T::value_type>
-size_t rfindT(T self, T s, size_t pos) {
- if (self.size() < s.size())
- return BasicStringPiece<CharT>::npos;
-
- if (s.empty())
- return std::min(self.size(), pos);
-
- typename BasicStringPiece<CharT>::const_iterator last =
- self.begin() + std::min(self.size() - s.size(), pos) + s.size();
- typename BasicStringPiece<CharT>::const_iterator result =
- std::find_end(self.begin(), last, s.begin(), s.end());
- return result != last ? static_cast<size_t>(result - self.begin())
- : BasicStringPiece<CharT>::npos;
-}
-
-size_t rfind(StringPiece self, StringPiece s, size_t pos) {
- return rfindT(self, s, pos);
-}
-
-size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos) {
- return rfindT(self, s, pos);
-}
-
-// 8-bit version using lookup table.
-size_t find_first_of(StringPiece self, StringPiece s, size_t pos) {
- if (self.size() == 0 || s.size() == 0)
- return StringPiece::npos;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.size() == 1)
- return self.find(s.data()[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_t i = pos; i < self.size(); ++i) {
- if (lookup[static_cast<unsigned char>(self.data()[i])]) {
- return i;
- }
- }
- return StringPiece::npos;
-}
-
-// Generic brute force version.
-template <typename T, typename CharT = typename T::value_type>
-size_t find_first_ofT(T self, T s, size_t pos) {
- // Use the faster std::find() if searching for a single character.
- typename BasicStringPiece<CharT>::const_iterator found =
- s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
- : std::find_first_of(self.begin() + pos, self.end(),
- s.begin(), s.end());
- if (found == self.end())
- return BasicStringPiece<CharT>::npos;
- return static_cast<size_t>(found - self.begin());
-}
-
-size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {
- return find_first_ofT(self, s, pos);
-}
-
-// 8-bit version using lookup table.
-size_t find_first_not_of(StringPiece self, StringPiece s, size_t pos) {
- if (pos >= self.size())
- return StringPiece::npos;
-
- if (s.size() == 0)
- return pos;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.size() == 1)
- return self.find_first_not_of(s.data()[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_t i = pos; i < self.size(); ++i) {
- if (!lookup[static_cast<unsigned char>(self.data()[i])]) {
- return i;
- }
- }
- return StringPiece::npos;
-}
-
-// Generic brute-force version.
-template <typename T, typename CharT = typename T::value_type>
-size_t find_first_not_ofT(T self, T s, size_t pos) {
- if (self.size() == 0)
- return BasicStringPiece<CharT>::npos;
-
- for (size_t self_i = pos; self_i < self.size(); ++self_i) {
- bool found = false;
- for (auto c : s) {
- if (self[self_i] == c) {
- found = true;
- break;
- }
- }
- if (!found)
- return self_i;
- }
- return BasicStringPiece<CharT>::npos;
-}
-
-size_t find_first_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
- return find_first_not_ofT(self, s, pos);
-}
-
-// 8-bit version using lookup table.
-size_t find_last_of(StringPiece self, StringPiece s, size_t pos) {
- if (self.size() == 0 || s.size() == 0)
- return StringPiece::npos;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.size() == 1)
- return self.rfind(s.data()[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (size_t i = std::min(pos, self.size() - 1); ; --i) {
- if (lookup[static_cast<unsigned char>(self.data()[i])])
- return i;
- if (i == 0)
- break;
- }
- return StringPiece::npos;
-}
-
-// Generic brute-force version.
-template <typename T, typename CharT = typename T::value_type>
-size_t find_last_ofT(T self, T s, size_t pos) {
- if (self.size() == 0)
- return BasicStringPiece<CharT>::npos;
-
- for (size_t self_i = std::min(pos, self.size() - 1); ;
- --self_i) {
- for (auto c : s) {
- if (self.data()[self_i] == c)
- return self_i;
- }
- if (self_i == 0)
- break;
- }
- return BasicStringPiece<CharT>::npos;
-}
-
-size_t find_last_of(StringPiece16 self, StringPiece16 s, size_t pos) {
- return find_last_ofT(self, s, pos);
-}
-
-// 8-bit version using lookup table.
-size_t find_last_not_of(StringPiece self, StringPiece s, size_t pos) {
- if (self.size() == 0)
- return StringPiece::npos;
-
- size_t i = std::min(pos, self.size() - 1);
- if (s.size() == 0)
- return i;
-
- // Avoid the cost of BuildLookupTable() for a single-character search.
- if (s.size() == 1)
- return self.find_last_not_of(s.data()[0], pos);
-
- bool lookup[UCHAR_MAX + 1] = { false };
- BuildLookupTable(s, lookup);
- for (; ; --i) {
- if (!lookup[static_cast<unsigned char>(self.data()[i])])
- return i;
- if (i == 0)
- break;
- }
- return StringPiece::npos;
-}
-
-// Generic brute-force version.
-template <typename T, typename CharT = typename T::value_type>
-size_t find_last_not_ofT(T self, T s, size_t pos) {
- if (self.size() == 0)
- return StringPiece::npos;
-
- for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) {
- bool found = false;
- for (auto c : s) {
- if (self.data()[self_i] == c) {
- found = true;
- break;
- }
- }
- if (!found)
- return self_i;
- if (self_i == 0)
- break;
- }
- return BasicStringPiece<CharT>::npos;
-}
-
-size_t find_last_not_of(StringPiece16 self, StringPiece16 s, size_t pos) {
- return find_last_not_ofT(self, s, pos);
-}
-
-size_t find(WStringPiece self, WStringPiece s, size_t pos) {
- return findT(self, s, pos);
-}
-
-size_t rfind(WStringPiece self, WStringPiece s, size_t pos) {
- return rfindT(self, s, pos);
-}
-
-size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos) {
- return find_first_ofT(self, s, pos);
-}
-
-size_t find_first_not_of(WStringPiece self, WStringPiece s, size_t pos) {
- return find_first_not_ofT(self, s, pos);
-}
-
-size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos) {
- return find_last_ofT(self, s, pos);
-}
-
-size_t find_last_not_of(WStringPiece self, WStringPiece s, size_t pos) {
- return find_last_not_ofT(self, s, pos);
-}
-} // namespace internal
-} // namespace base
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index d5f51ad..6e4ea0f 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -2,647 +2,34 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
-// A string-like object that points to a sized piece of memory.
+// This header is deprecated. `gurl_base::StringPiece` is now `std::string_view`.
+// Use it and <string_view> instead.
//
-// You can use StringPiece as a function or method parameter. A StringPiece
-// parameter can receive a double-quoted string literal argument, a "const
-// char*" argument, a string argument, or a StringPiece argument with no data
-// copying. Systematic use of StringPiece for arguments reduces data
-// copies and strlen() calls.
-//
-// Prefer passing StringPieces by value:
-// void MyFunction(StringPiece arg);
-// If circumstances require, you may also pass by const reference:
-// void MyFunction(const StringPiece& arg); // not preferred
-// Both of these have the same lifetime semantics. Passing by value
-// generates slightly smaller code. For more discussion, Googlers can see
-// the thread go/stringpiecebyvalue on c-users.
+// TODO(crbug.com/691162): Remove uses of this header.
#ifndef BASE_STRINGS_STRING_PIECE_H_
#define BASE_STRINGS_STRING_PIECE_H_
-#include <stddef.h>
-#include <stdint.h>
+#include <functional>
-#include <algorithm>
-#include <iosfwd>
-#include <limits>
-#include <string>
-#include <string_view>
-#include <type_traits>
-
+// Many files including this header rely on these being included due to IWYU
+// violations. Preserve the includes for now. As code is migrated away from this
+// header, we can incrementally fix the IWYU violations.
#include "polyfills/base/base_export.h"
#include "polyfills/base/check.h"
#include "polyfills/base/check_op.h"
#include "base/compiler_specific.h"
#include "base/cxx20_is_constant_evaluated.h"
-#include "base/strings/string_piece_forward.h" // IWYU pragma: export
+#include "base/strings/string_piece_forward.h"
+#include "base/strings/utf_ostream_operators.h"
#include "build/build_config.h"
namespace gurl_base {
-// internal --------------------------------------------------------------------
-
-// Many of the StringPiece functions use different implementations for the
-// 8-bit and 16-bit versions, and we don't want lots of template expansions in
-// this (very common) header that will slow down compilation.
-//
-// So here we define overloaded functions called by the StringPiece template.
-// For those that share an implementation, the two versions will expand to a
-// template internal to the .cc file.
-namespace internal {
-
-BASE_EXPORT size_t find(StringPiece self, StringPiece s, size_t pos);
-BASE_EXPORT size_t find(StringPiece16 self, StringPiece16 s, size_t pos);
-
-BASE_EXPORT size_t rfind(StringPiece self, StringPiece s, size_t pos);
-BASE_EXPORT size_t rfind(StringPiece16 self, StringPiece16 s, size_t pos);
-
-BASE_EXPORT size_t find_first_of(StringPiece self, StringPiece s, size_t pos);
-BASE_EXPORT size_t find_first_of(StringPiece16 self,
- StringPiece16 s,
- size_t pos);
-
-BASE_EXPORT size_t find_first_not_of(StringPiece self,
- StringPiece s,
- size_t pos);
-BASE_EXPORT size_t find_first_not_of(StringPiece16 self,
- StringPiece16 s,
- size_t pos);
-
-BASE_EXPORT size_t find_last_of(StringPiece self, StringPiece s, size_t pos);
-BASE_EXPORT size_t find_last_of(StringPiece16 self,
- StringPiece16 s,
- size_t pos);
-
-BASE_EXPORT size_t find_last_not_of(StringPiece self,
- StringPiece s,
- size_t pos);
-BASE_EXPORT size_t find_last_not_of(StringPiece16 self,
- StringPiece16 s,
- size_t pos);
-
-BASE_EXPORT size_t find(WStringPiece self, WStringPiece s, size_t pos);
-BASE_EXPORT size_t rfind(WStringPiece self, WStringPiece s, size_t pos);
-BASE_EXPORT size_t find_first_of(WStringPiece self, WStringPiece s, size_t pos);
-BASE_EXPORT size_t find_first_not_of(WStringPiece self,
- WStringPiece s,
- size_t pos);
-BASE_EXPORT size_t find_last_of(WStringPiece self, WStringPiece s, size_t pos);
-BASE_EXPORT size_t find_last_not_of(WStringPiece self,
- WStringPiece s,
- size_t pos);
-
-} // namespace internal
-
-// BasicStringPiece ------------------------------------------------------------
-
-// Mirrors the C++17 version of std::basic_string_view<> as closely as possible,
-// except where noted below.
-template <typename CharT, typename Traits>
-class GSL_POINTER BasicStringPiece {
- public:
- using traits_type = Traits;
- using value_type = CharT;
- using pointer = CharT*;
- using const_pointer = const CharT*;
- using reference = CharT&;
- using const_reference = const CharT&;
- using const_iterator = const CharT*;
- using iterator = const_iterator;
- using const_reverse_iterator = std::reverse_iterator<const_iterator>;
- using reverse_iterator = const_reverse_iterator;
- using size_type = size_t;
- using difference_type = ptrdiff_t;
-
- constexpr BasicStringPiece() noexcept : ptr_(nullptr), length_(0) {}
- constexpr BasicStringPiece(const BasicStringPiece& other) noexcept = default;
- constexpr BasicStringPiece& operator=(const BasicStringPiece& view) noexcept =
- default;
- constexpr BasicStringPiece(const CharT* s, size_t count)
- : ptr_(s), length_(count) {
- // Intentional STL deviation: Check the string length fits in
- // `difference_type`. No valid buffer can exceed this type, otherwise
- // pointer arithmetic would not be defined. This helps avoid bugs where
- // `count` was computed from an underflow or negative sentinel value.
- GURL_CHECK(length_ <= size_t{PTRDIFF_MAX});
- }
- // NOLINTNEXTLINE(google-explicit-constructor)
- constexpr BasicStringPiece(const CharT* s)
- : ptr_(s), length_(s ? traits_type::length(s) : 0) {
- // Intentional STL deviation: Null-check instead of UB.
- GURL_CHECK(s);
- }
- // Explicitly disallow construction from nullptr. Note that this does not
- // catch construction from runtime strings that might be null.
- // Note: The following is just a more elaborate way of spelling
- // `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is
- // not supported by the PNaCl toolchain.
- template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>>
- // NOLINTNEXTLINE(google-explicit-constructor)
- BasicStringPiece(T) {
- static_assert(sizeof(T) == 0, // Always false.
- "StringPiece does not support construction from nullptr, use "
- "the default constructor instead.");
- }
-
- // These are necessary because std::basic_string provides construction from
- // (an object convertible to) a std::basic_string_view, as well as an explicit
- // cast operator to a std::basic_string_view, but (obviously) not from/to a
- // BasicStringPiece.
- // NOLINTNEXTLINE(google-explicit-constructor)
- BasicStringPiece(const std::basic_string<CharT>& str)
- : ptr_(str.data()), length_(str.size()) {}
- explicit operator std::basic_string<CharT>() const {
- return std::basic_string<CharT>(data(), size());
- }
-
- // Provide implicit conversions from/to the STL version, for interoperability
- // with non-Chromium code.
- // TODO(crbug.com/691162): These will be moot when BasicStringPiece is
- // replaced with std::basic_string_view.
- // NOLINTNEXTLINE(google-explicit-constructor)
- constexpr BasicStringPiece(std::basic_string_view<CharT> str)
- : ptr_(str.data()), length_(str.size()) {}
- // NOLINTNEXTLINE(google-explicit-constructor)
- constexpr operator std::basic_string_view<CharT>() const {
- return std::basic_string_view<CharT>(data(), size());
- }
-
- constexpr const_iterator begin() const noexcept { return ptr_; }
- constexpr const_iterator cbegin() const noexcept { return ptr_; }
- constexpr const_iterator end() const noexcept { return ptr_ + length_; }
- constexpr const_iterator cend() const noexcept { return ptr_ + length_; }
- constexpr const_reverse_iterator rbegin() const noexcept {
- return const_reverse_iterator(ptr_ + length_);
- }
- constexpr const_reverse_iterator crbegin() const noexcept {
- return const_reverse_iterator(ptr_ + length_);
- }
- constexpr const_reverse_iterator rend() const noexcept {
- return const_reverse_iterator(ptr_);
- }
- constexpr const_reverse_iterator crend() const noexcept {
- return const_reverse_iterator(ptr_);
- }
-
- constexpr const_reference operator[](size_type pos) const {
- // Intentional STL deviation: Bounds-check instead of UB.
- return at(pos);
- }
- constexpr const_reference at(size_type pos) const {
- GURL_CHECK_LT(pos, size());
- return data()[pos];
- }
-
- constexpr const_reference front() const { return operator[](0); }
-
- constexpr const_reference back() const { return operator[](size() - 1); }
-
- constexpr const_pointer data() const noexcept { return ptr_; }
-
- constexpr size_type size() const noexcept { return length_; }
- constexpr size_type length() const noexcept { return length_; }
-
- constexpr size_type max_size() const {
- return std::numeric_limits<size_type>::max() / sizeof(CharT);
- }
-
- [[nodiscard]] constexpr bool empty() const noexcept { return size() == 0; }
-
- constexpr void remove_prefix(size_type n) {
- // Intentional STL deviation: Bounds-check instead of UB.
- GURL_CHECK_LE(n, size());
- ptr_ += n;
- length_ -= n;
- }
-
- constexpr void remove_suffix(size_type n) {
- // Intentional STL deviation: Bounds-check instead of UB.
- GURL_CHECK_LE(n, size());
- length_ -= n;
- }
-
- constexpr void swap(BasicStringPiece& v) noexcept {
- // Note: Cannot use std::swap() since it is not constexpr until C++20.
- const const_pointer ptr = ptr_;
- ptr_ = v.ptr_;
- v.ptr_ = ptr;
- const size_type length = length_;
- length_ = v.length_;
- v.length_ = length;
- }
-
- constexpr size_type copy(CharT* dest,
- size_type count,
- size_type pos = 0) const {
- GURL_CHECK_LE(pos, size());
- const size_type rcount = std::min(count, size() - pos);
- traits_type::copy(dest, data() + pos, rcount);
- return rcount;
- }
-
- constexpr BasicStringPiece substr(size_type pos = 0,
- size_type count = npos) const {
- GURL_CHECK_LE(pos, size());
- const size_type rcount = std::min(count, size() - pos);
- return {data() + pos, rcount};
- }
-
- constexpr int compare(BasicStringPiece v) const noexcept {
- const size_type rlen = std::min(size(), v.size());
- const int result = traits_type::compare(data(), v.data(), rlen);
- if (result != 0)
- return result;
- if (size() == v.size())
- return 0;
- return size() < v.size() ? -1 : 1;
- }
- constexpr int compare(size_type pos1,
- size_type count1,
- BasicStringPiece v) const {
- return substr(pos1, count1).compare(v);
- }
- constexpr int compare(size_type pos1,
- size_type count1,
- BasicStringPiece v,
- size_type pos2,
- size_type count2) const {
- return substr(pos1, count1).compare(v.substr(pos2, count2));
- }
- constexpr int compare(const CharT* s) const {
- return compare(BasicStringPiece(s));
- }
- constexpr int compare(size_type pos1,
- size_type count1,
- const CharT* s) const {
- return substr(pos1, count1).compare(BasicStringPiece(s));
- }
- constexpr int compare(size_type pos1,
- size_type count1,
- const CharT* s,
- size_type count2) const {
- return substr(pos1, count1).compare(BasicStringPiece(s, count2));
- }
-
- constexpr size_type find(BasicStringPiece v,
- size_type pos = 0) const noexcept {
- if (is_constant_evaluated()) {
- if (v.size() > size())
- return npos;
- for (size_type p = pos; p <= size() - v.size(); ++p) {
- if (!compare(p, v.size(), v))
- return p;
- }
- return npos;
- }
-
- return internal::find(*this, v, pos);
- }
- constexpr size_type find(CharT ch, size_type pos = 0) const noexcept {
- if (pos >= size())
- return npos;
-
- const const_pointer result =
- traits_type::find(data() + pos, size() - pos, ch);
- return result ? static_cast<size_type>(result - data()) : npos;
- }
- constexpr size_type find(const CharT* s,
- size_type pos,
- size_type count) const {
- return find(BasicStringPiece(s, count), pos);
- }
- constexpr size_type find(const CharT* s, size_type pos = 0) const {
- return find(BasicStringPiece(s), pos);
- }
-
- constexpr size_type rfind(BasicStringPiece v,
- size_type pos = npos) const noexcept {
- if (is_constant_evaluated()) {
- if (v.size() > size())
- return npos;
- for (size_type p = std::min(size() - v.size(), pos);; --p) {
- if (!compare(p, v.size(), v))
- return p;
- if (!p)
- break;
- }
- return npos;
- }
-
- return internal::rfind(*this, v, pos);
- }
- constexpr size_type rfind(CharT c, size_type pos = npos) const noexcept {
- if (empty())
- return npos;
-
- for (size_t i = std::min(pos, size() - 1);; --i) {
- if (data()[i] == c)
- return i;
-
- if (i == 0)
- break;
- }
- return npos;
- }
- constexpr size_type rfind(const CharT* s,
- size_type pos,
- size_type count) const {
- return rfind(BasicStringPiece(s, count), pos);
- }
- constexpr size_type rfind(const CharT* s, size_type pos = npos) const {
- return rfind(BasicStringPiece(s), pos);
- }
-
- constexpr size_type find_first_of(BasicStringPiece v,
- size_type pos = 0) const noexcept {
- if (is_constant_evaluated()) {
- if (empty() || v.empty())
- return npos;
- for (size_type p = pos; p < size(); ++p) {
- if (v.find(data()[p]) != npos)
- return p;
- }
- return npos;
- }
-
- return internal::find_first_of(*this, v, pos);
- }
- constexpr size_type find_first_of(CharT c, size_type pos = 0) const noexcept {
- return find(c, pos);
- }
- constexpr size_type find_first_of(const CharT* s,
- size_type pos,
- size_type count) const {
- return find_first_of(BasicStringPiece(s, count), pos);
- }
- constexpr size_type find_first_of(const CharT* s, size_type pos = 0) const {
- return find_first_of(BasicStringPiece(s), pos);
- }
-
- constexpr size_type find_last_of(BasicStringPiece v,
- size_type pos = npos) const noexcept {
- if (is_constant_evaluated()) {
- if (empty() || v.empty())
- return npos;
- for (size_type p = std::min(pos, size() - 1);; --p) {
- if (v.find(data()[p]) != npos)
- return p;
- if (!p)
- break;
- }
- return npos;
- }
-
- return internal::find_last_of(*this, v, pos);
- }
- constexpr size_type find_last_of(CharT c,
- size_type pos = npos) const noexcept {
- return rfind(c, pos);
- }
- constexpr size_type find_last_of(const CharT* s,
- size_type pos,
- size_type count) const {
- return find_last_of(BasicStringPiece(s, count), pos);
- }
- constexpr size_type find_last_of(const CharT* s, size_type pos = npos) const {
- return find_last_of(BasicStringPiece(s), pos);
- }
-
- constexpr size_type find_first_not_of(BasicStringPiece v,
- size_type pos = 0) const noexcept {
- if (is_constant_evaluated()) {
- if (empty())
- return npos;
- for (size_type p = pos; p < size(); ++p) {
- if (v.find(data()[p]) == npos)
- return p;
- }
- return npos;
- }
-
- return internal::find_first_not_of(*this, v, pos);
- }
- constexpr size_type find_first_not_of(CharT c,
- size_type pos = 0) const noexcept {
- if (empty())
- return npos;
-
- for (; pos < size(); ++pos) {
- if (data()[pos] != c)
- return pos;
- }
- return npos;
- }
- constexpr size_type find_first_not_of(const CharT* s,
- size_type pos,
- size_type count) const {
- return find_first_not_of(BasicStringPiece(s, count), pos);
- }
- constexpr size_type find_first_not_of(const CharT* s,
- size_type pos = 0) const {
- return find_first_not_of(BasicStringPiece(s), pos);
- }
-
- constexpr size_type find_last_not_of(BasicStringPiece v,
- size_type pos = npos) const noexcept {
- if (is_constant_evaluated()) {
- if (empty())
- return npos;
- for (size_type p = std::min(pos, size() - 1);; --p) {
- if (v.find(data()[p]) == npos)
- return p;
- if (!p)
- break;
- }
- return npos;
- }
-
- return internal::find_last_not_of(*this, v, pos);
- }
- constexpr size_type find_last_not_of(CharT c,
- size_type pos = npos) const noexcept {
- if (empty())
- return npos;
-
- for (size_t i = std::min(pos, size() - 1);; --i) {
- if (data()[i] != c)
- return i;
- if (i == 0)
- break;
- }
- return npos;
- }
- constexpr size_type find_last_not_of(const CharT* s,
- size_type pos,
- size_type count) const {
- return find_last_not_of(BasicStringPiece(s, count), pos);
- }
- constexpr size_type find_last_not_of(const CharT* s,
- size_type pos = npos) const {
- return find_last_not_of(BasicStringPiece(s), pos);
- }
-
- static constexpr size_type npos = size_type(-1);
-
- protected:
- const_pointer ptr_;
- size_type length_;
-};
-
-// static
-template <typename CharT, typename Traits>
-const typename BasicStringPiece<CharT, Traits>::size_type
- BasicStringPiece<CharT, Traits>::npos;
-
-// MSVC doesn't like complex extern templates and DLLs.
-#if !defined(COMPILER_MSVC)
-extern template class BASE_EXPORT BasicStringPiece<char>;
-extern template class BASE_EXPORT BasicStringPiece<char16_t>;
-#endif
-
-template <typename CharT, typename Traits>
-constexpr bool operator==(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
-}
-// Here and below we make use of std::common_type_t to emulate
-// std::type_identity (part of C++20). This creates a non-deduced context, so
-// that we can compare StringPieces with types that implicitly convert to
-// StringPieces. See https://wg21.link/n3766 for details.
-// Furthermore, we require dummy template parameters for these overloads to work
-// around a name mangling issue on Windows.
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator==(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
-}
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator==(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
-}
-
-template <typename CharT, typename Traits>
-constexpr bool operator!=(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(lhs == rhs);
-}
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator!=(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return !(lhs == rhs);
-}
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator!=(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(lhs == rhs);
-}
-
-template <typename CharT, typename Traits>
-constexpr bool operator<(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return lhs.compare(rhs) < 0;
-}
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator<(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return lhs.compare(rhs) < 0;
-}
-
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator<(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return lhs.compare(rhs) < 0;
-}
-
-template <typename CharT, typename Traits>
-constexpr bool operator>(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return rhs < lhs;
-}
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator>(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return rhs < lhs;
-}
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator>(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return rhs < lhs;
-}
-
-template <typename CharT, typename Traits>
-constexpr bool operator<=(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(rhs < lhs);
-}
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator<=(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return !(rhs < lhs);
-}
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator<=(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(rhs < lhs);
-}
-
-template <typename CharT, typename Traits>
-constexpr bool operator>=(BasicStringPiece<CharT, Traits> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(lhs < rhs);
-}
-template <typename CharT, typename Traits, int = 1>
-constexpr bool operator>=(
- BasicStringPiece<CharT, Traits> lhs,
- std::common_type_t<BasicStringPiece<CharT, Traits>> rhs) noexcept {
- return !(lhs < rhs);
-}
-template <typename CharT, typename Traits, int = 2>
-constexpr bool operator>=(
- std::common_type_t<BasicStringPiece<CharT, Traits>> lhs,
- BasicStringPiece<CharT, Traits> rhs) noexcept {
- return !(lhs < rhs);
-}
-
-BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece piece);
-// Not in the STL: convenience functions to output non-UTF-8 strings to an
-// 8-bit-width stream.
-BASE_EXPORT std::ostream& operator<<(std::ostream& o, StringPiece16 piece);
-BASE_EXPORT std::ostream& operator<<(std::ostream& o, WStringPiece piece);
-
-// Intentionally omitted (since Chromium does not use character literals):
-// operator""sv.
-
-// Stand-ins for the STL's std::hash<> specializations.
-template <typename StringPieceType>
-struct StringPieceHashImpl {
- using is_transparent = void; // to allow for heterogenous lookup
-
- // This is a custom hash function. We don't use the ones already defined for
- // string and std::u16string directly because it would require the string
- // constructors to be called, which we don't want.
- size_t operator()(StringPieceType sp) const {
- size_t result = 0;
- for (auto c : sp)
- result = (result * 131) + static_cast<size_t>(c);
- return result;
- }
-};
-using StringPieceHash = StringPieceHashImpl<StringPiece>;
-using StringPiece16Hash = StringPieceHashImpl<StringPiece16>;
-using WStringPieceHash = StringPieceHashImpl<WStringPiece>;
+// Historically, `std::hash` did not support `gurl_base::StringPiece`. Now
+// `gurl_base::StringPiece` is `std::string_view`, so this is no longer necessary.
+// Replace uses of this type with the default hasher.
+using StringPieceHash = std::hash<StringPiece>;
} // namespace base
diff --git a/base/strings/string_piece_forward.h b/base/strings/string_piece_forward.h
index 6c391b1..9138f3f 100644
--- a/base/strings/string_piece_forward.h
+++ b/base/strings/string_piece_forward.h
@@ -2,20 +2,23 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
-// Forward declaration of StringPiece types from base/strings/string_piece.h.
+// This header is deprecated. `gurl_base::StringPiece` is now `std::string_view`.
+// Use it and <string_view> instead.
+//
+// TODO(crbug.com/691162): Remove uses of this header.
#ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_
#define BASE_STRINGS_STRING_PIECE_FORWARD_H_
-#include <iosfwd>
+#include <string_view>
namespace gurl_base {
template <typename CharT, typename Traits = std::char_traits<CharT>>
-class BasicStringPiece;
-using StringPiece = BasicStringPiece<char>;
-using StringPiece16 = BasicStringPiece<char16_t>;
-using WStringPiece = BasicStringPiece<wchar_t>;
+using BasicStringPiece = std::basic_string_view<CharT, Traits>;
+using StringPiece = std::string_view;
+using StringPiece16 = std::u16string_view;
+using WStringPiece = std::wstring_view;
} // namespace base
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index 35ea795..b77edbd 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc
@@ -518,7 +518,7 @@
std::basic_string<TypeParam> s1(TestFixture::as_string("123"));
s1 += static_cast<TypeParam>('\0');
s1 += TestFixture::as_string("456");
- BasicStringPiece<TypeParam> b(s1);
+ [[maybe_unused]] BasicStringPiece<TypeParam> b(s1);
BasicStringPiece<TypeParam> e;
std::basic_string<TypeParam> s2;
@@ -565,7 +565,7 @@
std::string s1("123");
s1 += '\0';
s1 += "456";
- StringPiece b(s1);
+ [[maybe_unused]] StringPiece b(s1);
StringPiece e;
std::string s2;
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index 082e42d..d40c11b 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -421,6 +421,28 @@
}
template <typename Char>
+inline bool IsAsciiControl(Char c) {
+  // A negative value of a signed `Char` is never an ASCII control character.
+  bool is_negative = false;
+  if constexpr (std::is_signed_v<Char>) {
+    is_negative = c < 0;
+  }
+  return !is_negative && (c == 0x7f || c <= 0x1f);
+}
+
+template <typename Char>
+inline bool IsUnicodeControl(Char c) {
+  // C1 control characters: http://unicode.org/charts/PDF/U0080.pdf
+  const bool is_c1_control = c >= 0x80 && c <= 0x9F;
+  return IsAsciiControl(c) || is_c1_control;
+}
+
+template <typename Char>
+inline bool IsAsciiPunctuation(Char c) {
+  return 0x20 < c && c < 0x7f && !IsAsciiAlphaNumeric(c);
+}
+
+template <typename Char>
inline bool IsHexDigit(Char c) {
return (c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'F') ||
diff --git a/base/strings/string_util_impl_helpers.h b/base/strings/string_util_impl_helpers.h
index 9578477..fb15c87 100644
--- a/base/strings/string_util_impl_helpers.h
+++ b/base/strings/string_util_impl_helpers.h
@@ -481,12 +481,12 @@
auto iter = parts.begin();
GURL_DCHECK(iter != parts.end());
- result.append(iter->data(), iter->size());
+ result.append(*iter);
++iter;
for (; iter != parts.end(); ++iter) {
- result.append(sep.data(), sep.size());
- result.append(iter->data(), iter->size());
+ result.append(sep);
+ result.append(*iter);
}
// Sanity-check that we pre-allocated correctly.
diff --git a/base/strings/to_string.h b/base/strings/to_string.h
new file mode 100644
index 0000000..78691a0
--- /dev/null
+++ b/base/strings/to_string.h
@@ -0,0 +1,117 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_TO_STRING_H_
+#define BASE_STRINGS_TO_STRING_H_
+
+#include <ios>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "base/template_util.h"
+#include "base/types/supports_ostream_operator.h"
+
+namespace gurl_base {
+
+namespace internal {
+
+template <typename T, typename = void>
+struct SupportsToString : std::false_type {};
+template <typename T>
+struct SupportsToString<T, std::void_t<decltype(std::declval<T>().ToString())>>
+    : std::true_type {};
+
+// I/O manipulators are function pointers, but should be sent directly to the
+// `ostream` instead of being cast to `const void*` like other function
+// pointers.
+template <typename T, typename = void>
+constexpr bool IsIomanip = false;
+template <typename T>
+constexpr bool
+ IsIomanip<T&(T&), std::enable_if_t<std::is_base_of_v<std::ios_base, T>>> =
+ true;
+
+// Function pointers implicitly convert to `bool`, so use this to avoid printing
+// function pointers as 1 or 0.
+template <typename T, typename = void>
+constexpr bool WillBeIncorrectlyStreamedAsBool = false;
+template <typename T>
+constexpr bool WillBeIncorrectlyStreamedAsBool<
+ T,
+ std::enable_if_t<std::is_function_v<std::remove_pointer_t<T>> &&
+ !IsIomanip<std::remove_pointer_t<T>>>> = true;
+
+// Fallback when there is no better representation: prints size and address.
+template <typename T, typename = void>
+struct ToStringHelper {
+ static void Stringify(const T& v, std::ostringstream& ss) {
+ ss << "[" << sizeof(v) << "-byte object at 0x" << std::addressof(v) << "]";
+ }
+};
+
+// Most streamables: the value is written to the stream with `operator<<`.
+template <typename T>
+struct ToStringHelper<
+ T,
+ std::enable_if_t<SupportsOstreamOperator<const T&>::value &&
+ !WillBeIncorrectlyStreamedAsBool<T>>> {
+ static void Stringify(const T& v, std::ostringstream& ss) { ss << v; }
+};
+
+// Functions and function pointers: stringified as a `const void*` instead.
+template <typename T>
+struct ToStringHelper<
+ T,
+ std::enable_if_t<SupportsOstreamOperator<const T&>::value &&
+ WillBeIncorrectlyStreamedAsBool<T>>> {
+ static void Stringify(const T& v, std::ostringstream& ss) {
+ ToStringHelper<const void*>::Stringify(reinterpret_cast<const void*>(v),
+ ss);
+ }
+};
+
+// Non-streamables that have a `ToString` member: stringify what it returns.
+template <typename T>
+struct ToStringHelper<
+ T,
+ std::enable_if_t<!SupportsOstreamOperator<const T&>::value &&
+ SupportsToString<const T&>::value>> {
+ static void Stringify(const T& v, std::ostringstream& ss) {
+ // .ToString() may not return a std::string, e.g. blink::WTF::String.
+ ToStringHelper<decltype(v.ToString())>::Stringify(v.ToString(), ss);
+ }
+};
+
+// Non-streamable enums (i.e. scoped enums where no `operator<<` overload was
+// declared).
+template <typename T>
+struct ToStringHelper<
+ T,
+ std::enable_if_t<!SupportsOstreamOperator<const T&>::value &&
+ std::is_enum_v<T>>> {
+ static void Stringify(const T& v, std::ostringstream& ss) {
+ using UT = typename std::underlying_type_t<T>;
+ ToStringHelper<UT>::Stringify(static_cast<UT>(v), ss);
+ }
+};
+
+} // namespace internal
+
+// Converts all `values` to strings and concatenates them in order, preferring
+// a defined operator<<() or ToString() method for each value if one exists.
+template <typename... Ts>
+std::string ToString(const Ts&... values) {
+ std::ostringstream ss;
+ (internal::ToStringHelper<remove_cvref_t<decltype(values)>>::Stringify(values,
+ ss),
+ ...);
+ return ss.str();
+}
+
+} // namespace gurl_base
+
+#endif // BASE_STRINGS_TO_STRING_H_
diff --git a/base/strings/to_string_test.cc b/base/strings/to_string_test.cc
new file mode 100644
index 0000000..87d021f
--- /dev/null
+++ b/base/strings/to_string_test.cc
@@ -0,0 +1,115 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/to_string.h"
+
+#include <ios>
+#include <ostream>
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace gurl_base {
+namespace {
+
+class NotStringifiable {};
+class HasToString {
+ public:
+ std::string ToString() const { return "yay!"; }
+};
+
+// .ToString() support on structs.
+static_assert(!internal::SupportsToString<NotStringifiable>::value,
+ "value without ToString() shouldn't be marked SupportsToString");
+static_assert(!internal::SupportsToString<const NotStringifiable&>::value,
+ "const& without ToString() shouldn't be marked SupportsToString");
+static_assert(internal::SupportsToString<HasToString>::value,
+ "value with ToString() should be marked SupportsToString");
+static_assert(internal::SupportsToString<const HasToString&>::value,
+ "const& with ToString() should be marked SupportsToString");
+
+TEST(ToStringTest, Streamable) {
+ // Types with built-in <<.
+ EXPECT_EQ(ToString("foo"), "foo");
+ EXPECT_EQ(ToString(123), "123");
+}
+
+enum class StreamableTestEnum { kGreeting, kLocation };
+
+std::ostream& operator<<(std::ostream& os, const StreamableTestEnum& value) {
+  switch (value) {
+    case StreamableTestEnum::kGreeting: return os << "hello";
+    case StreamableTestEnum::kLocation: return os << "world";
+  }
+  // Out-of-range values must not flow off the end of a non-void function (UB).
+  return os;
+}
+
+TEST(ToStringTest, UserDefinedStreamable) {
+ // Type with user-defined <<.
+ EXPECT_EQ(ToString(StreamableTestEnum::kGreeting), "hello");
+ EXPECT_EQ(ToString(StreamableTestEnum::kGreeting, " ",
+ StreamableTestEnum::kLocation),
+ "hello world");
+}
+
+TEST(ToStringTest, UserDefinedToString) {
+ // Type with user-defined ToString().
+ EXPECT_EQ(ToString(HasToString()), "yay!");
+}
+
+class UnusualToString {
+ public:
+ HasToString ToString() const { return HasToString(); }
+};
+
+TEST(ToStringTest, ToStringReturnsNonStdString) {
+ // Types with a ToString() that does not directly return a std::string should
+ // still work.
+ EXPECT_EQ(ToString(UnusualToString()), "yay!");
+}
+
+enum class NonStreamableTestEnum { kGreeting = 0, kLocation };
+
+TEST(ToStringTest, ScopedEnum) {
+ // Scoped enums without a defined << should print as their underlying type.
+ EXPECT_EQ(ToString(NonStreamableTestEnum::kLocation), "1");
+}
+
+TEST(ToStringTest, IoManip) {
+ // I/O manipulators should have their expected effect, not be printed as
+ // function pointers.
+ EXPECT_EQ(ToString("42 in hex is ", std::hex, 42), "42 in hex is 2a");
+}
+
+void Func() {}
+
+TEST(ToStringTest, FunctionPointer) {
+ // We don't care about the actual address, but a function pointer should not
+ // be implicitly converted to bool.
+ EXPECT_NE(ToString(&Func), ToString(true));
+
+ // Functions should be treated like function pointers.
+ EXPECT_EQ(ToString(Func), ToString(&Func));
+}
+
+class OverloadsAddressOp {
+ public:
+ OverloadsAddressOp* operator&() { return nullptr; }
+ const OverloadsAddressOp* operator&() const { return nullptr; }
+};
+
+TEST(ToStringTest, NonStringifiable) {
+ // Non-stringifiable types should be printed using a fallback.
+ EXPECT_NE(ToString(NotStringifiable()).find("-byte object at 0x"),
+ std::string::npos);
+
+ // Non-stringifiable types which overload operator& should print their real
+ // address.
+ EXPECT_NE(ToString(OverloadsAddressOp()),
+ ToString(static_cast<OverloadsAddressOp*>(nullptr)));
+}
+
+} // namespace
+} // namespace gurl_base
diff --git a/base/strings/utf_ostream_operators.cc b/base/strings/utf_ostream_operators.cc
new file mode 100644
index 0000000..2f60140
--- /dev/null
+++ b/base/strings/utf_ostream_operators.cc
@@ -0,0 +1,31 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/utf_ostream_operators.h"
+
+#include "base/strings/utf_string_conversions.h"
+
+std::ostream& std::operator<<(std::ostream& out, const wchar_t* wstr) {
+ return out << (wstr ? std::wstring_view(wstr) : std::wstring_view());
+}
+
+std::ostream& std::operator<<(std::ostream& out, std::wstring_view wstr) {
+ return out << gurl_base::WideToUTF8(wstr);
+}
+
+std::ostream& std::operator<<(std::ostream& out, const std::wstring& wstr) {
+ return out << std::wstring_view(wstr);
+}
+
+std::ostream& std::operator<<(std::ostream& out, const char16_t* str16) {
+ return out << (str16 ? std::u16string_view(str16) : std::u16string_view());
+}
+
+std::ostream& std::operator<<(std::ostream& out, std::u16string_view str16) {
+ return out << gurl_base::UTF16ToUTF8(str16);
+}
+
+std::ostream& std::operator<<(std::ostream& out, const std::u16string& str16) {
+ return out << std::u16string_view(str16);
+}
diff --git a/base/strings/utf_ostream_operators.h b/base/strings/utf_ostream_operators.h
new file mode 100644
index 0000000..6fca090
--- /dev/null
+++ b/base/strings/utf_ostream_operators.h
@@ -0,0 +1,45 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_STRINGS_UTF_OSTREAM_OPERATORS_H_
+#define BASE_STRINGS_UTF_OSTREAM_OPERATORS_H_
+
+#include <iosfwd>
+#include <string_view>
+
+#include "polyfills/base/base_export.h"
+
+// Note that "The behavior of a C++ program is undefined if it adds declarations
+// or definitions to namespace std or to a namespace within namespace std unless
+// otherwise specified." --C++11[namespace.std]
+//
+// We've checked that this particular definition has the intended behavior on
+// our implementations, but it's prone to breaking in the future, and please
+// don't imitate this in your own definitions without checking with some
+// standard library experts.
+namespace std {
+// These functions are provided as a convenience for logging, which is where we
+// use streams (it is against Google style to use streams in other places).
+// They are designed to allow you to emit non-ASCII Unicode strings to the log
+// file, which is normally ASCII. They are relatively slow, so try not to use
+// them for common cases. Non-ASCII characters will be converted to UTF-8 by
+// these operators.
+//
+// The `std::basic_string<T>` overloads are necessary to allow logging types
+// which are implicitly convertible to `std::basic_string<T>`. Simply taking
+// `std::basic_string_view<T>` would not work because C++ only allows one
+// implicit conversion.
+BASE_EXPORT std::ostream& operator<<(std::ostream& out, const wchar_t* wstr);
+BASE_EXPORT std::ostream& operator<<(std::ostream& out, std::wstring_view wstr);
+BASE_EXPORT std::ostream& operator<<(std::ostream& out,
+ const std::wstring& wstr);
+
+BASE_EXPORT std::ostream& operator<<(std::ostream& out, const char16_t* str16);
+BASE_EXPORT std::ostream& operator<<(std::ostream& out,
+ std::u16string_view str16);
+BASE_EXPORT std::ostream& operator<<(std::ostream& out,
+ const std::u16string& str16);
+} // namespace std
+
+#endif // BASE_STRINGS_UTF_OSTREAM_OPERATORS_H_
diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc
index 162fa36..149a6ee 100644
--- a/base/strings/utf_string_conversion_utils.cc
+++ b/base/strings/utf_string_conversion_utils.cc
@@ -9,6 +9,21 @@
namespace gurl_base {
+// CountUnicodeCharacters ------------------------------------------------------
+
+absl::optional<size_t> CountUnicodeCharacters(const char16_t* src,
+ size_t src_len,
+ size_t limit) {
+ base_icu::UChar32 unused = 0;
+ size_t count = 0;
+ for (size_t index = 0; count < limit && index < src_len; ++count, ++index) {
+ if (!ReadUnicodeCharacter(src, src_len, &index, &unused)) {
+ return absl::nullopt;
+ }
+ }
+ return count;
+}
+
// ReadUnicodeCharacter --------------------------------------------------------
bool ReadUnicodeCharacter(const char* src,
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index f5ae5b1..638a59f 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h
@@ -16,6 +16,7 @@
#include "polyfills/base/base_export.h"
#include "base/third_party/icu/icu_utf.h"
#include "build/build_config.h"
+#include "absl/types/optional.h"
namespace gurl_base {
@@ -39,6 +40,15 @@
(code_point & 0xFFFE) != 0xFFFE);
}
+// CountUnicodeCharacters ------------------------------------------------------
+
+// Returns the number of Unicode characters in `src`, up to the supplied
+// `limit`, if `src` contains valid UTF-16. Returns `nullopt` otherwise.
+BASE_EXPORT absl::optional<size_t> CountUnicodeCharacters(
+ const char16_t* src,
+ size_t src_len,
+ size_t limit = std::numeric_limits<size_t>::max());
+
// ReadUnicodeCharacter --------------------------------------------------------
// Reads a UTF-8 stream, placing the next code point into the given output
diff --git a/base/strings/utf_string_conversion_utils_unittest.cc b/base/strings/utf_string_conversion_utils_unittest.cc
new file mode 100644
index 0000000..4e700dc
--- /dev/null
+++ b/base/strings/utf_string_conversion_utils_unittest.cc
@@ -0,0 +1,32 @@
+// Copyright 2023 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/utf_string_conversion_utils.h"
+
+#include <string>
+
+#include "base/strings/string_piece.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace gurl_base {
+
+TEST(UtfStringConversionUtilsTest, CountUnicodeCharacters) {
+ struct TestCase {
+ std::u16string value;
+ size_t limit;
+ absl::optional<size_t> count;
+ } test_cases[] = {
+ {u"", 0, 0}, {u"abc", 1, 1},
+ {u"abc", 3, 3}, {u"abc", 0, 0},
+ {u"abc", 4, 3}, {u"abc\U0001F4A9", 4, 4},
+ {u"\U0001F4A9", 1, 1}, {{1, 0xD801u}, 5, absl::nullopt},
+ };
+ for (const auto& test_case : test_cases) {
+ EXPECT_EQ(CountUnicodeCharacters(test_case.value.data(),
+ test_case.value.length(), test_case.limit),
+ test_case.count);
+ }
+}
+
+} // namespace gurl_base
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h
index 975d29d..1ee702c 100644
--- a/base/strings/utf_string_conversions.h
+++ b/base/strings/utf_string_conversions.h
@@ -11,6 +11,7 @@
#include "polyfills/base/base_export.h"
#include "base/strings/string_piece.h"
+#include "base/types/always_false.h"
#include "build/build_config.h"
namespace gurl_base {
@@ -70,19 +71,22 @@
// compile time.
template <size_t N>
std::u16string WideToUTF16(const wchar_t (&str)[N]) {
- static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ static_assert(AlwaysFalse<decltype(N)>,
+ "Error: Use the u\"...\" prefix instead.");
return std::u16string();
}
template <size_t N>
std::u16string UTF8ToUTF16(const char (&str)[N]) {
- static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ static_assert(AlwaysFalse<decltype(N)>,
+ "Error: Use the u\"...\" prefix instead.");
return std::u16string();
}
template <size_t N>
std::u16string ASCIIToUTF16(const char (&str)[N]) {
- static_assert(N == 0, "Error: Use the u\"...\" prefix instead.");
+ static_assert(AlwaysFalse<decltype(N)>,
+ "Error: Use the u\"...\" prefix instead.");
return std::u16string();
}
diff --git a/base/template_util.h b/base/template_util.h
index 93b7c31..480eaca 100644
--- a/base/template_util.h
+++ b/base/template_util.h
@@ -18,12 +18,6 @@
namespace internal {
-template <typename T, typename = void>
-struct SupportsToString : std::false_type {};
-template <typename T>
-struct SupportsToString<T, decltype(void(std::declval<T>().ToString()))>
- : std::true_type {};
-
// Used to detech whether the given type is an iterator. This is normally used
// with std::enable_if to provide disambiguation for functions that take
// templatzed iterators as input.
diff --git a/base/types/always_false.h b/base/types/always_false.h
new file mode 100644
index 0000000..7f43695
--- /dev/null
+++ b/base/types/always_false.h
@@ -0,0 +1,48 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_TYPES_ALWAYS_FALSE_H_
+#define BASE_TYPES_ALWAYS_FALSE_H_
+
+namespace gurl_base {
+
+// A helper that can be used with a static_assert() that must always fail (e.g.
+// for an undesirable template instantiation). Such a static_assert() cannot
+// simply be written as static_assert(false, ...) because that would always fail
+// to compile, even if the template was never instantiated. Instead, a common
+// idiom is to force the static_assert() to depend on a template parameter so
+// that it is only evaluated when the template is instantiated:
+//
+// template <typename U = T>
+// void SomeDangerousMethodThatShouldNeverCompile() {
+// static_assert(gurl_base::AlwaysFalse<U>, "explanatory message here");
+// }
+//
+//
+// The issue of not being able to use static_assert(false, ...) in a
+// non-instantiated template was fixed in C++23. When Chromium switches to
+// building with C++23, remove this file and use false directly, and search
+// across the Chromium codebase for "AlwaysFalse", as there are other
+// implementations in places that cannot depend on this file.
+//
+// References:
+// - https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html
+// - https://github.com/cplusplus/papers/issues/1251
+
+namespace internal {
+
+template <typename... Args>
+struct AlwaysFalseHelper {
+ static constexpr bool kValue = false;
+};
+
+} // namespace internal
+
+template <typename... Args>
+inline constexpr bool AlwaysFalse =
+ internal::AlwaysFalseHelper<Args...>::kValue;
+
+} // namespace gurl_base
+
+#endif // BASE_TYPES_ALWAYS_FALSE_H_
diff --git a/build/build_config.h b/build/build_config.h
index 6db5d9b..2484703 100644
--- a/build/build_config.h
+++ b/build/build_config.h
@@ -33,13 +33,13 @@
// COMPILER_MSVC / COMPILER_GCC
//
// Processor:
-// ARCH_CPU_ARM64 / ARCH_CPU_ARMEL / ARCH_CPU_LOONG32 / ARCH_CPU_LOONG64 /
-// ARCH_CPU_MIPS / ARCH_CPU_MIPS64 / ARCH_CPU_MIPS64EL / ARCH_CPU_MIPSEL /
-// ARCH_CPU_PPC64 / ARCH_CPU_S390 / ARCH_CPU_S390X / ARCH_CPU_X86 /
-// ARCH_CPU_X86_64 / ARCH_CPU_RISCV64
+// ARCH_CPU_ARM64 / ARCH_CPU_ARMEL / ARCH_CPU_LOONGARCH32 /
+// ARCH_CPU_LOONGARCH64 / ARCH_CPU_MIPS / ARCH_CPU_MIPS64 /
+// ARCH_CPU_MIPS64EL / ARCH_CPU_MIPSEL / ARCH_CPU_PPC64 / ARCH_CPU_S390 /
+// ARCH_CPU_S390X / ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_RISCV64
// Processor family:
// ARCH_CPU_ARM_FAMILY: ARMEL or ARM64
-// ARCH_CPU_LOONG_FAMILY: LOONG32 or LOONG64
+// ARCH_CPU_LOONGARCH_FAMILY: LOONGARCH32 or LOONGARCH64
// ARCH_CPU_MIPS_FAMILY: MIPS64EL or MIPSEL or MIPS64 or MIPS
// ARCH_CPU_PPC64_FAMILY: PPC64
// ARCH_CPU_S390_FAMILY: S390 or S390X
@@ -335,16 +335,16 @@
#define ARCH_CPU_32_BITS 1
#define ARCH_CPU_BIG_ENDIAN 1
#endif
-#elif defined(__loongarch32)
-#define ARCH_CPU_LOONG_FAMILY 1
-#define ARCH_CPU_LOONG32 1
-#define ARCH_CPU_32_BITS 1
+#elif defined(__loongarch__)
+#define ARCH_CPU_LOONGARCH_FAMILY 1
#define ARCH_CPU_LITTLE_ENDIAN 1
-#elif defined(__loongarch64)
-#define ARCH_CPU_LOONG_FAMILY 1
-#define ARCH_CPU_LOONG64 1
+#if __loongarch_grlen == 64
+#define ARCH_CPU_LOONGARCH64 1
#define ARCH_CPU_64_BITS 1
-#define ARCH_CPU_LITTLE_ENDIAN 1
+#else
+#define ARCH_CPU_LOONGARCH32 1
+#define ARCH_CPU_32_BITS 1
+#endif
#elif defined(__riscv) && (__riscv_xlen == 64)
#define ARCH_CPU_RISCV_FAMILY 1
#define ARCH_CPU_RISCV64 1
diff --git a/copy.bara.sky b/copy.bara.sky
index 7f21a09..9020cbf 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -37,6 +37,7 @@
"base/strings/*.cc",
"base/strings/*.h",
"base/template_util.h",
+ "base/types/always_false.h",
"base/third_party/icu/**",
"base/win/win_handle_types.h",
"base/win/win_handle_types_list.inc",
@@ -106,6 +107,7 @@
# iOS version of url_idna is ASCII-only, but it uses .mm extension; rename
# it to a .cc file.
core.move("url/url_idna_icu_alternatives_ios.mm", "url/url_idna_ascii_only.cc"),
+ core.replace("!defined(__has_feature) || !__has_feature(objc_arc)", "false"),
# Fix some Perfetto includes.
core.replace("base/trace_event/base_tracing.h", "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"),
diff --git a/polyfills/base/base_export.h b/polyfills/base/base_export.h
index 209e910..e02383e 100644
--- a/polyfills/base/base_export.h
+++ b/polyfills/base/base_export.h
@@ -5,6 +5,9 @@
#ifndef POLYFILLS_BASE_BASE_EXPORT_H_
#define POLYFILLS_BASE_BASE_EXPORT_H_
+// Required to get some of the headers to compile.
+#include <limits>
+
#define BASE_EXPORT
#endif /* POLYFILLS_BASE_BASE_EXPORT_H_ */
diff --git a/polyfills/base/feature_list.h b/polyfills/base/feature_list.h
index b687509..8ad7a08 100644
--- a/polyfills/base/feature_list.h
+++ b/polyfills/base/feature_list.h
@@ -30,6 +30,8 @@
static bool IsEnabled(const Feature& feature) {
return feature.default_state == FEATURE_ENABLED_BY_DEFAULT;
}
+
+ static FeatureList* GetInstance() { return nullptr; }
};
} // namespace gurl_base
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h
index 3d7aadc..42a5068 100644
--- a/polyfills/base/logging.h
+++ b/polyfills/base/logging.h
@@ -36,6 +36,6 @@
#define GURL_DCHECK(statement) GurlFakeLogSink({statement})
#define GURL_DLOG(severity) GurlFakeLogSink(true)
#define GURL_LOG(severity) GurlFakeLogSink(true)
-#define GURL_NOTREACHED()
+#define GURL_NOTREACHED() GurlFakeLogSink(true)
#endif /* POLYFILLS_BASE_LOGGING_H_ */
diff --git a/url/gurl.cc b/url/gurl.cc
index 73e1070..dea8c7a 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -13,6 +13,7 @@
#include "polyfills/base/check_op.h"
#include "base/no_destructor.h"
+#include "polyfills/base/notreached.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
@@ -158,7 +159,9 @@
if (is_valid_ || spec_.empty())
return spec_;
- GURL_DCHECK(false) << "Trying to get the spec of an invalid URL!";
+ // TODO(crbug.com/851128): Make sure this no longer hits before making
+ // NOTREACHED_NORETURN();
+ GURL_NOTREACHED() << "Trying to get the spec of an invalid URL!";
return gurl_base::EmptyString();
}
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index dd19af2..da0fb68 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -81,10 +81,11 @@
}
for (; i < length; i++) {
if (static_cast<UCHAR>(source[i]) >= 0x80) {
- // ReadChar will fill the code point with kUnicodeReplacementCharacter
- // when the input is invalid, which is what we want.
+ // ReadUTFCharLossy will fill the code point with
+ // kUnicodeReplacementCharacter when the input is invalid, which is what
+ // we want.
base_icu::UChar32 code_point;
- ReadUTFChar(source, &i, length, &code_point);
+ ReadUTFCharLossy(source, &i, length, &code_point);
AppendUTF8EscapedValue(code_point, output);
} else {
// Just append the 7-bit character, possibly escaping it.
@@ -312,24 +313,22 @@
DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
-bool ReadUTFChar(const char* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out) {
- if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
- !gurl_base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
+ if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
return false;
}
return true;
}
-bool ReadUTFChar(const char16_t* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out) {
- if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
- !gurl_base::IsValidCharacter(*code_point_out)) {
+bool ReadUTFCharLossy(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
+ if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
return false;
}
@@ -356,7 +355,7 @@
bool success = true;
for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
- success &= ReadUTFChar(input, &i, input_len, &code_point);
+ success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
AppendUTF8Value(code_point, output);
}
return success;
@@ -368,7 +367,7 @@
bool success = true;
for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
- success &= ReadUTFChar(input, &i, input_len, &code_point);
+ success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
AppendUTF16Value(code_point, output);
}
return success;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index b9ac5bf..199a8b7 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -144,19 +144,19 @@
// UTF-8 functions ------------------------------------------------------------
-// Reads one character in UTF-8 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-8 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
//
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-byte ASCII character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
// character that should be appended, with the given output method. Wrappers
@@ -216,19 +216,19 @@
// UTF-16 functions -----------------------------------------------------------
-// Reads one character in UTF-16 starting at |*begin| in |str| and places
-// the decoded value into |*code_point|. If the character is valid, we will
-// return true. If invalid, we'll return false and put the
-// kUnicodeReplacementCharacter into |*code_point|.
+// Reads one character in UTF-16 starting at |*begin| in |str|, places
+// the decoded value into |*code_point|, and returns true on success.
+// Otherwise, we'll return false and put the kUnicodeReplacementCharacter
+// into |*code_point|.
//
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
-bool ReadUTFChar(const char16_t* str,
- size_t* begin,
- size_t length,
- base_icu::UChar32* code_point_out);
+bool ReadUTFCharLossy(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
inline void AppendUTF16Value(base_icu::UChar32 code_point,
@@ -266,11 +266,11 @@
size_t* begin,
size_t length,
CanonOutput* output) {
- // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
- // us the kUnicodeReplacementCharacter, so we don't have to do special
+ // UTF-16 input. ReadUTFCharLossy will handle invalid characters for us and
+ // give us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
base_icu::UChar32 char_value;
- bool success = ReadUTFChar(str, begin, length, &char_value);
+ bool success = ReadUTFCharLossy(str, begin, length, &char_value);
AppendUTF8EscapedValue(char_value, output);
return success;
}
@@ -280,11 +280,11 @@
size_t* begin,
size_t length,
CanonOutput* output) {
- // ReadUTF8Char will handle invalid characters for us and give us the
+ // ReadUTFCharLossy will handle invalid characters for us and give us the
// kUnicodeReplacementCharacter, so we don't have to do special checking
// after failure, just pass through the failure to the caller.
base_icu::UChar32 ch;
- bool success = ReadUTFChar(str, begin, length, &ch);
+ bool success = ReadUTFCharLossy(str, begin, length, &ch);
AppendUTF8EscapedValue(ch, output);
return success;
}
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index db875a8..e927f74 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -335,13 +335,10 @@
L"bar.com",
"www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
// Invalid unicode characters should fail...
- // ...In wide input, ICU will barf and we'll end up with the input as
- // escaped UTF-8 (the invalid character should be replaced with the
- // replacement character).
- {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+ {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%B7%90zyx.com",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
// ...This is the same as previous but with with escaped.
- {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+ {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%B7%90zyx.com",
Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
// Test name prepping, fullwidth input should be converted to ASCII and
// NOT
@@ -1083,7 +1080,7 @@
test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(),
Component(0, 8), &output));
output.Complete();
- EXPECT_EQ("%EF%BF%BDzyx.com", out_str);
+ EXPECT_EQ("%EF%B7%90zyx.com", out_str);
}
// Should return true for empty input strings.
@@ -1325,10 +1322,9 @@
{"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
Component(0, 37), true},
- // Invalid unicode characters should fail. We only do validation on
- // UTF-16 input, so this doesn't happen on 8-bit.
+ // Unicode Noncharacter (U+FDD0) should not fail.
{"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
- {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+ {nullptr, L"/\xfdd0zyx", "/%EF%B7%90zyx", Component(0, 13), true},
};
typedef bool (*CanonFunc8Bit)(const char*,
@@ -1507,7 +1503,7 @@
{"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true},
{nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
// Test a Unicode invalid character.
- {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true},
+ {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%B7%90", Component(1, 10), true},
// Refs can have # signs and we should preserve them.
{"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true},
{"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
@@ -2129,9 +2125,9 @@
{"JavaScript:Foo", "javascript:Foo"},
{"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
- // Validation errors should not cause failure. See
+ // Unicode invalid characters should not cause failure. See
// https://crbug.com/925614.
- {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
+ {"javascript:\uFFFF", "javascript:%EF%BF%BF"},
};
for (size_t i = 0; i < std::size(path_cases); i++) {
@@ -2169,7 +2165,7 @@
{"Foo", L"Foo", "Foo"},
{"\":This /is interesting;?#", L"\":This /is interesting;?#",
"\":This /is interesting;?#"},
- {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+ {"\uFFFF", L"\uFFFF", "%EF%BF%BF"},
};
for (size_t i = 0; i < std::size(path_cases); i++) {
diff --git a/url/url_features.cc b/url/url_features.cc
index b82afd7..3602bdd 100644
--- a/url/url_features.cc
+++ b/url/url_features.cc
@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "url/url_features.h"
+#include "polyfills/base/feature_list.h"
namespace url {
@@ -25,6 +26,13 @@
gurl_base::FEATURE_ENABLED_BY_DEFAULT);
bool IsUsingIDNA2008NonTransitional() {
+ // If the FeatureList isn't available yet, fall back to the feature's default
+ // state. This may happen during early startup, see crbug.com/1441956.
+ if (!gurl_base::FeatureList::GetInstance()) {
+ return kUseIDNA2008NonTransitional.default_state ==
+ gurl_base::FEATURE_ENABLED_BY_DEFAULT;
+ }
+
return gurl_base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
}
diff --git a/url/url_idna_ascii_only.cc b/url/url_idna_ascii_only.cc
index 2a4f0d3..921858a 100644
--- a/url/url_idna_ascii_only.cc
+++ b/url/url_idna_ascii_only.cc
@@ -12,6 +12,10 @@
#include "base/strings/utf_string_conversions.h"
#include "url/url_canon_internal.h"
+#if false
+#error "This file requires ARC support."
+#endif
+
namespace url {
// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
diff --git a/url/url_util.cc b/url/url_util.cc
index 705ec2e..1fb663e 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -885,8 +885,8 @@
// character.
size_t next_character = i;
base_icu::UChar32 code_point;
- if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
- &code_point)) {
+ if (ReadUTFCharLossy(unescaped_chars.data(), &next_character,
+ unescaped_length, &code_point)) {
// Valid UTF-8 character, convert to UTF-16.
AppendUTF16Value(code_point, output);
i = next_character;
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index a220af9..b052c3d 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -247,6 +247,13 @@
{"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
"pqrstuvwxyz{|}~\x7f/"},
{"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+ // U+FFFF (Noncharacter) should not be replaced with U+FFFD (Replacement
+ // Character) (http://crbug.com/1416021)
+ {"%ef%bf%bf", "\xef\xbf\xbf"},
+ // U+FDD0 (Noncharacter)
+ {"%ef%b7%90", "\xef\xb7\x90"},
+ // U+FFFD (Replacement Character)
+ {"%ef%bf%bd", "\xef\xbf\xbd"},
};
for (size_t i = 0; i < std::size(decode_cases); i++) {