Update googleurl to the latest version
This updates googleurl to upstream revision
bfe9d19846d916ccc17fb8c5d9e1d256fc77a243, dated Mon Mar 16 23:52:37 2020 +0000.
diff --git a/AUTHORS b/AUTHORS
index 13d16fb..a8d90ac 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,7 +7,10 @@
# Organization <fnmatch pattern>
#
# See python fnmatch module documentation for more information.
+#
+# Please keep the list sorted.
+# BEGIN individuals section.
Aaron Boushley <boushley@gmail.com>
Aaron Jacobs <samusaaron3@gmail.com>
Aaron Leventhal <aaronlevbugs@gmail.com>
@@ -18,9 +21,11 @@
Abhishek Agarwal <abhishek.a21@samsung.com>
Abhishek Kanike <abhishek.ka@samsung.com>
Abhishek Singh <abhi.rathore@samsung.com>
+Abul Hasan Md Osama <ahm.osama@samsung.com>
Adam Bonner <abonner-chromium@solscope.com>
Adam Bujalski <abujalski@gmail.com>
Adam Kallai <kadam@inf.u-szeged.hu>
+Adam Labuda <a.labuda@samsung.com>
Adam Roben <adam@github.com>
Adam Treat <adam.treat@samsung.com>
Adam Yi <i@adamyi.com>
@@ -41,6 +46,7 @@
Alex Scheele <alexscheele@gmail.com>
Alexander Douglas <agdoug@amazon.com>
Alexander Guettler <alexander@guettler.io>
+Alexander Rezepkin <etu@vivaldi.net>
Alexander Shalamov <alexander.shalamov@intel.com>
Alexander Sulfrian <alexander@sulfrian.net>
Alexandre Abreu <wiss1976@gmail.com>
@@ -65,6 +71,7 @@
Amruth Raj <amruthraj@motorola.com>
Amruth Raj <ckqr36@motorola.com>
Anand Ratn <anand.ratn@samsung.com>
+Anant Jain <anant90@gmail.com>
Anastasios Cassiotis <tom.cassiotis@gmail.com>
anatoly techtonik <techtonik@gmail.com>
Ancil George <ancilgeorge@samsung.com>
@@ -74,6 +81,7 @@
Andrei Parvu <parvu@adobe.com>
Andrew Boyarshin <andrew.boyarshin@gmail.com>
Andrew Brampton <me@bramp.net>
+Andrew Brindamour <abrindamour@bluejeans.com>
Andrew Hung <andrhung@amazon.com>
Andrew Jorgensen <ajorgens@amazon.com>
Andrew MacPherson <andrew.macpherson@soundtrap.com>
@@ -133,6 +141,7 @@
Branden Archer <bma4@zips.uakron.edu>
Brendan Kirby <brendan.kirby@imgtec.com>
Brendan Long <self@brendanlong.com>
+Brian Clifton <clifton@brave.com>
Brian G. Merrell <bgmerrell@gmail.com>
Brian Konzman, SJ <b.g.konzman@gmail.com>
Brian Luft <brian@electroly.com>
@@ -184,6 +193,7 @@
Clement Scheelfeldt Skau <clementskau@gmail.com>
Clinton Staley <clintstaley@gmail.com>
Connor Pearson <cjp822@gmail.com>
+Conrad Irwin <conrad.irwin@gmail.com>
Craig Schlenter <craig.schlenter@gmail.com>
Csaba Osztrogonác <ossy.szeged@gmail.com>
Daegyu Lee <na7jun8gi@gmail.com>
@@ -200,6 +210,7 @@
Daniel Lockyer <thisisdaniellockyer@gmail.com>
Daniel Nishi <dhnishi@gmail.com>
Daniel Platz <daplatz@googlemail.com>
+Daniel Playfair Cal <daniel.playfair.cal@gmail.com>
Daniel Shaulov <dshaulov@ptc.com>
Daniel Trebbien <dtrebbien@gmail.com>
Daniel Waxweiler <daniel.waxweiler@gmail.com>
@@ -213,6 +224,7 @@
David Fox <david@davidjfox.com>
David Futcher <david.mike.futcher@gmail.com>
David Leen <davileen@amazon.com>
+David Manouchehri <david@davidmanouchehri.com>
David McAllister <mcdavid@amazon.com>
David Michael Barr <david.barr@samsung.com>
David Spellman <dspell@amazon.com>
@@ -222,6 +234,7 @@
Debug Wang <debugwang@tencent.com>
Deepak Dilip Borade <deepak.db@samsung.com>
Deepak Mittal <deepak.m1@samsung.com>
+Deepak Mohan <hop2deep@gmail.com>
Deepak Sharma <deepak.sharma@amd.com>
Deepak Singla <deepak.s@samsung.com>
Deokjin Kim <deokjin81.kim@samsung.com>
@@ -273,13 +286,16 @@
Evangelos Foutras <evangelos@foutrelis.com>
Evgeniy Dushistov <dushistov@gmail.com>
Evgeny Agafonchikov <evgeny.agafonchikov@akvelon.com>
+Fabian Henneke <fabian.henneke@gmail.com>
Fabien Tassin <fta@sofaraway.org>
+Felipe Erias Morandeira <felipeerias@gmail.com>
Felix H. Dahlke <fhd@ubercode.de>
Fengrong Fang <fr.fang@samsung.com>
Fernando Jiménez Moreno <ferjmoreno@gmail.com>
Finbar Crago <finbar.crago@gmail.com>
François Beaufort <beaufort.francois@gmail.com>
Francois Kritzinger <francoisk777@gmail.com>
+Francois Marier <francois@brave.com>
Francois Rauch <leopardb@gmail.com>
Frankie Dintino <fdintino@theatlantic.com>
Franklin Ta <fta2012@gmail.com>
@@ -347,6 +363,7 @@
Huayong Xu <huayong.xu@samsung.com>
Hugo Holgersson <hugo.holgersson@sonymobile.com>
Hui Wang <wanghui07050707@gmail.com>
+Hui Wang <wanghui210@huawei.com>
Huiwon Jo <jhwon0415@gmail.com>
Huy Duong <huy.duongdinh@gmail.com>
Hwanseung Lee <hs1217.lee@gmail.com>
@@ -365,6 +382,8 @@
Ilia Demianenko <ilia.demianenko@gmail.com>
Ilia K <ki.stfu@gmail.com>
Ilya Konstantinov <ilya.konstantinov@gmail.com>
+Imam Mohammad Bokhary <imam.bokhary@samsung.com>
+Imranur Rahman <i.rahman@samsung.com>
Imranur Rahman <ir.shimul@gmail.com>
Ion Rosca <rosca@adobe.com>
Irmak Kavasoglu <irmakkavasoglu@gmail.com>
@@ -415,12 +434,16 @@
Jesse Miller <jesse@jmiller.biz>
Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com>
Jiadong Zhu <jiadong.zhu@linaro.org>
+Jiahe Zhang <jiahe.zhang@intel.com>
Jiajia Qin <jiajia.qin@intel.com>
Jiajie Hu <jiajie.hu@intel.com>
Jianjun Zhu <jianjun.zhu@intel.com>
Jianneng Zhong <muzuiget@gmail.com>
Jiawei Shao <jiawei.shao@intel.com>
+Jiaxun Wei <leuisken@gmail.com>
+Jiaxun Yang <jiaxun.yang@flygoat.com>
Jie Chen <jie.a.chen@intel.com>
+Jihan Chao <jihan@bluejeans.com>
Jihoon Chung <j.c@navercorp.com>
Jihoon Chung <jihoon@gmail.com>
Jihun Brent Kim <devgrapher@gmail.com>
@@ -451,9 +474,11 @@
Jonathan Frazer <listedegarde@gmail.com>
Jonathan Garbee <jonathan@garbee.me>
Jonathan Hacker <jhacker@arcanefour.com>
+Jonathan Kingston <kingstonmailbox@gmail.com>
Jongdeok Kim <jongdeok.kim@navercorp.com>
Jongheon Kim <sapzape@gmail.com>
JongKwon Lee <jongkwon.lee@navercorp.com>
+Jongmok Kim <jongmok.kim@navercorp.com>
Jongsoo Lee <leejongsoo@gmail.com>
Joone Hur <joone.hur@intel.com>
Joonghun Park <pjh0718@gmail.com>
@@ -466,6 +491,7 @@
Joshua Roesslein <jroesslein@gmail.com>
Josué Ratelle <jorat1346@gmail.com>
Josyula Venkat Narasimham <venkat.nj@samsung.com>
+Joyer Huang <collger@gmail.com>
Juan Jose Lopez Jaimez <jj.lopezjaimez@gmail.com>
Juhui Lee <juhui24.lee@samsung.com>
Julien Brianceau <jbriance@cisco.com>
@@ -479,6 +505,7 @@
JungJik Lee <jungjik.lee@samsung.com>
Jungkee Song <jungkee.song@samsung.com>
Junmin Zhu <junmin.zhu@intel.com>
+Junsong Li <ljs.darkfish@gmail.com>
Justin Okamoto <justmoto@amazon.com>
Justin Ribeiro <justin@justinribeiro.com>
Jüri Valdmann <juri.valdmann@qt.io>
@@ -503,12 +530,14 @@
Kenneth Rohde Christiansen <kenneth.r.christiansen@intel.com>
Kenneth Strickland <ken.strickland@gmail.com>
Kenneth Zhou <knthzh@gmail.com>
+Kenny Levinsen <kl@kl.wtf>
Keonho Kim <keonho07.kim@samsung.com>
Ketan Goyal <ketan.goyal@samsung.com>
Kevin Gibbons <bakkot@gmail.com>
Kevin Lee Helpingstine <sig11@reprehensible.net>
Kevin M. McCormick <mckev@amazon.com>
Khasim Syed Mohammed <khasim.mohammed@linaro.org>
+Khem Raj <raj.khem@gmail.com>
Kihong Kwon <kihong.kwon@samsung.com>
Kihoon Ko <rhrlgns777@gmail.com>
Kihwang Kim <pwangkk@gmail.com>
@@ -614,6 +643,7 @@
Md Jobed Hossain <jrony15@gmail.com>
Md Sami Uddin <md.sami@samsung.com>
Michael Cirone <mikecirone@gmail.com>
+Michael Forney <mforney@mforney.org>
Michael Gilbert <floppymaster@gmail.com>
Michael Lopez <lopes92290@gmail.com>
Michael Morrison <codebythepound@gmail.com>
@@ -630,6 +660,7 @@
Mikhail Pozdnyakov <mikhail.pozdnyakov@intel.com>
Milko Leporis <milko.leporis@imgtec.com>
Milton Chiang <milton.chiang@mediatek.com>
+Milutin Smiljanic <msmiljanic.gm@gmail.com>
Minggang Wang <minggang.wang@intel.com>
Mingmin Xie <melvinxie@gmail.com>
Minjeong Lee <apenr1234@gmail.com>
@@ -679,6 +710,7 @@
Oliver Dunk <oliver@oliverdunk.com>
Olli Raula (Old name Olli Syrjälä) <olli.raula@intel.com>
Omar Sandoval <osandov@osandov.com>
+Owen Yuwono <owenyuwono@gmail.com>
Pan Deng <pan.deng@intel.com>
Parag Radke <nrqv63@motorola.com>
Paritosh Kumar <paritosh.in@samsung.com>
@@ -701,6 +733,7 @@
Pavel Ivanov <paivanof@gmail.com>
Pawel Forysiuk <p.forysiuk@samsung.com>
Paweł Hajdan jr <phajdan.jr@gmail.com>
+Piotr Zarycki <piotr.zarycki@gmail.com>
Payal Pandey <payal.pandey@samsung.com>
Peng Hu <penghu@tencent.com>
Peng Jiang <leiyi.jp@gmail.com>
@@ -735,12 +768,14 @@
Qi Yang <qi1988.yang@samsung.com>
Qiankun Miao <qiankun.miao@intel.com>
Qing Zhang <qing.zhang@intel.com>
+Qingmei Li <qingmei.li@vivo.com>
Radu Stavila <stavila@adobe.com>
Radu Velea <radu.velea@intel.com>
Rafael Antognolli <rafael.antognolli@intel.com>
Raghavendra Ghatage <r.ghatage@samsung.com>
Raghu Ram Nagaraj <r.nagaraj@samsung.com>
Rahul Gupta <rahul.g@samsung.com>
+Rajesh Mahindra <rmahindra@uber.com>
Rajneesh Rana <rajneesh.r@samsung.com>
Raman Tenneti <raman.tenneti@gmail.com>
Ramkumar Gokarnesan <ramkumar.gokarnesan@gmail.com>
@@ -824,6 +859,7 @@
Sergey Kipet <sergey.kipet@gmail.com>
Sergey Putilin <p.sergey@samsung.com>
Sergey Shekyan <shekyan@gmail.com>
+Sergey Talantov <sergey.talantov@gmail.com>
Sergio Carlos Morales Angeles <carloschilazo@gmail.com>
Sergiy Belozorov <rryk.ua@gmail.com>
Seshadri Mahalingam <seshadri.mahalingam@gmail.com>
@@ -848,6 +884,7 @@
Shirish S <shirish.s@amd.com>
Shiva Kumar <shiva.k1@samsung.com>
Shivakumar JM <shiva.jm@samsung.com>
+Shobhit Goel <shobhit.goel@samsung.com>
Shouqun Liu <liushouqun@xiaomi.com>
Shouqun Liu <shouqun.liu@intel.com>
Shreeram Kushwaha <shreeram.k@samsung.com>
@@ -857,6 +894,7 @@
Siba Samal <siba.samal@samsung.com>
Siddharth Bagai <b.siddharth@samsung.com>
Siddharth Shankar <funkysidd@gmail.com>
+Simeon Kuran <simeon.kuran@gmail.com>
Simon Arlott <simon.arlott@gmail.com>
Simon La Macchia <smacchia@amazon.com>
Siva Kumar Gunturi <siva.gunturi@samsung.com>
@@ -870,6 +908,7 @@
Sreerenj Balachandran <sreerenj.balachandran@intel.com>
Srirama Chandra Sekhar Mogali <srirama.m@samsung.com>
Staphany Park <stapark008@gmail.com>
+Stephan Hartmann <stha09@googlemail.com>
Stephen Searles <stephen.searles@gmail.com>
Steve Sanders <steve@zanderz.com>
Steven Pennington <spenn@engr.uvic.ca>
@@ -898,6 +937,7 @@
Szabolcs David <davidsz@inf.u-szeged.hu>
Szymon Piechowicz <szymonpiechowicz@o2.pl>
Taeheon Kim <skyrabbits1@gmail.com>
+Taeho Nam <thn7440@gmail.com>
Taehoon Lee <taylor.hoon@gmail.com>
Takashi Fujita <tgfjt.mail@gmail.com>
Takeshi Kurosawa <taken.spc@gmail.com>
@@ -915,6 +955,7 @@
Thomas White <im.toms.inbox@gmail.com>
Tiago Vignatti <tiago.vignatti@intel.com>
Tibor Dusnoki <tibor.dusnoki.91@gmail.com>
+Tibor Dusnoki <tdusnoki@inf.u-szeged.hu>
Tim Ansell <mithro@mithis.com>
Tim Niederhausen <tim@rnc-ag.de>
Timo Gurr <timo.gurr@gmail.com>
@@ -925,9 +966,11 @@
Tom Harwood <tfh@skip.org>
Tomas Popela <tomas.popela@gmail.com>
Torsten Kurbad <google@tk-webart.de>
+Toshihito Kikuchi <leamovret@gmail.com>
Trent Willis <trentmwillis@gmail.com>
Trevor Perrin <unsafe@trevp.net>
Tripta Gupta <tripta.g@samsung.com>
+Tuukka Toivonen <tuukka.toivonen@intel.com>
U. Artie Eoff <ullysses.a.eoff@intel.com>
Umar Hansa <umar.hansa@gmail.com>
Upendra Gowda <upendrag.gowda@gmail.com>
@@ -937,14 +980,16 @@
Valentin Ilie <valentin.ilie@intel.com>
Vamshikrishna Yellenki <vamshi@motorola.com>
Vani Hegde <vani.hegde@samsung.com>
-Varun Chowdhary Paturi <v.paturi@samsung.com>
Vartul Katiyar <vartul.k@samsung.com>
+Varun Chowdhary Paturi <v.paturi@samsung.com>
+Varun Varada <varuncvarada@gmail.com>
Vedran Šajatović <vedran.sajatovic@gmail.com>
Vernon Tang <vt@foilhead.net>
Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
Victor Costan <costan@gmail.com>
Viet-Trung Luu <viettrungluu@gmail.com>
Vinay Anantharaman <vinaya@adobe.com>
+Vinoth Chandar <vinoth@uber.com>
Vipul Bhasin <vipul.bhasin@gmail.com>
Visa Putkinen <v.putkinen@partner.samsung.com>
Vishal Bhatnagar <vishal.b@samsung.com>
@@ -954,8 +999,10 @@
Waihung Fu <fufranci@amazon.com>
Wanming Lin <wanming.lin@intel.com>
Wei Li <wei.c.li@intel.com>
+Wenxiang Qian <leonwxqian@gmail.com>
WenSheng He <wensheng.he@samsung.com>
Wesley Lancel <wesleylancel@gmail.com>
+Wei Wang <wei4.wang@intel.com>
Wesley Wigham <wwigham@gmail.com>
Will Hirsch <chromium@willhirsch.co.uk>
Will Shackleton <w.shackleton@gmail.com>
@@ -979,7 +1026,7 @@
Yael Aharon <yael.aharon@intel.com>
Yan Wang <yan0422.wang@samsung.com>
Yang Gu <yang.gu@intel.com>
-Yannic Bonenberger <contact@yannic-bonenberger.com>
+Yannic Bonenberger <yannic.bonenberger@gmail.com>
Yarin Kaul <yarin.kaul@gmail.com>
Yash Vempati <vempatiy@amazon.com>
Ye Liu <cbakgly@gmail.com>
@@ -1005,7 +1052,9 @@
Youngmin Yoo <youngmin.yoo@samsung.com>
Youngsoo Choi <kenshin.choi@samsung.com>
Youngsun Suh <zard17@gmail.com>
+Yuan-Pin Yu <yjames@uber.com>
Yuhong Sha <yuhong.sha@samsung.com>
+Yuki Tsuchiya <Yuki.Tsuchiya@sony.com>
Yumikiyo Osanai <yumios.art@gmail.com>
Yunchao He <yunchao.he@intel.com>
Yupei Lin <yplam@yplam.com>
@@ -1013,12 +1062,14 @@
Yura Yaroshevich <yura.yaroshevich@gmail.com>
Yuri Gorobets <yuri.gorobets@gmail.com>
Yuriy Taraday <yorik.sar@gmail.com>
+Yuta Kasai <kasai.yuta0810@gmail.com>
Yuvanesh Natarajan <yuvanesh.n1@samsung.com>
Zeno Albisser <zeno.albisser@digia.com>
Zeqin Chen <talonchen@tencent.com>
Zhaoze Zhou <zhaoze.zhou@partner.samsung.com>
Zheda Chen <zheda.chen@intel.com>
Zheng Chuang <zhengchuangscu@gmail.com>
+Zheng Xu <zxu@kobo.com>
Zhengkun Li <zhengkli@amazon.com>
Zhenyu Liang <zhenyu.liang@intel.com>
Zhenyu Shan <zhenyu.shan@intel.com>
@@ -1029,12 +1080,9 @@
Zoltan Kuscsik <zoltan.kuscsik@linaro.org>
Zsolt Borbely <zsborbely.u-szeged@partner.samsung.com>
方觉 (Fang Jue) <fangjue23303@gmail.com>
-Rajesh Mahindra <rmahindra@uber.com>
-Yuan-Pin Yu <yjames@uber.com>
-Vinoth Chandar <vinoth@uber.com>
-Zheng Xu <zxu@kobo.com>
-Junsong Li <ljs.darkfish@gmail.com>
+# END individuals section.
+# BEGIN organizations section.
ACCESS CO., LTD. <*@access-company.com>
Akamai Inc. <*@akamai.com>
ARM Holdings <*@arm.com>
@@ -1042,10 +1090,12 @@
Bocoup <*@bocoup.com>
Canonical Limited <*@canonical.com>
Cloudflare, Inc. <*@cloudflare.com>
+CloudMosa, Inc. <*@cloudmosa.com>
Code Aurora Forum <*@codeaurora.org>
Collabora Limited <*@collabora.com>
Comodo CA Limited
Cosium <*@cosium.com>
+Dell Technologies Inc. <*@dell.corp-partner.google.com>
Duck Duck Go, Inc. <*@duckduckgo.com>
Endless Mobile, Inc. <*@endlessm.com>
Estimote, Inc. <*@estimote.com>
@@ -1062,6 +1112,8 @@
LG Electronics, Inc. <*@lge.com>
Loongson Technology Corporation Limited. <*@loongson.cn>
Macadamian <*@macadamian.com>
+Mail.ru Group <*@corp.mail.ru>
+Make Positive Provar Limited <*@provartesting.com>
Mediatek <*@mediatek.com>
Microsoft <*@microsoft.com>
MIPS Technologies, Inc. <*@mips.com>
@@ -1087,4 +1139,4 @@
Vewd Software AS <*@vewd.com>
Vivaldi Technologies AS <*@vivaldi.com>
Yandex LLC <*@yandex-team.ru>
-Make Positive Provar Limited <*@provartesting.com>
+# END organizations section.
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index 7e2c510..2962537 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -7,58 +7,10 @@
#include "build/build_config.h"
-#if defined(COMPILER_MSVC)
-
-#if !defined(__clang__)
+#if defined(COMPILER_MSVC) && !defined(__clang__)
#error "Only clang-cl is supported on Windows, see https://crbug.com/988071"
#endif
-// Macros for suppressing and disabling warnings on MSVC.
-//
-// Warning numbers are enumerated at:
-// http://msdn.microsoft.com/en-us/library/8x5x43k7(VS.80).aspx
-//
-// The warning pragma:
-// http://msdn.microsoft.com/en-us/library/2c8f766e(VS.80).aspx
-//
-// Using __pragma instead of #pragma inside macros:
-// http://msdn.microsoft.com/en-us/library/d9x1s805.aspx
-
-// MSVC_PUSH_DISABLE_WARNING pushes |n| onto a stack of warnings to be disabled.
-// The warning remains disabled until popped by MSVC_POP_WARNING.
-#define MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \
- __pragma(warning(disable:n))
-
-// Pop effects of innermost MSVC_PUSH_* macro.
-#define MSVC_POP_WARNING() __pragma(warning(pop))
-
-#else // Not MSVC
-
-#define MSVC_PUSH_DISABLE_WARNING(n)
-#define MSVC_POP_WARNING()
-#define MSVC_DISABLE_OPTIMIZE()
-#define MSVC_ENABLE_OPTIMIZE()
-
-#endif // COMPILER_MSVC
-
-// These macros can be helpful when investigating compiler bugs or when
-// investigating issues in local optimized builds, by temporarily disabling
-// optimizations for a single function or file. These macros should never be
-// used to permanently work around compiler bugs or other mysteries, and should
-// not be used in landed changes.
-#if !defined(OFFICIAL_BUILD)
-#if defined(__clang__)
-#define DISABLE_OPTIMIZE() __pragma(clang optimize off)
-#define ENABLE_OPTIMIZE() __pragma(clang optimize on)
-#elif defined(COMPILER_MSVC)
-#define DISABLE_OPTIMIZE() __pragma(optimize("", off))
-#define ENABLE_OPTIMIZE() __pragma(optimize("", on))
-#else
-// These macros are not currently available for other compiler options.
-#endif
-// These macros are not available in official builds.
-#endif // !defined(OFFICIAL_BUILD)
-
// Annotate a variable indicating it's ok if the variable is not used.
// (Typically used to silence a compiler warning when the assignment
// is important for some other reason.)
@@ -141,7 +93,7 @@
// For member functions, the implicit this parameter counts as index 1.
#if defined(COMPILER_GCC) || defined(__clang__)
#define PRINTF_FORMAT(format_param, dots_param) \
- __attribute__((format(printf, format_param, dots_param)))
+ __attribute__((format(printf, format_param, dots_param)))
#else
#define PRINTF_FORMAT(format_param, dots_param)
#endif
@@ -170,14 +122,14 @@
// Mark a memory region fully initialized.
// Use this to annotate code that deliberately reads uninitialized data, for
// example a GC scavenging root set pointers from the stack.
-#define MSAN_UNPOISON(p, size) __msan_unpoison(p, size)
+#define MSAN_UNPOISON(p, size) __msan_unpoison(p, size)
// Check a memory region for initializedness, as if it was being used here.
// If any bits are uninitialized, crash with an MSan report.
// Use this to sanitize data which MSan won't be able to track, e.g. before
// passing data to another process via shared memory.
#define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) \
- __msan_check_mem_is_initialized(p, size)
+ __msan_check_mem_is_initialized(p, size)
#else // MEMORY_SANITIZER
#define MSAN_UNPOISON(p, size)
#define MSAN_CHECK_MEM_IS_INITIALIZED(p, size)
@@ -260,4 +212,39 @@
#endif
#endif
+#if defined(__clang__) && __has_attribute(uninitialized)
+// Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for
+// the specified variable.
+// Library-wide alternative is
+// 'configs -= [ "//build/config/compiler:default_init_stack_vars" ]' in .gn
+// file.
+//
+// See "init_stack_vars" in build/config/compiler/BUILD.gn and
+// http://crbug.com/977230
+// "init_stack_vars" is enabled for non-official builds and we hope to enable it
+// in official builds in 2020 as well. The flag writes a fixed pattern into
+// uninitialized parts of all local variables. In rare cases such initialization
+// is undesirable and the attribute can be used:
+// 1. Degraded performance
+// In most cases the compiler is able to remove the additional stores, e.g. if
+// memory is never accessed or is properly initialized later. Preserved stores
+// mostly will not affect program performance. However, if the compiler fails on
+// some performance-critical code, a benchmark can visibly regress.
+// 2. memset, memcpy calls
+// Compiler may replace some memory writes with memset or memcpy calls. This is
+// not -ftrivial-auto-var-init specific, but it is more likely with the flag.
+// It can be a problem if code is not linked with the C run-time library.
+//
+// Note: The flag is a security risk mitigation feature, so in the future uses
+// of the attribute should be avoided when possible. However, to enable this
+// mitigation on most of the code we need to be less strict now and minimize
+// the number of exceptions later. So if in doubt, feel free to use the
+// attribute, but please document the problem for whoever cleans it up later.
+// E.g. platform, bot, benchmark or test name in patch description or next to
+// the attribute.
+#define STACK_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define STACK_UNINITIALIZED
+#endif
+
#endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/macros.h b/base/macros.h
index cda8e3a..c67bdbd 100644
--- a/base/macros.h
+++ b/base/macros.h
@@ -10,6 +10,10 @@
#ifndef BASE_MACROS_H_
#define BASE_MACROS_H_
+// ALL DISALLOW_xxx MACROS ARE DEPRECATED; DO NOT USE IN NEW CODE.
+// Use explicit deletions instead. See the section on copyability/movability in
+// //styleguide/c++/c++-dos-and-donts.md for more information.
+
// Put this in the declarations for a class to be uncopyable.
#define DISALLOW_COPY(TypeName) \
TypeName(const TypeName&) = delete
diff --git a/base/optional.h b/base/optional.h
index 345147c..a043122 100644
--- a/base/optional.h
+++ b/base/optional.h
@@ -30,11 +30,13 @@
namespace internal {
+struct DummyUnionMember {};
+
template <typename T, bool = std::is_trivially_destructible<T>::value>
struct OptionalStorageBase {
- // Initializing |empty_| here instead of using default member initializing
- // to avoid errors in g++ 4.8.
- constexpr OptionalStorageBase() : empty_('\0') {}
+ // Provide non-defaulted default ctor to make sure it's not deleted by
+ // non-trivial T::T() in the union.
+ constexpr OptionalStorageBase() : dummy_() {}
template <class... Args>
constexpr explicit OptionalStorageBase(in_place_t, Args&&... args)
@@ -65,19 +67,28 @@
bool is_populated_ = false;
union {
- // |empty_| exists so that the union will always be initialized, even when
+ // |dummy_| exists so that the union will always be initialized, even when
// it doesn't contain a value. Union members must be initialized for the
- // constructor to be 'constexpr'.
- char empty_;
+ // constructor to be 'constexpr'. Having a special trivial class for it is
+ // better than e.g. using char, because the latter will have to be
+ // zero-initialized, and the compiler can't optimize this write away, since
+ // it assumes this might be a programmer's invariant. This can also cause
+ // problems for conservative GC in Oilpan. Compiler is free to split shared
+ // and non-shared parts of the union in separate memory locations (or
+ // registers). If conservative GC is triggered at this moment, the stack
+ // scanning routine won't find the correct object pointed from
+ // Optional<HeapObject*>. This dummy valueless struct lets the compiler know
+ // that we don't care about the value of this union member.
+ DummyUnionMember dummy_;
T value_;
};
};
template <typename T>
struct OptionalStorageBase<T, true /* trivially destructible */> {
- // Initializing |empty_| here instead of using default member initializing
- // to avoid errors in g++ 4.8.
- constexpr OptionalStorageBase() : empty_('\0') {}
+ // Provide non-defaulted default ctor to make sure it's not deleted by
+ // non-trivial T::T() in the union.
+ constexpr OptionalStorageBase() : dummy_() {}
template <class... Args>
constexpr explicit OptionalStorageBase(in_place_t, Args&&... args)
@@ -106,10 +117,19 @@
bool is_populated_ = false;
union {
- // |empty_| exists so that the union will always be initialized, even when
+ // |dummy_| exists so that the union will always be initialized, even when
// it doesn't contain a value. Union members must be initialized for the
- // constructor to be 'constexpr'.
- char empty_;
+ // constructor to be 'constexpr'. Having a special trivial class for it is
+ // better than e.g. using char, because the latter will have to be
+ // zero-initialized, and the compiler can't optimize this write away, since
+ // it assumes this might be a programmer's invariant. This can also cause
+ // problems for conservative GC in Oilpan. Compiler is free to split shared
+ // and non-shared parts of the union in separate memory locations (or
+ // registers). If conservative GC is triggered at this moment, the stack
+ // scanning routine won't find the correct object pointed from
+ // Optional<HeapObject*>. This dummy valueless struct lets the compiler know
+ // that we don't care about the value of this union member.
+ DummyUnionMember dummy_;
T value_;
};
};
diff --git a/base/stl_util.h b/base/stl_util.h
index d6ca464..7fc8108 100644
--- a/base/stl_util.h
+++ b/base/stl_util.h
@@ -31,15 +31,18 @@
namespace internal {
-// Calls erase on iterators of matching elements.
+// Calls erase on iterators of matching elements and returns the number of
+// removed elements.
template <typename Container, typename Predicate>
-void IterateAndEraseIf(Container& container, Predicate pred) {
- for (auto it = container.begin(); it != container.end();) {
+size_t IterateAndEraseIf(Container& container, Predicate pred) {
+ size_t old_size = container.size();
+ for (auto it = container.begin(), last = container.end(); it != last;) {
if (pred(*it))
it = container.erase(it);
else
++it;
}
+ return old_size - container.size();
}
template <typename Iter>
@@ -144,6 +147,30 @@
return il.begin();
}
+// std::array::data() was not constexpr prior to C++17 [1].
+// Hence these overloads are provided.
+//
+// [1] https://en.cppreference.com/w/cpp/container/array/data
+template <typename T, size_t N>
+constexpr T* data(std::array<T, N>& array) noexcept {
+ return !array.empty() ? &array[0] : nullptr;
+}
+
+template <typename T, size_t N>
+constexpr const T* data(const std::array<T, N>& array) noexcept {
+ return !array.empty() ? &array[0] : nullptr;
+}
+
+// C++14 implementation of C++17's std::as_const():
+// https://en.cppreference.com/w/cpp/utility/as_const
+template <typename T>
+constexpr std::add_const_t<T>& as_const(T& t) noexcept {
+ return t;
+}
+
+template <typename T>
+void as_const(const T&& t) = delete;
+
// Returns a const reference to the underlying container of a container adapter.
// Works for std::priority_queue, std::queue, and std::stack.
template <class A>
@@ -473,8 +500,9 @@
a2.begin(), a2.end());
}
-// Erase/EraseIf are based on library fundamentals ts v2 erase/erase_if
-// http://en.cppreference.com/w/cpp/experimental/lib_extensions_2
+// Erase/EraseIf are based on C++20's uniform container erasure API:
+// - https://eel.is/c++draft/libraryindex#:erase
+// - https://eel.is/c++draft/libraryindex#:erase_if
// They provide a generic way to erase elements from a container.
// The functions here implement these for the standard containers until those
// functions are available in the C++ standard.
@@ -484,89 +512,109 @@
// have it either.
template <typename CharT, typename Traits, typename Allocator, typename Value>
-void Erase(std::basic_string<CharT, Traits, Allocator>& container,
- const Value& value) {
- container.erase(std::remove(container.begin(), container.end(), value),
- container.end());
+size_t Erase(std::basic_string<CharT, Traits, Allocator>& container,
+ const Value& value) {
+ auto it = std::remove(container.begin(), container.end(), value);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <typename CharT, typename Traits, typename Allocator, class Predicate>
-void EraseIf(std::basic_string<CharT, Traits, Allocator>& container,
- Predicate pred) {
- container.erase(std::remove_if(container.begin(), container.end(), pred),
- container.end());
+size_t EraseIf(std::basic_string<CharT, Traits, Allocator>& container,
+ Predicate pred) {
+ auto it = std::remove_if(container.begin(), container.end(), pred);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <class T, class Allocator, class Value>
-void Erase(std::deque<T, Allocator>& container, const Value& value) {
- container.erase(std::remove(container.begin(), container.end(), value),
- container.end());
+size_t Erase(std::deque<T, Allocator>& container, const Value& value) {
+ auto it = std::remove(container.begin(), container.end(), value);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <class T, class Allocator, class Predicate>
-void EraseIf(std::deque<T, Allocator>& container, Predicate pred) {
- container.erase(std::remove_if(container.begin(), container.end(), pred),
- container.end());
+size_t EraseIf(std::deque<T, Allocator>& container, Predicate pred) {
+ auto it = std::remove_if(container.begin(), container.end(), pred);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <class T, class Allocator, class Value>
-void Erase(std::vector<T, Allocator>& container, const Value& value) {
- container.erase(std::remove(container.begin(), container.end(), value),
- container.end());
+size_t Erase(std::vector<T, Allocator>& container, const Value& value) {
+ auto it = std::remove(container.begin(), container.end(), value);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <class T, class Allocator, class Predicate>
-void EraseIf(std::vector<T, Allocator>& container, Predicate pred) {
- container.erase(std::remove_if(container.begin(), container.end(), pred),
- container.end());
+size_t EraseIf(std::vector<T, Allocator>& container, Predicate pred) {
+ auto it = std::remove_if(container.begin(), container.end(), pred);
+ size_t removed = std::distance(it, container.end());
+ container.erase(it, container.end());
+ return removed;
}
template <class T, class Allocator, class Value>
-void Erase(std::forward_list<T, Allocator>& container, const Value& value) {
+size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) {
// Unlike std::forward_list::remove, this function template accepts
// heterogeneous types and does not force a conversion to the container's
// value type before invoking the == operator.
- container.remove_if([&](const T& cur) { return cur == value; });
+ return EraseIf(container, [&](const T& cur) { return cur == value; });
}
template <class T, class Allocator, class Predicate>
-void EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) {
+size_t EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) {
+ // Note: std::forward_list does not have a size() API, thus we need to use the
+ // O(n) std::distance work-around. However, given that EraseIf is O(n)
+ // already, this should not make a big difference.
+ size_t old_size = std::distance(container.begin(), container.end());
container.remove_if(pred);
+ return old_size - std::distance(container.begin(), container.end());
}
template <class T, class Allocator, class Value>
-void Erase(std::list<T, Allocator>& container, const Value& value) {
+size_t Erase(std::list<T, Allocator>& container, const Value& value) {
// Unlike std::list::remove, this function template accepts heterogeneous
// types and does not force a conversion to the container's value type before
// invoking the == operator.
- container.remove_if([&](const T& cur) { return cur == value; });
+ return EraseIf(container, [&](const T& cur) { return cur == value; });
}
template <class T, class Allocator, class Predicate>
-void EraseIf(std::list<T, Allocator>& container, Predicate pred) {
+size_t EraseIf(std::list<T, Allocator>& container, Predicate pred) {
+ size_t old_size = container.size();
container.remove_if(pred);
+ return old_size - container.size();
}
template <class Key, class T, class Compare, class Allocator, class Predicate>
-void EraseIf(std::map<Key, T, Compare, Allocator>& container, Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::map<Key, T, Compare, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key, class T, class Compare, class Allocator, class Predicate>
-void EraseIf(std::multimap<Key, T, Compare, Allocator>& container,
- Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::multimap<Key, T, Compare, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key, class Compare, class Allocator, class Predicate>
-void EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key, class Compare, class Allocator, class Predicate>
-void EraseIf(std::multiset<Key, Compare, Allocator>& container,
- Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::multiset<Key, Compare, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key,
@@ -575,9 +623,9 @@
class KeyEqual,
class Allocator,
class Predicate>
-void EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key,
@@ -586,10 +634,10 @@
class KeyEqual,
class Allocator,
class Predicate>
-void EraseIf(
+size_t EraseIf(
std::unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& container,
Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key,
@@ -597,9 +645,9 @@
class KeyEqual,
class Allocator,
class Predicate>
-void EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
template <class Key,
@@ -607,9 +655,10 @@
class KeyEqual,
class Allocator,
class Predicate>
-void EraseIf(std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- internal::IterateAndEraseIf(container, pred);
+size_t EraseIf(
+ std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container,
+ Predicate pred) {
+ return internal::IterateAndEraseIf(container, pred);
}
// A helper class to be used as the predicate with |EraseIf| to implement
diff --git a/base/strings/strcat.cc b/base/strings/strcat.cc
index 1774a15..db41a69 100644
--- a/base/strings/strcat.cc
+++ b/base/strings/strcat.cc
@@ -8,8 +8,8 @@
namespace {
-// Reserves an additional amount of size in the given string, growing by at
-// least 2x. Used by StrAppend().
+// Reserves an additional amount of capacity in the given string, growing by at
+// least 2x if necessary. Used by StrAppendT().
//
// The "at least 2x" growing rule duplicates the exponential growth of
// std::string. The problem is that most implementations of reserve() will grow
@@ -18,11 +18,15 @@
// call to StrAppend() would definitely cause a reallocation, and loops with
// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
// StrAppend() to have the same semantics as std::string::append().
-//
-// If the string is empty, we assume that exponential growth is not necessary.
template <typename String>
-void ReserveAdditional(String* str, typename String::size_type additional) {
- str->reserve(std::max(str->size() + additional, str->size() * 2));
+void ReserveAdditionalIfNeeded(String* str,
+ typename String::size_type additional) {
+ const size_t required = str->size() + additional;
+ // Check whether we need to reserve additional capacity at all.
+ if (required <= str->capacity())
+ return;
+
+ str->reserve(std::max(required, str->capacity() * 2));
}
template <typename DestString, typename InputString>
@@ -30,7 +34,7 @@
size_t additional_size = 0;
for (const auto& cur : pieces)
additional_size += cur.size();
- ReserveAdditional(dest, additional_size);
+ ReserveAdditionalIfNeeded(dest, additional_size);
for (const auto& cur : pieces)
dest->append(cur.data(), cur.size());
diff --git a/base/strings/strcat.h b/base/strings/strcat.h
index bcdfe17..220fa24 100644
--- a/base/strings/strcat.h
+++ b/base/strings/strcat.h
@@ -59,10 +59,13 @@
// for this call and generate slightly less code. This is something we can
// explore more in the future.
-BASE_EXPORT std::string StrCat(span<const StringPiece> pieces);
-BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces);
-BASE_EXPORT std::string StrCat(span<const std::string> pieces);
-BASE_EXPORT string16 StrCat(span<const string16> pieces);
+BASE_EXPORT std::string StrCat(span<const StringPiece> pieces)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT std::string StrCat(span<const std::string> pieces)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT;
// Initializer list forwards to the array version.
inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
diff --git a/base/strings/strcat_unittest.cc b/base/strings/strcat_unittest.cc
index d51b840..9374c39 100644
--- a/base/strings/strcat_unittest.cc
+++ b/base/strings/strcat_unittest.cc
@@ -64,4 +64,22 @@
EXPECT_EQ(ASCIIToUTF16("foo122333"), result);
}
+TEST(StrAppendT, ReserveAdditionalIfNeeded) {
+ std::string str = "foo";
+ const char* prev_data = str.data();
+ size_t prev_capacity = str.capacity();
+ // Fully exhaust current capacity.
+ StrAppend(&str, {std::string(str.capacity() - str.size(), 'o')});
+ // Expect that we hit capacity, but didn't require a re-alloc.
+ EXPECT_EQ(str.capacity(), str.size());
+ EXPECT_EQ(prev_data, str.data());
+ EXPECT_EQ(prev_capacity, str.capacity());
+
+ // Force a re-alloc by appending another character.
+ StrAppend(&str, {"o"});
+
+ // Expect at least 2x growth in capacity.
+ EXPECT_LE(2 * prev_capacity, str.capacity());
+}
+
} // namespace base
diff --git a/base/strings/string_number_conversions.cc b/base/strings/string_number_conversions.cc
index 2bf6142..701d71c 100644
--- a/base/strings/string_number_conversions.cc
+++ b/base/strings/string_number_conversions.cc
@@ -13,10 +13,11 @@
#include <type_traits>
#include "polyfills/base/logging.h"
+#include "base/no_destructor.h"
#include "base/numerics/safe_math.h"
-#include "base/scoped_clear_last_error.h"
+#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
-#include "base/third_party/dmg_fp/dmg_fp.h"
+#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
namespace gurl_base {
@@ -360,21 +361,29 @@
return IntToStringT<string16, unsigned long long>::IntToString(value);
}
+static const double_conversion::DoubleToStringConverter*
+GetDoubleToStringConverter() {
+ static NoDestructor<double_conversion::DoubleToStringConverter> converter(
+ double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN,
+ nullptr, nullptr, 'e', -6, 12, 0, 0);
+ return converter.get();
+}
+
std::string NumberToString(double value) {
- // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
char buffer[32];
- dmg_fp::g_fmt(buffer, value);
- return std::string(buffer);
+ double_conversion::StringBuilder builder(buffer, sizeof(buffer));
+ GetDoubleToStringConverter()->ToShortest(value, &builder);
+ return std::string(buffer, builder.position());
}
gurl_base::string16 NumberToString16(double value) {
- // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
char buffer[32];
- dmg_fp::g_fmt(buffer, value);
+ double_conversion::StringBuilder builder(buffer, sizeof(buffer));
+ GetDoubleToStringConverter()->ToShortest(value, &builder);
// The number will be ASCII. This creates the string using the "input
// iterator" variant which promotes from 8-bit to 16-bit via "=".
- return gurl_base::string16(&buffer[0], &buffer[strlen(buffer)]);
+ return gurl_base::string16(&buffer[0], &buffer[builder.position()]);
}
bool StringToInt(StringPiece input, int* output) {
@@ -417,35 +426,37 @@
return String16ToIntImpl(input, output);
}
-bool StringToDouble(const std::string& input, double* output) {
- // Thread-safe? It is on at least Mac, Linux, and Windows.
- internal::ScopedClearLastError clear_errno;
+template <typename STRING, typename CHAR>
+bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) {
+ static NoDestructor<double_conversion::StringToDoubleConverter> converter(
+ double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
+ double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
+ 0.0, 0, nullptr, nullptr);
- char* endptr = nullptr;
- *output = dmg_fp::strtod(input.c_str(), &endptr);
+ int processed_characters_count;
+ *output = converter->StringToDouble(data, input.size(),
+ &processed_characters_count);
// Cases to return false:
- // - If errno is ERANGE, there was an overflow or underflow.
// - If the input string is empty, there was nothing to parse.
- // - If endptr does not point to the end of the string, there are either
- // characters remaining in the string after a parsed number, or the string
- // does not begin with a parseable number. endptr is compared to the
- // expected end given the string's stated length to correctly catch cases
- // where the string contains embedded NUL characters.
+ // - If the value saturated to HUGE_VAL.
+ // - If the entire string was not processed, there are either characters
+ // remaining in the string after a parsed number, or the string does not
+ // begin with a parseable number.
// - If the first character is a space, there was leading whitespace
- return errno == 0 &&
- !input.empty() &&
- input.c_str() + input.length() == endptr &&
- !isspace(input[0]);
+ return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL &&
+ static_cast<size_t>(processed_characters_count) == input.size() &&
+ !IsUnicodeWhitespace(input[0]);
}
-// Note: if you need to add String16ToDouble, first ask yourself if it's
-// really necessary. If it is, probably the best implementation here is to
-// convert to 8-bit and then use the 8-bit version.
+bool StringToDouble(StringPiece input, double* output) {
+ return StringToDoubleImpl(input, input.data(), output);
+}
-// Note: if you need to add an iterator range version of StringToDouble, first
-// ask yourself if it's really necessary. If it is, probably the best
-// implementation here is to instantiate a string and use the string version.
+bool StringToDouble(StringPiece16 input, double* output) {
+ return StringToDoubleImpl(
+ input, reinterpret_cast<const uint16_t*>(input.data()), output);
+}
std::string HexEncode(const void* bytes, size_t size) {
static const char kHexChars[] = "0123456789ABCDEF";
@@ -485,7 +496,8 @@
input.begin(), input.end(), output);
}
-bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) {
+template <typename Container>
+static bool HexStringToByteContainer(StringPiece input, Container* output) {
GURL_DCHECK_EQ(output->size(), 0u);
size_t count = input.size();
if (count == 0 || (count % 2) != 0)
@@ -502,4 +514,32 @@
return true;
}
+bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) {
+ return HexStringToByteContainer(input, output);
+}
+
+bool HexStringToString(StringPiece input, std::string* output) {
+ return HexStringToByteContainer(input, output);
+}
+
+bool HexStringToSpan(StringPiece input, gurl_base::span<uint8_t> output) {
+ size_t count = input.size();
+ if (count == 0 || (count % 2) != 0)
+ return false;
+
+ if (count / 2 != output.size())
+ return false;
+
+ for (uintptr_t i = 0; i < count / 2; ++i) {
+ uint8_t msb = 0; // most significant 4 bits
+ uint8_t lsb = 0; // least significant 4 bits
+ if (!CharToDigit<16>(input[i * 2], &msb) ||
+ !CharToDigit<16>(input[i * 2 + 1], &lsb)) {
+ return false;
+ }
+ output[i] = (msb << 4) | lsb;
+ }
+ return true;
+}
+
} // namespace base
diff --git a/base/strings/string_number_conversions.h b/base/strings/string_number_conversions.h
index a3acab8..872ead2 100644
--- a/base/strings/string_number_conversions.h
+++ b/base/strings/string_number_conversions.h
@@ -98,7 +98,8 @@
// If your input is locale specific, use ICU to read the number.
// WARNING: Will write to |output| even when returning false.
// Read the comments here and above StringToInt() carefully.
-BASE_EXPORT bool StringToDouble(const std::string& input, double* output);
+BASE_EXPORT bool StringToDouble(StringPiece input, double* output);
+BASE_EXPORT bool StringToDouble(StringPiece16 input, double* output);
// Hex encoding ----------------------------------------------------------------
@@ -140,6 +141,17 @@
BASE_EXPORT bool HexStringToBytes(StringPiece input,
std::vector<uint8_t>* output);
+// Same as HexStringToBytes, but for an std::string.
+BASE_EXPORT bool HexStringToString(StringPiece input, std::string* output);
+
+// Decodes the hex string |input| into a presized |output|. The output buffer
+// must be sized exactly to |input.size() / 2| or decoding will fail and no
+// bytes will be written to |output|. Decoding an empty input is also
+// considered a failure. When decoding fails due to encountering invalid input
+// characters, |output| will have been filled with the decoded bytes up until
+// the failure.
+BASE_EXPORT bool HexStringToSpan(StringPiece input, gurl_base::span<uint8_t> output);
+
} // namespace base
#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc
index 93405e2..6f8d171 100644
--- a/base/strings/string_number_conversions_unittest.cc
+++ b/base/strings/string_number_conversions_unittest.cc
@@ -672,7 +672,8 @@
EXPECT_EQ(0xc0ffeeU, output);
}
-TEST(StringNumberConversionsTest, HexStringToBytes) {
+// Tests for HexStringToBytes, HexStringToString, HexStringToSpan.
+TEST(StringNumberConversionsTest, HexStringToBytesStringSpan) {
static const struct {
const std::string input;
const char* output;
@@ -698,16 +699,65 @@
"\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true},
};
- for (size_t i = 0; i < gurl_base::size(cases); ++i) {
- std::vector<uint8_t> output;
- std::vector<uint8_t> compare;
- EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) <<
- i << ": " << cases[i].input;
- for (size_t j = 0; j < cases[i].output_len; ++j)
- compare.push_back(static_cast<uint8_t>(cases[i].output[j]));
- ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input;
- EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) <<
- i << ": " << cases[i].input;
+ for (size_t test_i = 0; test_i < gurl_base::size(cases); ++test_i) {
+ const auto& test = cases[test_i];
+
+ std::string expected_output(test.output, test.output_len);
+
+ // Test HexStringToBytes().
+ {
+ std::vector<uint8_t> output;
+ EXPECT_EQ(test.success, HexStringToBytes(test.input, &output))
+ << test_i << ": " << test.input;
+ EXPECT_EQ(expected_output, std::string(output.begin(), output.end()));
+ }
+
+ // Test HexStringToString().
+ {
+ std::string output;
+ EXPECT_EQ(test.success, HexStringToString(test.input, &output))
+ << test_i << ": " << test.input;
+ EXPECT_EQ(expected_output, output) << test_i << ": " << test.input;
+ }
+
+ // Test HexStringToSpan() with a properly sized output.
+ {
+ std::vector<uint8_t> output;
+ output.resize(test.input.size() / 2);
+
+ EXPECT_EQ(test.success, HexStringToSpan(test.input, output))
+ << test_i << ": " << test.input;
+
+ // On failure the output will only have been partially written (with
+ // everything after the failure being 0).
+ for (size_t i = 0; i < test.output_len; ++i) {
+ EXPECT_EQ(test.output[i], static_cast<char>(output[i]))
+ << test_i << ": " << test.input;
+ }
+ for (size_t i = test.output_len; i < output.size(); ++i) {
+ EXPECT_EQ('\0', static_cast<char>(output[i]))
+ << test_i << ": " << test.input;
+ }
+ }
+
+ // Test HexStringToSpan() with an output that is 1 byte too small.
+ {
+ std::vector<uint8_t> output;
+ if (test.input.size() > 1)
+ output.resize(test.input.size() / 2 - 1);
+
+ EXPECT_FALSE(HexStringToSpan(test.input, output))
+ << test_i << ": " << test.input;
+ }
+
+ // Test HexStringToSpan() with an output that is 1 byte too large.
+ {
+ std::vector<uint8_t> output;
+ output.resize(test.input.size() / 2 + 1);
+
+ EXPECT_FALSE(HexStringToSpan(test.input, output))
+ << test_i << ": " << test.input;
+ }
}
}
@@ -791,6 +841,8 @@
};
for (size_t i = 0; i < gurl_base::size(cases); ++i) {
+ SCOPED_TRACE(
+ StringPrintf("case %" PRIuS " \"%s\"", i, cases[i].input.c_str()));
double output;
errno = 1;
EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output));
@@ -814,13 +866,14 @@
double input;
const char* expected;
} cases[] = {
- {0.0, "0"},
- {1.25, "1.25"},
- {1.33518e+012, "1.33518e+12"},
- {1.33489e+012, "1.33489e+12"},
- {1.33505e+012, "1.33505e+12"},
- {1.33545e+009, "1335450000"},
- {1.33503e+009, "1335030000"},
+ {0.0, "0"},
+ {0.5, "0.5"},
+ {1.25, "1.25"},
+ {1.33518e+012, "1.33518e+12"},
+ {1.33489e+012, "1.33489e+12"},
+ {1.33505e+012, "1.33505e+12"},
+ {1.33545e+009, "1335450000"},
+ {1.33503e+009, "1335030000"},
};
for (const auto& i : cases) {
@@ -832,12 +885,12 @@
const char input_bytes[8] = {0, 0, 0, 0, '\xee', '\x6d', '\x73', '\x42'};
double input = 0;
memcpy(&input, input_bytes, gurl_base::size(input_bytes));
- EXPECT_EQ("1335179083776", NumberToString(input));
+ EXPECT_EQ("1.335179083776e+12", NumberToString(input));
const char input_bytes2[8] =
{0, 0, 0, '\xa0', '\xda', '\x6c', '\x73', '\x42'};
input = 0;
memcpy(&input, input_bytes2, gurl_base::size(input_bytes2));
- EXPECT_EQ("1334890332160", NumberToString(input));
+ EXPECT_EQ("1.33489033216e+12", NumberToString(input));
}
TEST(StringNumberConversionsTest, HexEncode) {
@@ -892,6 +945,7 @@
};
for (const auto& test : cases) {
+ SCOPED_TRACE(StringPrintf("input: \"%s\"", test.input));
double output;
EXPECT_TRUE(StringToDouble(test.input, &output));
EXPECT_EQ(bit_cast<uint64_t>(output), test.expected);
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
index 68f3efc..74f3335 100644
--- a/base/strings/string_piece.cc
+++ b/base/strings/string_piece.cc
@@ -53,36 +53,6 @@
namespace internal {
template<typename STR>
-void CopyToStringT(const BasicStringPiece<STR>& self, STR* target) {
- if (self.empty())
- target->clear();
- else
- target->assign(self.data(), self.size());
-}
-
-void CopyToString(const StringPiece& self, std::string* target) {
- CopyToStringT(self, target);
-}
-
-void CopyToString(const StringPiece16& self, string16* target) {
- CopyToStringT(self, target);
-}
-
-template<typename STR>
-void AppendToStringT(const BasicStringPiece<STR>& self, STR* target) {
- if (!self.empty())
- target->append(self.data(), self.size());
-}
-
-void AppendToString(const StringPiece& self, std::string* target) {
- AppendToStringT(self, target);
-}
-
-void AppendToString(const StringPiece16& self, string16* target) {
- AppendToStringT(self, target);
-}
-
-template<typename STR>
size_t copyT(const BasicStringPiece<STR>& self,
typename STR::value_type* buf,
size_t n,
@@ -219,8 +189,11 @@
size_t find_first_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos) {
+ // Use the faster std::find() if searching for a single character.
StringPiece16::const_iterator found =
- std::find_first_of(self.begin() + pos, self.end(), s.begin(), s.end());
+ s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
+ : std::find_first_of(self.begin() + pos, self.end(),
+ s.begin(), s.end());
if (found == self.end())
return StringPiece16::npos;
return found - self.begin();
@@ -435,16 +408,5 @@
return substrT(self, pos, n);
}
-#if GURL_DCHECK_IS_ON()
-void AssertIteratorsInOrder(std::string::const_iterator begin,
- std::string::const_iterator end) {
- GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
-}
-void AssertIteratorsInOrder(string16::const_iterator begin,
- string16::const_iterator end) {
- GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
-}
-#endif
-
} // namespace internal
} // namespace base
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index 5359af6..964ec67 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -47,12 +47,6 @@
// template internal to the .cc file.
namespace internal {
-BASE_EXPORT void CopyToString(const StringPiece& self, std::string* target);
-BASE_EXPORT void CopyToString(const StringPiece16& self, string16* target);
-
-BASE_EXPORT void AppendToString(const StringPiece& self, std::string* target);
-BASE_EXPORT void AppendToString(const StringPiece16& self, string16* target);
-
BASE_EXPORT size_t copy(const StringPiece& self,
char* buf,
size_t n,
@@ -141,21 +135,12 @@
size_t pos,
size_t n);
-#if GURL_DCHECK_IS_ON()
-// Asserts that begin <= end to catch some errors with iterator usage.
-BASE_EXPORT void AssertIteratorsInOrder(std::string::const_iterator begin,
- std::string::const_iterator end);
-BASE_EXPORT void AssertIteratorsInOrder(string16::const_iterator begin,
- string16::const_iterator end);
-#endif
-
} // namespace internal
// BasicStringPiece ------------------------------------------------------------
// Defines the types, methods, operators, and data members common to both
-// StringPiece and StringPiece16. Do not refer to this class directly, but
-// rather to BasicStringPiece, StringPiece, or StringPiece16.
+// StringPiece and StringPiece16.
//
// This is templatized by string class type rather than character type, so
// BasicStringPiece<std::string> or BasicStringPiece<gurl_base::string16>.
@@ -178,23 +163,32 @@
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected (likewise for char16, string16, StringPiece16).
constexpr BasicStringPiece() : ptr_(NULL), length_(0) {}
- // TODO(dcheng): Construction from nullptr is not allowed for
+ // TODO(crbug.com/1049498): Construction from nullptr is not allowed for
// std::basic_string_view, so remove the special handling for it.
// Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
// isn't constexpr until C++17.
constexpr BasicStringPiece(const value_type* str)
: ptr_(str), length_(!str ? 0 : CharTraits<value_type>::length(str)) {}
+ // Explicitly disallow construction from nullptr. Note that this does not
+ // catch construction from runtime strings that might be null.
+ // Note: The following is just a more elaborate way of spelling
+ // `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is
+ // not supported by the PNaCl toolchain.
+ // TODO(crbug.com/1049498): Remove once we GURL_CHECK(str) in the constructor
+ // above.
+ template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>>
+ BasicStringPiece(T) {
+ static_assert(sizeof(T) == 0, // Always false.
+ "StringPiece does not support construction from nullptr, use "
+ "the default constructor instead.");
+ }
BasicStringPiece(const STRING_TYPE& str)
: ptr_(str.data()), length_(str.size()) {}
constexpr BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end) {
-#if GURL_DCHECK_IS_ON()
- // This assertion is done out-of-line to avoid bringing in logging.h and
- // instantiating logging macros for every instantiation.
- internal::AssertIteratorsInOrder(begin, end);
-#endif
+ GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
length_ = static_cast<size_t>(std::distance(begin, end));
// The length test before assignment is to avoid dereferencing an iterator
@@ -211,19 +205,6 @@
constexpr size_type length() const noexcept { return length_; }
bool empty() const { return length_ == 0; }
- void clear() {
- ptr_ = NULL;
- length_ = 0;
- }
- void set(const value_type* data, size_type len) {
- ptr_ = data;
- length_ = len;
- }
- void set(const value_type* str) {
- ptr_ = str;
- length_ = str ? STRING_TYPE::traits_type::length(str) : 0;
- }
-
constexpr value_type operator[](size_type i) const {
GURL_CHECK(i < length_);
return ptr_[i];
@@ -280,16 +261,6 @@
size_type max_size() const { return length_; }
size_type capacity() const { return length_; }
- // Sets the value of the given string target type to be the current string.
- // This saves a temporary over doing |a = b.as_string()|
- void CopyToString(STRING_TYPE* target) const {
- internal::CopyToString(*this, target);
- }
-
- void AppendToString(STRING_TYPE* target) const {
- internal::AppendToString(*this, target);
- }
-
size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
return internal::copy(*this, buf, n, pos);
}
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index 8e245e6..0777549 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc
@@ -190,7 +190,7 @@
ASSERT_TRUE(e.empty());
ASSERT_EQ(e.begin(), e.end());
- d.clear();
+ d = BasicStringPiece<TypeParam>();
ASSERT_EQ(d.size(), 0U);
ASSERT_TRUE(d.empty());
ASSERT_EQ(d.data(), nullptr);
@@ -213,7 +213,7 @@
BasicStringPiece<TypeParam> c(xyz);
BasicStringPiece<TypeParam> d(foobar);
- d.clear();
+ d = Piece();
Piece e;
TypeParam temp(TestFixture::as_string("123"));
temp.push_back('\0');
@@ -511,14 +511,14 @@
c.remove_suffix(c.size());
ASSERT_EQ(c, e);
- // set
- c.set(foobar.c_str());
+ // assignment
+ c = foobar.c_str();
ASSERT_EQ(c, a);
- c.set(foobar.c_str(), 6);
+ c = {foobar.c_str(), 6};
ASSERT_EQ(c, a);
- c.set(foobar.c_str(), 0);
+ c = {foobar.c_str(), 0};
ASSERT_EQ(c, e);
- c.set(foobar.c_str(), 7); // Note, has an embedded NULL
+ c = {foobar.c_str(), 7}; // Note, has an embedded NULL
ASSERT_NE(c, a);
// as_string
@@ -543,25 +543,6 @@
StringPiece e;
std::string s2;
- // CopyToString
- a.CopyToString(&s2);
- ASSERT_EQ(s2.size(), 6U);
- ASSERT_EQ(s2, "foobar");
- b.CopyToString(&s2);
- ASSERT_EQ(s2.size(), 7U);
- ASSERT_EQ(s1, s2);
- e.CopyToString(&s2);
- ASSERT_TRUE(s2.empty());
-
- // AppendToString
- s2.erase();
- a.AppendToString(&s2);
- ASSERT_EQ(s2.size(), 6U);
- ASSERT_EQ(s2, "foobar");
- a.AppendToString(&s2);
- ASSERT_EQ(s2.size(), 12U);
- ASSERT_EQ(s2, "foobarfoobar");
-
// starts_with
ASSERT_TRUE(a.starts_with(a));
ASSERT_TRUE(a.starts_with("foo"));
@@ -587,21 +568,16 @@
ASSERT_TRUE(!e.ends_with(a));
StringPiece c;
- c.set("foobar", 6);
+ c = {"foobar", 6};
ASSERT_EQ(c, a);
- c.set("foobar", 0);
+ c = {"foobar", 0};
ASSERT_EQ(c, e);
- c.set("foobar", 7);
+ c = {"foobar", 7};
ASSERT_NE(c, a);
}
TYPED_TEST(CommonStringPieceTest, CheckNULL) {
- // we used to crash here, but now we don't.
- BasicStringPiece<TypeParam> s(nullptr);
- ASSERT_EQ(s.data(), nullptr);
- ASSERT_EQ(s.size(), 0U);
-
- s.set(nullptr);
+ BasicStringPiece<TypeParam> s;
ASSERT_EQ(s.data(), nullptr);
ASSERT_EQ(s.size(), 0U);
@@ -699,7 +675,7 @@
BasicStringPiece<TypeParam>(
str.c_str(),
static_cast<typename BasicStringPiece<TypeParam>::size_type>(0)));
- ASSERT_EQ(empty, BasicStringPiece<TypeParam>(nullptr));
+ ASSERT_EQ(empty, BasicStringPiece<TypeParam>());
ASSERT_TRUE(
empty ==
BasicStringPiece<TypeParam>(
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc
index ef9c74d..3816501 100644
--- a/base/strings/string_split.cc
+++ b/base/strings/string_split.cc
@@ -14,27 +14,15 @@
namespace {
-// PieceToOutputType converts a StringPiece as needed to a given output type,
-// which is either the same type of StringPiece (a NOP) or the corresponding
-// non-piece string type.
-//
-// The default converter is a NOP, it works when the OutputType is the
-// correct StringPiece.
-template<typename Str, typename OutputType>
-OutputType PieceToOutputType(BasicStringPiece<Str> piece) {
- return piece;
-}
-template<> // Convert StringPiece to std::string
-std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {
- return piece.as_string();
-}
-template<> // Convert StringPiece16 to string16.
-string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {
- return piece.as_string();
-}
-
// Returns either the ASCII or UTF-16 whitespace.
template<typename Str> BasicStringPiece<Str> WhitespaceForType();
+#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
+template <>
+WStringPiece WhitespaceForType<std::wstring>() {
+ return kWhitespaceWide;
+}
+#endif
+
template<> StringPiece16 WhitespaceForType<string16>() {
return kWhitespaceUTF16;
}
@@ -42,37 +30,12 @@
return kWhitespaceASCII;
}
-// Optimize the single-character case to call find() on the string instead,
-// since this is the common case and can be made faster. This could have been
-// done with template specialization too, but would have been less clear.
-//
-// There is no corresponding FindFirstNotOf because StringPiece already
-// implements these different versions that do the optimized searching.
-size_t FindFirstOf(StringPiece piece, char c, size_t pos) {
- return piece.find(c, pos);
-}
-size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {
- return piece.find(c, pos);
-}
-size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {
- return piece.find_first_of(one_of, pos);
-}
-size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {
- return piece.find_first_of(one_of, pos);
-}
-
// General string splitter template. Can take 8- or 16-bit input, can produce
-// the corresponding string or StringPiece output, and can take single- or
-// multiple-character delimiters.
-//
-// DelimiterType is either a character (Str::value_type) or a string piece of
-// multiple characters (BasicStringPiece<Str>). StringPiece has a version of
-// find for both of these cases, and the single-character version is the most
-// common and can be implemented faster, which is why this is a template.
-template<typename Str, typename OutputStringType, typename DelimiterType>
+// the corresponding string or StringPiece output.
+template <typename OutputStringType, typename Str>
static std::vector<OutputStringType> SplitStringT(
BasicStringPiece<Str> str,
- DelimiterType delimiter,
+ BasicStringPiece<Str> delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
std::vector<OutputStringType> result;
@@ -81,7 +44,7 @@
size_t start = 0;
while (start != Str::npos) {
- size_t end = FindFirstOf(str, delimiter, start);
+ size_t end = str.find_first_of(delimiter, start);
BasicStringPiece<Str> piece;
if (end == Str::npos) {
@@ -96,7 +59,7 @@
piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
if (result_type == SPLIT_WANT_ALL || !piece.empty())
- result.push_back(PieceToOutputType<Str, OutputStringType>(piece));
+ result.emplace_back(piece);
}
return result;
}
@@ -115,7 +78,7 @@
DVLOG(1) << "cannot find delimiter in: " << input;
return false; // No delimiter.
}
- input.substr(0, end_key_pos).CopyToString(&result_pair.first);
+ result_pair.first = std::string(input.substr(0, end_key_pos));
// Find the value string.
StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos);
@@ -124,22 +87,23 @@
DVLOG(1) << "cannot parse value from input: " << input;
return false; // No value.
}
- remains.substr(begin_value_pos, remains.size() - begin_value_pos)
- .CopyToString(&result_pair.second);
+
+ result_pair.second = std::string(
+ remains.substr(begin_value_pos, remains.size() - begin_value_pos));
return true;
}
-template <typename Str, typename OutputStringType>
-void SplitStringUsingSubstrT(BasicStringPiece<Str> input,
- BasicStringPiece<Str> delimiter,
- WhitespaceHandling whitespace,
- SplitResult result_type,
- std::vector<OutputStringType>* result) {
+template <typename OutputStringType, typename Str>
+std::vector<OutputStringType> SplitStringUsingSubstrT(
+ BasicStringPiece<Str> input,
+ BasicStringPiece<Str> delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
using Piece = BasicStringPiece<Str>;
using size_type = typename Piece::size_type;
- result->clear();
+ std::vector<OutputStringType> result;
for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
begin_index = end_index + delimiter.size()) {
end_index = input.find(delimiter, begin_index);
@@ -151,8 +115,10 @@
term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
if (result_type == SPLIT_WANT_ALL || !term.empty())
- result->push_back(PieceToOutputType<Str, OutputStringType>(term));
+ result.emplace_back(term);
}
+
+ return result;
}
} // namespace
@@ -161,48 +127,29 @@
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- if (separators.size() == 1) {
- return SplitStringT<std::string, std::string, char>(
- input, separators[0], whitespace, result_type);
- }
- return SplitStringT<std::string, std::string, StringPiece>(
- input, separators, whitespace, result_type);
+ return SplitStringT<std::string>(input, separators, whitespace, result_type);
}
std::vector<string16> SplitString(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- if (separators.size() == 1) {
- return SplitStringT<string16, string16, char16>(
- input, separators[0], whitespace, result_type);
- }
- return SplitStringT<string16, string16, StringPiece16>(
- input, separators, whitespace, result_type);
+ return SplitStringT<string16>(input, separators, whitespace, result_type);
}
std::vector<StringPiece> SplitStringPiece(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- if (separators.size() == 1) {
- return SplitStringT<std::string, StringPiece, char>(
- input, separators[0], whitespace, result_type);
- }
- return SplitStringT<std::string, StringPiece, StringPiece>(
- input, separators, whitespace, result_type);
+ return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
}
std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
- if (separators.size() == 1) {
- return SplitStringT<string16, StringPiece16, char16>(
- input, separators[0], whitespace, result_type);
- }
- return SplitStringT<string16, StringPiece16, StringPiece16>(
- input, separators, whitespace, result_type);
+ return SplitStringT<StringPiece16>(input, separators, whitespace,
+ result_type);
}
bool SplitStringIntoKeyValuePairs(StringPiece input,
@@ -240,18 +187,16 @@
StringPiece16 delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- std::vector<string16> result;
- SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
- return result;
+ return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
+ result_type);
}
std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- std::vector<std::string> result;
- SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
- return result;
+ return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
+ result_type);
}
std::vector<StringPiece16> SplitStringPieceUsingSubstr(
@@ -260,8 +205,8 @@
WhitespaceHandling whitespace,
SplitResult result_type) {
std::vector<StringPiece16> result;
- SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
- return result;
+ return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
+ result_type);
}
std::vector<StringPiece> SplitStringPieceUsingSubstr(
@@ -269,9 +214,41 @@
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
- std::vector<StringPiece> result;
- SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
- return result;
+ return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
+ result_type);
}
+#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
+std::vector<std::wstring> SplitString(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
+}
+
+std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
+}
+
+std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
+ result_type);
+}
+
+std::vector<WStringPiece> SplitStringPieceUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) {
+ return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
+ result_type);
+}
+#endif
+
} // namespace base
diff --git a/base/strings/string_split.h b/base/strings/string_split.h
index 1894d05..02c2c59 100644
--- a/base/strings/string_split.h
+++ b/base/strings/string_split.h
@@ -12,6 +12,7 @@
#include "polyfills/base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
+#include "build/build_config.h"
namespace gurl_base {
@@ -39,26 +40,31 @@
// Split the given string on ANY of the given separators, returning copies of
// the result.
//
+// Note this is the inverse of JoinString() defined in string_util.h.
+//
// To split on either commas or semicolons, keeping all whitespace:
//
// std::vector<std::string> tokens = gurl_base::SplitString(
-// input, ",;", gurl_base::KEEP_WHITESPACE, gurl_base::SPLIT_WANT_ALL);
-BASE_EXPORT std::vector<std::string> SplitString(
- StringPiece input,
- StringPiece separators,
- WhitespaceHandling whitespace,
- SplitResult result_type);
-BASE_EXPORT std::vector<string16> SplitString(
- StringPiece16 input,
- StringPiece16 separators,
- WhitespaceHandling whitespace,
- SplitResult result_type);
+// input, ",;", gurl_base::KEEP_WHITESPACE,
+// gurl_base::SPLIT_WANT_ALL);
+BASE_EXPORT std::vector<std::string> SplitString(StringPiece input,
+ StringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input,
+ StringPiece16 separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type)
+ WARN_UNUSED_RESULT;
// Like SplitString above except it returns a vector of StringPieces which
// reference the original buffer without copying. Although you have to be
// careful to keep the original string unmodified, this provides an efficient
// way to iterate through tokens in a string.
//
+// Note this is the inverse of JoinString() defined in string_util.h.
+//
// To iterate through all whitespace-separated tokens in an input string:
//
// for (const auto& cur :
@@ -70,12 +76,12 @@
StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
using StringPairs = std::vector<std::pair<std::string, std::string>>;
@@ -102,12 +108,12 @@
StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr(
StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
// Like SplitStringUsingSubstr above except it returns a vector of StringPieces
// which reference the original buffer without copying. Although you have to be
@@ -125,12 +131,38 @@
StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr(
StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
- SplitResult result_type);
+ SplitResult result_type) WARN_UNUSED_RESULT;
+
+#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type)
+ WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<WStringPiece> SplitStringPiece(
+ WStringPiece input,
+ WStringPiece separators,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+
+BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr(
+ WStringPiece input,
+ WStringPiece delimiter,
+ WhitespaceHandling whitespace,
+ SplitResult result_type) WARN_UNUSED_RESULT;
+#endif
} // namespace base
diff --git a/base/strings/string_split_unittest.cc b/base/strings/string_split_unittest.cc
index 993450a..f84d4b8 100644
--- a/base/strings/string_split_unittest.cc
+++ b/base/strings/string_split_unittest.cc
@@ -47,7 +47,7 @@
}
TEST_F(SplitStringIntoKeyValuePairsUsingSubstrTest,
- MissingKeyValuePairDelimeter) {
+ MissingKeyValuePairDelimiter) {
EXPECT_TRUE(SplitStringIntoKeyValuePairsUsingSubstr(
"key1:value1,,key3:value3",
':', // Key-value delimiter
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index 2b2591d..742319a 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -51,21 +51,6 @@
return elem1.parameter < elem2.parameter;
}
-// Overloaded function to append one string onto the end of another. Having a
-// separate overload for |source| as both string and StringPiece allows for more
-// efficient usage from functions templated to work with either type (avoiding a
-// redundant call to the BasicStringPiece constructor in both cases).
-template <typename string_type>
-inline void AppendToString(string_type* target, const string_type& source) {
- target->append(source);
-}
-
-template <typename string_type>
-inline void AppendToString(string_type* target,
- const BasicStringPiece<string_type>& source) {
- source.AppendToString(target);
-}
-
// Assuming that a pointer is the size of a "machine word", then
// uintptr_t is an integer type that is also a machine word.
using MachineWord = uintptr_t;
@@ -237,17 +222,16 @@
bool ReplaceChars(const string16& input,
StringPiece16 replace_chars,
- const string16& replace_with,
+ StringPiece16 replace_with,
string16* output) {
- return ReplaceCharsT(input, replace_chars, StringPiece16(replace_with),
- output);
+ return ReplaceCharsT(input, replace_chars, replace_with, output);
}
bool ReplaceChars(const std::string& input,
StringPiece replace_chars,
- const std::string& replace_with,
+ StringPiece replace_with,
std::string* output) {
- return ReplaceCharsT(input, replace_chars, StringPiece(replace_with), output);
+ return ReplaceCharsT(input, replace_chars, replace_with, output);
}
bool RemoveChars(const string16& input,
@@ -262,8 +246,8 @@
return ReplaceCharsT(input, remove_chars, StringPiece(), output);
}
-template<typename Str>
-TrimPositions TrimStringT(const Str& input,
+template <typename Str>
+TrimPositions TrimStringT(BasicStringPiece<Str> input,
BasicStringPiece<Str> trim_chars,
TrimPositions positions,
Str* output) {
@@ -271,40 +255,40 @@
// a StringPiece version of input to be able to call find* on it with the
// StringPiece version of trim_chars (normally the trim_chars will be a
// constant so avoid making a copy).
- BasicStringPiece<Str> input_piece(input);
const size_t last_char = input.length() - 1;
- const size_t first_good_char = (positions & TRIM_LEADING) ?
- input_piece.find_first_not_of(trim_chars) : 0;
- const size_t last_good_char = (positions & TRIM_TRAILING) ?
- input_piece.find_last_not_of(trim_chars) : last_char;
+ const size_t first_good_char =
+ (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0;
+ const size_t last_good_char = (positions & TRIM_TRAILING)
+ ? input.find_last_not_of(trim_chars)
+ : last_char;
// When the string was all trimmed, report that we stripped off characters
// from whichever position the caller was interested in. For empty input, we
// stripped no characters, but we still need to clear |output|.
- if (input.empty() ||
- (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
+ if (input.empty() || first_good_char == Str::npos ||
+ last_good_char == Str::npos) {
bool input_was_empty = input.empty(); // in case output == &input
output->clear();
return input_was_empty ? TRIM_NONE : positions;
}
// Trim.
- *output =
- input.substr(first_good_char, last_good_char - first_good_char + 1);
+ output->assign(input.data() + first_good_char,
+ last_good_char - first_good_char + 1);
// Return where we trimmed from.
return static_cast<TrimPositions>(
- ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
- ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
+ (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) |
+ (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING));
}
-bool TrimString(const string16& input,
+bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
string16* output) {
return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
}
-bool TrimString(const std::string& input,
+bool TrimString(StringPiece input,
StringPiece trim_chars,
std::string* output) {
return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
@@ -370,7 +354,7 @@
output->clear();
}
-TrimPositions TrimWhitespace(const string16& input,
+TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
string16* output) {
return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
@@ -381,7 +365,7 @@
return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
}
-TrimPositions TrimWhitespaceASCII(const std::string& input,
+TrimPositions TrimWhitespaceASCII(StringPiece input,
TrimPositions positions,
std::string* output) {
return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
@@ -506,20 +490,29 @@
}
#endif
-bool IsStringUTF8(StringPiece str) {
- const char *src = str.data();
+template <bool (*Validator)(uint32_t)>
+inline static bool DoIsStringUTF8(StringPiece str) {
+ const char* src = str.data();
int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0;
while (char_index < src_len) {
int32_t code_point;
CBU8_NEXT(src, char_index, src_len, code_point);
- if (!IsValidCharacter(code_point))
+ if (!Validator(code_point))
return false;
}
return true;
}
+bool IsStringUTF8(StringPiece str) {
+ return DoIsStringUTF8<IsValidCharacter>(str);
+}
+
+bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
+ return DoIsStringUTF8<IsValidCodepoint>(str);
+}
+
// Implementation note: Normally this function will be called with a hardcoded
// constant for the lowercase_ascii parameter. Constructing a StringPiece from
// a C constant requires running strlen, so the result will be two passes
@@ -913,7 +906,7 @@
template <class string_type>
inline typename string_type::value_type* WriteIntoT(string_type* str,
size_t length_with_null) {
- GURL_DCHECK_GT(length_with_null, 1u);
+ GURL_DCHECK_GE(length_with_null, 1u);
str->reserve(length_with_null);
str->resize(length_with_null - 1);
return &((*str)[0]);
@@ -927,11 +920,6 @@
return WriteIntoT(str, length_with_null);
}
-#if defined(_MSC_VER) && !defined(__clang__)
-// Work around VC++ code-gen bug. https://crbug.com/804884
-#pragma optimize("", off)
-#endif
-
// Generic version for all JoinString overloads. |list_type| must be a sequence
// (std::vector or std::initializer_list) of strings/StringPieces (std::string,
// string16, StringPiece or StringPiece16). |string_type| is either std::string
@@ -939,7 +927,7 @@
template <typename list_type, typename string_type>
static string_type JoinStringT(const list_type& parts,
BasicStringPiece<string_type> sep) {
- if (parts.size() == 0)
+ if (gurl_base::empty(parts))
return string_type();
// Pre-allocate the eventual size of the string. Start with the size of all of
@@ -952,15 +940,12 @@
auto iter = parts.begin();
GURL_DCHECK(iter != parts.end());
- AppendToString(&result, *iter);
+ result.append(iter->data(), iter->size());
++iter;
for (; iter != parts.end(); ++iter) {
- sep.AppendToString(&result);
- // Using the overloaded AppendToString allows this template function to work
- // on both strings and StringPieces without creating an intermediate
- // StringPiece object.
- AppendToString(&result, *iter);
+ result.append(sep.data(), sep.size());
+ result.append(iter->data(), iter->size());
}
// Sanity-check that we pre-allocated correctly.
@@ -979,11 +964,6 @@
return JoinStringT(parts, separator);
}
-#if defined(_MSC_VER) && !defined(__clang__)
-// Work around VC++ code-gen bug. https://crbug.com/804884
-#pragma optimize("", on)
-#endif
-
std::string JoinString(const std::vector<StringPiece>& parts,
StringPiece separator) {
return JoinStringT(parts, separator);
@@ -1085,6 +1065,36 @@
return result;
}
+#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
+
+TrimPositions TrimWhitespace(WStringPiece input,
+ TrimPositions positions,
+ std::wstring* output) {
+ return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output);
+}
+
+WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) {
+ return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions);
+}
+
+bool TrimString(WStringPiece input,
+ WStringPiece trim_chars,
+ std::wstring* output) {
+ return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
+}
+
+WStringPiece TrimString(WStringPiece input,
+ WStringPiece trim_chars,
+ TrimPositions positions) {
+ return TrimStringPieceT(input, trim_chars, positions);
+}
+
+wchar_t* WriteInto(std::wstring* str, size_t length_with_null) {
+ return WriteIntoT(str, length_with_null);
+}
+
+#endif
+
// The following code is compatible with the OpenBSD lcpy interface. See:
// http://www.gratisoft.us/todd/papers/strlcpy.html
// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index 5a8cb02..ed3118d 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -160,6 +160,7 @@
// by HTML5, and don't include control characters.
BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
+BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF.
BASE_EXPORT extern const char kWhitespaceASCII[];
BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
@@ -183,11 +184,11 @@
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool ReplaceChars(const string16& input,
StringPiece16 replace_chars,
- const string16& replace_with,
+ StringPiece16 replace_with,
string16* output);
BASE_EXPORT bool ReplaceChars(const std::string& input,
StringPiece replace_chars,
- const std::string& replace_with,
+ StringPiece replace_with,
std::string* output);
enum TrimPositions {
@@ -203,10 +204,10 @@
//
// It is safe to use the same variable for both |input| and |output| (this is
// the normal usage to trim in-place).
-BASE_EXPORT bool TrimString(const string16& input,
+BASE_EXPORT bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
string16* output);
-BASE_EXPORT bool TrimString(const std::string& input,
+BASE_EXPORT bool TrimString(StringPiece input,
StringPiece trim_chars,
std::string* output);
@@ -268,6 +269,24 @@
inline const char16* as_u16cstr(WStringPiece str) {
return reinterpret_cast<const char16*>(str.data());
}
+
+// Utility functions to convert between gurl_base::WStringPiece and
+// gurl_base::StringPiece16.
+inline WStringPiece AsWStringPiece(StringPiece16 str) {
+ return WStringPiece(as_wcstr(str.data()), str.size());
+}
+
+inline StringPiece16 AsStringPiece16(WStringPiece str) {
+ return StringPiece16(as_u16cstr(str.data()), str.size());
+}
+
+inline std::wstring AsWString(StringPiece16 str) {
+ return std::wstring(as_wcstr(str.data()), str.size());
+}
+
+inline string16 AsString16(WStringPiece str) {
+ return string16(as_u16cstr(str.data()), str.size());
+}
#endif // defined(WCHAR_T_IS_UTF16)
// Trims any whitespace from either end of the input string.
@@ -277,12 +296,12 @@
//
// The std::string versions return where whitespace was found.
// NOTE: Safe to use the same variable for both input and output.
-BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
+BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
string16* output);
BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
TrimPositions positions);
-BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
+BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input,
TrimPositions positions,
std::string* output);
BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
@@ -309,21 +328,23 @@
BASE_EXPORT bool ContainsOnlyChars(StringPiece16 input,
StringPiece16 characters);
-// Returns true if the specified string matches the criteria. How can a wide
-// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
-// first case) or characters that use only 8-bits and whose 8-bit
-// representation looks like a UTF-8 string (the second case).
-//
-// Note that IsStringUTF8 checks not only if the input is structurally
-// valid but also if it doesn't contain any non-character codepoint
-// (e.g. U+FFFE). It's done on purpose because all the existing callers want
-// to have the maximum 'discriminating' power from other encodings. If
-// there's a use case for just checking the structural validity, we have to
-// add a new function for that.
-//
-// IsStringASCII assumes the input is likely all ASCII, and does not leave early
-// if it is not the case.
+// Returns true if |str| is structurally valid UTF-8 and also doesn't
+// contain any non-character code point (e.g. U+10FFFE). Prohibiting
+// non-characters increases the likelihood of detecting non-UTF-8 in
+// real-world text, for callers which do not need to accept
+// non-characters in strings.
BASE_EXPORT bool IsStringUTF8(StringPiece str);
+
+// Returns true if |str| contains valid UTF-8, allowing non-character
+// code points.
+BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str);
+
+// Returns true if |str| contains only valid ASCII character values.
+// Note 1: IsStringASCII executes in time determined solely by the
+// length of the string, not by its contents, so it is robust against
+// timing attacks for all strings of equal length.
+// Note 2: IsStringASCII assumes the input is likely all ASCII, and
+// does not leave early if it is not the case.
BASE_EXPORT bool IsStringASCII(StringPiece str);
BASE_EXPORT bool IsStringASCII(StringPiece16 str);
#if defined(WCHAR_T_IS_UTF32)
@@ -456,10 +477,6 @@
// convenient in that is can be used inline in the call, and fast in that it
// avoids copying the results of the call from a char* into a string.
//
-// |length_with_null| must be at least 2, since otherwise the underlying string
-// would have size 0, and trying to access &((*str)[0]) in that case can result
-// in a number of problems.
-//
// Internally, this takes linear time because the resize() call 0-fills the
// underlying array for potentially all
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
@@ -471,9 +488,11 @@
BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
-// Does the opposite of SplitString()/SplitStringPiece(). Joins a vector or list
-// of strings into a single string, inserting |separator| (which may be empty)
-// in between all elements.
+// Joins a vector or list of strings into a single string, inserting |separator|
+// (which may be empty) in between all elements.
+//
+// Note this is the inverse of SplitString()/SplitStringPiece() defined in
+// string_split.h.
//
// If possible, callers should build a vector of StringPieces and use the
// StringPiece variant, so that they do not create unnecessary copies of
@@ -517,6 +536,25 @@
const string16& a,
size_t* offset);
+#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
+BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
+ TrimPositions positions,
+ std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
+ TrimPositions positions);
+
+BASE_EXPORT bool TrimString(WStringPiece input,
+ WStringPiece trim_chars,
+ std::wstring* output);
+
+BASE_EXPORT WStringPiece TrimString(WStringPiece input,
+ WStringPiece trim_chars,
+ TrimPositions positions);
+
+BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
+#endif
+
} // namespace base
#if defined(OS_WIN)
diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc
index 3ca29b7..e9e4d93 100644
--- a/base/strings/string_util_constants.cc
+++ b/base/strings/string_util_constants.cc
@@ -6,61 +6,48 @@
namespace gurl_base {
-#define WHITESPACE_UNICODE \
- 0x0009, /* CHARACTER TABULATION */ \
- 0x000A, /* LINE FEED (LF) */ \
- 0x000B, /* LINE TABULATION */ \
- 0x000C, /* FORM FEED (FF) */ \
- 0x000D, /* CARRIAGE RETURN (CR) */ \
- 0x0020, /* SPACE */ \
- 0x0085, /* NEXT LINE (NEL) */ \
- 0x00A0, /* NO-BREAK SPACE */ \
- 0x1680, /* OGHAM SPACE MARK */ \
- 0x2000, /* EN QUAD */ \
- 0x2001, /* EM QUAD */ \
- 0x2002, /* EN SPACE */ \
- 0x2003, /* EM SPACE */ \
- 0x2004, /* THREE-PER-EM SPACE */ \
- 0x2005, /* FOUR-PER-EM SPACE */ \
- 0x2006, /* SIX-PER-EM SPACE */ \
- 0x2007, /* FIGURE SPACE */ \
- 0x2008, /* PUNCTUATION SPACE */ \
- 0x2009, /* THIN SPACE */ \
- 0x200A, /* HAIR SPACE */ \
- 0x2028, /* LINE SEPARATOR */ \
- 0x2029, /* PARAGRAPH SEPARATOR */ \
- 0x202F, /* NARROW NO-BREAK SPACE */ \
- 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \
- 0x3000, /* IDEOGRAPHIC SPACE */ \
- 0
+#define WHITESPACE_ASCII_NO_CR_LF \
+ 0x09, /* CHARACTER TABULATION */ \
+ 0x0B, /* LINE TABULATION */ \
+ 0x0C, /* FORM FEED (FF) */ \
+ 0x20 /* SPACE */
-const wchar_t kWhitespaceWide[] = {
- WHITESPACE_UNICODE
-};
+#define WHITESPACE_ASCII \
+ WHITESPACE_ASCII_NO_CR_LF, /* Comment to make clang-format linebreak */ \
+ 0x0A, /* LINE FEED (LF) */ \
+ 0x0D /* CARRIAGE RETURN (CR) */
-const char16 kWhitespaceUTF16[] = {
- WHITESPACE_UNICODE
-};
+#define WHITESPACE_UNICODE_NON_ASCII \
+ 0x0085, /* NEXT LINE (NEL) */ \
+ 0x00A0, /* NO-BREAK SPACE */ \
+ 0x1680, /* OGHAM SPACE MARK */ \
+ 0x2000, /* EN QUAD */ \
+ 0x2001, /* EM QUAD */ \
+ 0x2002, /* EN SPACE */ \
+ 0x2003, /* EM SPACE */ \
+ 0x2004, /* THREE-PER-EM SPACE */ \
+ 0x2005, /* FOUR-PER-EM SPACE */ \
+ 0x2006, /* SIX-PER-EM SPACE */ \
+ 0x2007, /* FIGURE SPACE */ \
+ 0x2008, /* PUNCTUATION SPACE */ \
+ 0x2009, /* THIN SPACE */ \
+ 0x200A, /* HAIR SPACE */ \
+ 0x2028, /* LINE SEPARATOR */ \
+ 0x2029, /* PARAGRAPH SEPARATOR */ \
+ 0x202F, /* NARROW NO-BREAK SPACE */ \
+ 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \
+ 0x3000 /* IDEOGRAPHIC SPACE */
-const char kWhitespaceASCII[] = {
- 0x09, // CHARACTER TABULATION
- 0x0A, // LINE FEED (LF)
- 0x0B, // LINE TABULATION
- 0x0C, // FORM FEED (FF)
- 0x0D, // CARRIAGE RETURN (CR)
- 0x20, // SPACE
- 0
-};
+#define WHITESPACE_UNICODE_NO_CR_LF \
+ WHITESPACE_ASCII_NO_CR_LF, WHITESPACE_UNICODE_NON_ASCII
-const char16 kWhitespaceASCIIAs16[] = {
- 0x09, // CHARACTER TABULATION
- 0x0A, // LINE FEED (LF)
- 0x0B, // LINE TABULATION
- 0x0C, // FORM FEED (FF)
- 0x0D, // CARRIAGE RETURN (CR)
- 0x20, // SPACE
- 0
-};
+#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII
+
+const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0};
+const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0};
+const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0};
+const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0};
+const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index 51b4ee1..a2aab42 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc
@@ -69,6 +69,128 @@
return prev != output->length();
}
+using TestFunction = bool (*)(StringPiece str);
+
+// Helper used to test IsStringUTF8{,AllowingNoncharacters}.
+void TestStructurallyValidUtf8(TestFunction fn) {
+ EXPECT_TRUE(fn("abc"));
+ EXPECT_TRUE(fn("\xC2\x81"));
+ EXPECT_TRUE(fn("\xE1\x80\xBF"));
+ EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
+ EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
+ EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
+
+ // U+FEFF used as UTF-8 BOM.
+ // clang-format off
+ EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
+ // clang-format on
+
+ // Embedded nulls in canonical UTF-8 representation.
+ using std::string_literals::operator""s;
+ const std::string kEmbeddedNull = "embedded\0null"s;
+ EXPECT_TRUE(fn(kEmbeddedNull));
+}
+
+// Helper used to test IsStringUTF8{,AllowingNoncharacters}.
+void TestStructurallyInvalidUtf8(TestFunction fn) {
+ // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
+ EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));
+
+ // Surrogate code points
+ EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
+ EXPECT_FALSE(fn("\xED\xA0\x8F"));
+ EXPECT_FALSE(fn("\xED\xBF\xBF"));
+
+ // Overlong sequences
+ EXPECT_FALSE(fn("\xC0\x80")); // U+0000
+ EXPECT_FALSE(fn("\xC1\x80\xC1\x81")); // "AB"
+ EXPECT_FALSE(fn("\xE0\x80\x80")); // U+0000
+ EXPECT_FALSE(fn("\xE0\x82\x80")); // U+0080
+ EXPECT_FALSE(fn("\xE0\x9F\xBF")); // U+07FF
+ EXPECT_FALSE(fn("\xF0\x80\x80\x8D")); // U+000D
+ EXPECT_FALSE(fn("\xF0\x80\x82\x91")); // U+0091
+ EXPECT_FALSE(fn("\xF0\x80\xA0\x80")); // U+0800
+ EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF")); // U+FEFF (BOM)
+ EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF")); // U+003F
+ EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5")); // U+00A5
+
+ // Beyond U+10FFFF (the upper limit of Unicode codespace)
+ EXPECT_FALSE(fn("\xF4\x90\x80\x80")); // U+110000
+ EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF")); // 5 bytes
+ EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80")); // 6 bytes
+
+ // BOM in UTF-16(BE|LE)
+ EXPECT_FALSE(fn("\xFE\xFF"));
+ EXPECT_FALSE(fn("\xFF\xFE"));
+
+ // Strings in legacy encodings. We can certainly make up strings
+ // in a legacy encoding that are valid in UTF-8, but in real data,
+ // most of them are invalid as UTF-8.
+
+ // cafe with U+00E9 in ISO-8859-1
+ EXPECT_FALSE(fn("caf\xE9"));
+ // U+AC00, U+AC01 in EUC-KR
+ EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
+ // U+4F60 U+597D in Big5
+ EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
+ // "abc" with U+201[CD] in windows-125[0-8]
+ // clang-format off
+ EXPECT_FALSE(fn("\x93" "abc\x94"));
+ // clang-format on
+ // U+0639 U+064E U+0644 U+064E in ISO-8859-6
+ EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
+ // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
+ EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));
+
+ // BOM in UTF-32(BE|LE)
+ using std::string_literals::operator""s;
+ const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
+ EXPECT_FALSE(fn(kUtf32BeBom));
+ const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
+ EXPECT_FALSE(fn(kUtf32LeBom));
+}
+
+// Helper used to test IsStringUTF8{,AllowingNoncharacters}.
+void TestNoncharacters(TestFunction fn, bool expected_result) {
+ EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result); // U+FDD0
+ EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result); // U+FDDF
+ EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result); // U+FDEF
+ EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result); // U+FFFE
+ EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result); // U+FFFF
+ EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result); // U+01FFFE
+ EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result); // U+01FFFF
+ EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result); // U+02FFFE
+ EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result); // U+02FFFF
+ EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result); // U+03FFFE
+ EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result); // U+03FFFF
+ EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result); // U+04FFFE
+ EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result); // U+04FFFF
+ EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result); // U+05FFFE
+ EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result); // U+05FFFF
+ EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result); // U+06FFFE
+ EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result); // U+06FFFF
+ EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result); // U+07FFFE
+ EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result); // U+07FFFF
+ EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result); // U+08FFFE
+ EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result); // U+08FFFF
+ EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result); // U+09FFFE
+ EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result); // U+09FFFF
+ EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result); // U+0AFFFE
+ EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result); // U+0AFFFF
+ EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result); // U+0BFFFE
+ EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result); // U+0BFFFF
+ EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result); // U+0CFFFE
+ EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result); // U+0CFFFF
+ EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result); // U+0DFFFE
+ EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result); // U+0DFFFF
+ EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result); // U+0EFFFE
+ EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result); // U+0EFFFF
+ EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result); // U+0FFFFE
+ EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result); // U+0FFFFF
+ EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result); // U+10FFFE
+ EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result); // U+10FFFF
+}
+
} // namespace
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
@@ -380,69 +502,19 @@
}
TEST(StringUtilTest, IsStringUTF8) {
- EXPECT_TRUE(IsStringUTF8("abc"));
- EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
- EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
- EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
- EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
- EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
+ {
+ SCOPED_TRACE("IsStringUTF8");
+ TestStructurallyValidUtf8(&IsStringUTF8);
+ TestStructurallyInvalidUtf8(&IsStringUTF8);
+ TestNoncharacters(&IsStringUTF8, false);
+ }
- // surrogate code points
- EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
- EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
- EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
-
- // overlong sequences
- EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
- EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
- EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
- EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
- EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
- EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
- EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
- EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
- EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
- EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
- EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
-
- // Beyond U+10FFFF (the upper limit of Unicode codespace)
- EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
- EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
- EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
-
- // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
- EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
- EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
- EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
- EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
-
- // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
- EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
- EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
- EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
- EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
- EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
- // Strings in legacy encodings. We can certainly make up strings
- // in a legacy encoding that are valid in UTF-8, but in real data,
- // most of them are invalid as UTF-8.
- EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
- EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
- EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
- // "abc" with U+201[CD] in windows-125[0-8]
- EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
- // U+0639 U+064E U+0644 U+064E in ISO-8859-6
- EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
- // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
- EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
-
- // Check that we support Embedded Nulls. The first uses the canonical UTF-8
- // representation, and the second uses a 2-byte sequence. The second version
- // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
- // given codepoint must be used.
- static const char kEmbeddedNull[] = "embedded\0null";
- EXPECT_TRUE(IsStringUTF8(
- std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
- EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
+ {
+ SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
+ TestStructurallyValidUtf8(&IsStringUTF8AllowingNoncharacters);
+ TestStructurallyInvalidUtf8(&IsStringUTF8AllowingNoncharacters);
+ TestNoncharacters(&IsStringUTF8AllowingNoncharacters, true);
+ }
}
TEST(StringUtilTest, IsStringASCII) {
@@ -1414,6 +1486,13 @@
WritesCorrectly(2);
WritesCorrectly(5000);
+ // Validate that WriteInto handles 0-length strings
+ std::string empty;
+ const char kOriginal[] = "original";
+ strncpy(WriteInto(&empty, 1), kOriginal, 0);
+ EXPECT_STREQ("", empty.c_str());
+ EXPECT_EQ(0u, empty.size());
+
// Validate that WriteInto doesn't modify other strings
// when using a Copy-on-Write implementation.
const char kLive[] = "live";
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
index 1a08ffb..cc58087 100644
--- a/base/strings/stringprintf.cc
+++ b/base/strings/stringprintf.cc
@@ -39,18 +39,25 @@
va_list argptr) {
return gurl_base::vswprintf(buffer, buf_size, format, argptr);
}
+inline int vsnprintfT(char16_t* buffer,
+ size_t buf_size,
+ const char16_t* format,
+ va_list argptr) {
+ return gurl_base::vswprintf(reinterpret_cast<wchar_t*>(buffer), buf_size,
+ reinterpret_cast<const wchar_t*>(format), argptr);
+}
#endif
// Templatized backend for StringPrintF/StringAppendF. This does not finalize
// the va_list, the caller is expected to do that.
-template <class StringType>
-static void StringAppendVT(StringType* dst,
- const typename StringType::value_type* format,
+template <class CharT>
+static void StringAppendVT(std::basic_string<CharT>* dst,
+ const CharT* format,
va_list ap) {
// First try with a small fixed size buffer.
// This buffer size should be kept in sync with StringUtilTest.GrowBoundary
// and StringUtilTest.StringPrintfBounds.
- typename StringType::value_type stack_buf[1024];
+ CharT stack_buf[1024];
va_list ap_copy;
va_copy(ap_copy, ap);
@@ -93,7 +100,7 @@
return;
}
- std::vector<typename StringType::value_type> mem_buf(mem_length);
+ std::vector<CharT> mem_buf(mem_length);
// NOTE: You can only use a va_list once. Since we're in a while loop, we
// need to make a new copy each time so we don't use up the original.
@@ -129,6 +136,15 @@
va_end(ap);
return result;
}
+
+std::u16string StringPrintf(const char16_t* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ std::u16string result;
+ StringAppendV(&result, format, ap);
+ va_end(ap);
+ return result;
+}
#endif
std::string StringPrintV(const char* format, va_list ap) {
@@ -156,6 +172,17 @@
va_end(ap);
return *dst;
}
+
+const std::u16string& SStringPrintf(std::u16string* dst,
+ const char16_t* format,
+ ...) {
+ va_list ap;
+ va_start(ap, format);
+ dst->clear();
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+ return *dst;
+}
#endif
void StringAppendF(std::string* dst, const char* format, ...) {
@@ -172,6 +199,13 @@
StringAppendV(dst, format, ap);
va_end(ap);
}
+
+void StringAppendF(std::u16string* dst, const char16_t* format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ StringAppendV(dst, format, ap);
+ va_end(ap);
+}
#endif
void StringAppendV(std::string* dst, const char* format, va_list ap) {
@@ -182,6 +216,10 @@
void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
StringAppendVT(dst, format, ap);
}
+
+void StringAppendV(std::u16string* dst, const char16_t* format, va_list ap) {
+ StringAppendVT(dst, format, ap);
+}
#endif
} // namespace base
diff --git a/base/strings/stringprintf.h b/base/strings/stringprintf.h
index 2abdb68..5768bcc 100644
--- a/base/strings/stringprintf.h
+++ b/base/strings/stringprintf.h
@@ -19,8 +19,14 @@
BASE_EXPORT std::string StringPrintf(const char* format, ...)
PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
#if defined(OS_WIN)
+// Note: Unfortunately compile time checking of the format string for UTF-16
+// strings is not supported by any compiler, thus these functions should be used
+// carefully and sparingly. Also applies to SStringPrintf and StringAppendV
+// below.
BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...)
WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
+BASE_EXPORT std::u16string StringPrintf(const char16_t* format, ...)
+ WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
#endif
// Return a C++ string given vprintf-like input.
@@ -35,6 +41,9 @@
BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst,
const wchar_t* format,
...) WPRINTF_FORMAT(2, 3);
+BASE_EXPORT const std::u16string& SStringPrintf(std::u16string* dst,
+ const char16_t* format,
+ ...) WPRINTF_FORMAT(2, 3);
#endif
// Append result to a supplied string.
@@ -43,6 +52,8 @@
#if defined(OS_WIN)
BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...)
WPRINTF_FORMAT(2, 3);
+BASE_EXPORT void StringAppendF(std::u16string* dst, const char16_t* format, ...)
+ WPRINTF_FORMAT(2, 3);
#endif
// Lower-level routine that takes a va_list and appends to a specified
@@ -53,6 +64,9 @@
BASE_EXPORT void StringAppendV(std::wstring* dst,
const wchar_t* format,
va_list ap) WPRINTF_FORMAT(2, 0);
+BASE_EXPORT void StringAppendV(std::u16string* dst,
+ const char16_t* format,
+ va_list ap) WPRINTF_FORMAT(2, 0);
#endif
} // namespace base
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc
index 59e3403..c2e8707 100644
--- a/base/strings/stringprintf_unittest.cc
+++ b/base/strings/stringprintf_unittest.cc
@@ -18,7 +18,10 @@
// A helper for the StringAppendV test that follows.
//
// Just forwards its args to StringAppendV.
-static void StringAppendVTestHelper(std::string* out, const char* format, ...) {
+template <class CharT>
+static void StringAppendVTestHelper(std::basic_string<CharT>* out,
+ const CharT* format,
+ ...) {
va_list ap;
va_start(ap, format);
StringAppendV(out, format, ap);
@@ -35,6 +38,7 @@
EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
#if defined(OS_WIN)
EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
+ EXPECT_EQ(u"123hello w", StringPrintf(u"%3d%2ls %1lc", 123, u"hello", 'w'));
#endif
}
@@ -47,6 +51,10 @@
std::wstring valuew(L"Hello");
StringAppendF(&valuew, L"%ls", L"");
EXPECT_EQ(L"Hello", valuew);
+
+ std::u16string value16(u"Hello");
+ StringAppendF(&value16, u"%ls", u"");
+ EXPECT_EQ(u"Hello", value16);
#endif
}
@@ -59,6 +67,10 @@
std::wstring valuew(L"Hello");
StringAppendF(&valuew, L" %ls", L"World");
EXPECT_EQ(L"Hello World", valuew);
+
+ std::u16string value16(u"Hello");
+ StringAppendF(&value16, u" %ls", u"World");
+ EXPECT_EQ(u"Hello World", value16);
#endif
}
@@ -71,6 +83,10 @@
std::wstring valuew(L"Hello");
StringAppendF(&valuew, L" %d", 123);
EXPECT_EQ(L"Hello 123", valuew);
+
+ std::u16string value16(u"Hello");
+ StringAppendF(&value16, u" %d", 123);
+ EXPECT_EQ(u"Hello 123", value16);
#endif
}
@@ -79,12 +95,13 @@
TEST(StringPrintfTest, StringPrintfBounds) {
const int kSrcLen = 1026;
char src[kSrcLen];
- for (auto& i : src)
- i = 'A';
+ std::fill_n(src, kSrcLen, 'A');
wchar_t srcw[kSrcLen];
- for (auto& i : srcw)
- i = 'A';
+ std::fill_n(srcw, kSrcLen, 'A');
+
+ char16_t src16[kSrcLen];
+ std::fill_n(src16, kSrcLen, 'A');
for (int i = 1; i < 3; i++) {
src[kSrcLen - i] = 0;
@@ -97,6 +114,14 @@
std::wstring outw;
SStringPrintf(&outw, L"%ls", srcw);
EXPECT_STREQ(srcw, outw.c_str());
+
+ src16[kSrcLen - i] = 0;
+ std::u16string out16;
+ SStringPrintf(&out16, u"%ls", src16);
+ // EXPECT_STREQ does not support const char16_t* strings yet.
+ // Dispatch to the const wchar_t* overload instead.
+ EXPECT_STREQ(reinterpret_cast<const wchar_t*>(src16),
+ reinterpret_cast<const wchar_t*>(out16.c_str()));
#endif
}
}
@@ -129,6 +154,16 @@
std::string out;
StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
EXPECT_EQ("1 foo bar", out);
+
+#if defined(OS_WIN)
+ std::wstring outw;
+ StringAppendVTestHelper(&outw, L"%d foo %ls", 1, L"bar");
+ EXPECT_EQ(L"1 foo bar", outw);
+
+ std::u16string out16;
+ StringAppendVTestHelper(&out16, u"%d foo %ls", 1, u"bar");
+ EXPECT_EQ(u"1 foo bar", out16);
+#endif
}
// Test the boundary condition for the size of the string_util's
@@ -151,9 +186,6 @@
}
#if defined(OS_WIN)
-// vswprintf in Visual Studio 2013 fails when given U+FFFF. This tests that the
-// failure case is gracefuly handled. In Visual Studio 2015 the bad character
-// is passed through.
TEST(StringPrintfTest, Invalid) {
wchar_t invalid[2];
invalid[0] = 0xffff;
@@ -161,11 +193,7 @@
std::wstring out;
SStringPrintf(&out, L"%ls", invalid);
-#if _MSC_VER >= 1900
EXPECT_STREQ(invalid, out.c_str());
-#else
- EXPECT_STREQ(L"", out.c_str());
-#endif
}
#endif
diff --git a/base/strings/sys_string_conversions.h b/base/strings/sys_string_conversions.h
index 08082ae..7c3c575 100644
--- a/base/strings/sys_string_conversions.h
+++ b/base/strings/sys_string_conversions.h
@@ -31,14 +31,17 @@
// Converts between wide and UTF-8 representations of a string. On error, the
// result is system-dependent.
-BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide);
-BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8);
+BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
// Converts between wide and the system multi-byte representations of a string.
// DANGER: This will lose information and can change (on Windows, this can
// change between reboots).
-BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide);
-BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb);
+BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb)
+ WARN_UNUSED_RESULT;
// Windows-specific ------------------------------------------------------------
@@ -47,9 +50,11 @@
// Converts between 8-bit and wide strings, using the given code page. The
// code page identifier is one accepted by the Windows function
// MultiByteToWideChar().
-BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page);
+BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page)
+ WARN_UNUSED_RESULT;
BASE_EXPORT std::string SysWideToMultiByte(const std::wstring& wide,
- uint32_t code_page);
+ uint32_t code_page)
+ WARN_UNUSED_RESULT;
#endif // defined(OS_WIN)
@@ -61,21 +66,25 @@
// Creates a string, and returns it with a refcount of 1. You are responsible
// for releasing it. Returns NULL on failure.
-BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8);
-BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16);
+BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16)
+ WARN_UNUSED_RESULT;
// Same, but returns an autoreleased NSString.
-BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8);
-BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16);
+BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8) WARN_UNUSED_RESULT;
+BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16)
+ WARN_UNUSED_RESULT;
// Converts a CFStringRef to an STL string. Returns an empty string on failure.
-BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref);
-BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref);
+BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref)
+ WARN_UNUSED_RESULT;
+BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT;
// Same, but accepts NSString input. Converts nil NSString* to the appropriate
// string type of length 0.
-BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref);
-BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref);
+BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT;
+BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT;
#endif // defined(OS_MACOSX)
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc
index 5bf7967..7d00bb4 100644
--- a/base/strings/utf_offset_string_conversions.cc
+++ b/base/strings/utf_offset_string_conversions.cc
@@ -90,16 +90,22 @@
auto adjusted_iter = adjustments_on_adjusted_string->begin();
auto first_iter = first_adjustments.begin();
// Simultaneously iterate over all |adjustments_on_adjusted_string| and
- // |first_adjustments|, adding adjustments to or correcting the adjustments
- // in |adjustments_on_adjusted_string| as we go. |shift| keeps track of the
- // current number of characters collapsed by |first_adjustments| up to this
- // point. |currently_collapsing| keeps track of the number of characters
- // collapsed by |first_adjustments| into the current |adjusted_iter|'s
- // length. These are characters that will change |shift| as soon as we're
- // done processing the current |adjusted_iter|; they are not yet reflected in
- // |shift|.
+ // |first_adjustments|, pushing adjustments at the end of
+ // |adjustments_builder| as we go. |shift| keeps track of the current number
+ // of characters collapsed by |first_adjustments| up to this point.
+ // |currently_collapsing| keeps track of the number of characters collapsed by
+ // |first_adjustments| into the current |adjusted_iter|'s length. These are
+ // characters that will change |shift| as soon as we're done processing the
+ // current |adjusted_iter|; they are not yet reflected in |shift|.
size_t shift = 0;
size_t currently_collapsing = 0;
+ // While we *could* update |adjustments_on_adjusted_string| in place by
+ // inserting new adjustments into the middle, we would be repeatedly calling
+ // |std::vector::insert|. That would cost O(n) time per insert, relative to
+ // distance from end of the string. By instead allocating
+ // |adjustments_builder| and calling |std::vector::push_back|, we only pay
+ // amortized constant time per push. We are trading space for time.
+ Adjustments adjustments_builder;
while (adjusted_iter != adjustments_on_adjusted_string->end()) {
if ((first_iter == first_adjustments.end()) ||
((adjusted_iter->original_offset + shift +
@@ -112,6 +118,7 @@
adjusted_iter->original_offset += shift;
shift += currently_collapsing;
currently_collapsing = 0;
+ adjustments_builder.push_back(*adjusted_iter);
++adjusted_iter;
} else if ((adjusted_iter->original_offset + shift) >
first_iter->original_offset) {
@@ -127,15 +134,9 @@
GURL_DCHECK_LE(first_iter->original_offset + first_iter->output_length,
adjusted_iter->original_offset + shift);
- // Add the |first_adjustment_iter| to the full set of adjustments while
- // making sure |adjusted_iter| continues pointing to the same element.
- // We do this by inserting the |first_adjustment_iter| right before
- // |adjusted_iter|, then incrementing |adjusted_iter| so it points to
- // the following element.
+ // Add the |first_iter| to the full set of adjustments.
shift += first_iter->original_length - first_iter->output_length;
- adjusted_iter = adjustments_on_adjusted_string->insert(
- adjusted_iter, *first_iter);
- ++adjusted_iter;
+ adjustments_builder.push_back(*first_iter);
++first_iter;
} else {
// The first adjustment adjusted something that then got further adjusted
@@ -168,10 +169,10 @@
// (Their offsets are already correct with respect to the original string.)
// Append them all.
GURL_DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
- adjustments_on_adjusted_string->insert(
- adjustments_on_adjusted_string->end(), first_iter,
- first_adjustments.end());
+ adjustments_builder.insert(adjustments_builder.end(), first_iter,
+ first_adjustments.end());
}
+ *adjustments_on_adjusted_string = std::move(adjustments_builder);
}
// Converts the given source Unicode character type to the given destination
diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h
index 8902ee5..c2e2ba7 100644
--- a/base/strings/utf_offset_string_conversions.h
+++ b/base/strings/utf_offset_string_conversions.h
@@ -98,7 +98,7 @@
gurl_base::OffsetAdjuster::Adjustments* adjustments);
BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
const gurl_base::StringPiece& utf8,
- gurl_base::OffsetAdjuster::Adjustments* adjustments);
+ gurl_base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT;
// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|. Input offsets greater than the length of the
// input string will be set to string16::npos. See comments by AdjustOffsets().
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index 84d18f7..075832e 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h
@@ -17,16 +17,19 @@
namespace gurl_base {
inline bool IsValidCodepoint(uint32_t code_point) {
- // Excludes the surrogate code points ([0xD800, 0xDFFF]) and
- // codepoints larger than 0x10FFFF (the highest codepoint allowed).
- // Non-characters and unassigned codepoints are allowed.
+ // Excludes code points that are not Unicode scalar values, i.e.
+ // surrogate code points ([0xD800, 0xDFFF]). Additionally, excludes
+ // code points larger than 0x10FFFF (the highest codepoint allowed).
+ // Non-characters and unassigned code points are allowed.
+ // https://unicode.org/glossary/#unicode_scalar_value
return code_point < 0xD800u ||
(code_point >= 0xE000u && code_point <= 0x10FFFFu);
}
inline bool IsValidCharacter(uint32_t code_point) {
- // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in
- // 0xFFFE or 0xFFFF) from the set of valid code points.
+ // Excludes non-characters (U+FDD0..U+FDEF, and all code points
+ // ending in 0xFFFE or 0xFFFF) from the set of valid code points.
+ // https://unicode.org/faq/private_use.html#nonchar1
return code_point < 0xD800u || (code_point >= 0xE000u &&
code_point < 0xFDD0u) || (code_point > 0xFDEFu &&
code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu);
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h
index e64f420..745372c 100644
--- a/base/strings/utf_string_conversions.h
+++ b/base/strings/utf_string_conversions.h
@@ -23,31 +23,31 @@
// possible.
BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len,
std::string* output);
-BASE_EXPORT std::string WideToUTF8(WStringPiece wide);
+BASE_EXPORT std::string WideToUTF8(WStringPiece wide) WARN_UNUSED_RESULT;
BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len,
std::wstring* output);
-BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8);
+BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len,
string16* output);
-BASE_EXPORT string16 WideToUTF16(WStringPiece wide);
+BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT;
BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len,
std::wstring* output);
-BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16);
+BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT;
BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
-BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8);
+BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT;
BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len,
std::string* output);
-BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16);
+BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT;
// This converts an ASCII string, typically a hardcoded constant, to a UTF16
// string.
-BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii);
+BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
// beforehand.
-BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16);
+BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT;
} // namespace base
diff --git a/base/strings/utf_string_conversions_fuzzer.cc b/base/strings/utf_string_conversions_fuzzer.cc
index 96bccda..55e75f7 100644
--- a/base/strings/utf_string_conversions_fuzzer.cc
+++ b/base/strings/utf_string_conversions_fuzzer.cc
@@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "base/macros.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
@@ -14,10 +15,10 @@
gurl_base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
size);
- gurl_base::UTF8ToWide(string_piece_input);
+ ignore_result(gurl_base::UTF8ToWide(string_piece_input));
gurl_base::UTF8ToWide(reinterpret_cast<const char*>(data), size,
&output_std_wstring);
- gurl_base::UTF8ToUTF16(string_piece_input);
+ ignore_result(gurl_base::UTF8ToUTF16(string_piece_input));
gurl_base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size,
&output_string16);
@@ -25,10 +26,10 @@
if (size % 2 == 0) {
gurl_base::StringPiece16 string_piece_input16(
reinterpret_cast<const gurl_base::char16*>(data), size / 2);
- gurl_base::UTF16ToWide(output_string16);
+ ignore_result(gurl_base::UTF16ToWide(output_string16));
gurl_base::UTF16ToWide(reinterpret_cast<const gurl_base::char16*>(data), size / 2,
&output_std_wstring);
- gurl_base::UTF16ToUTF8(string_piece_input16);
+ ignore_result(gurl_base::UTF16ToUTF8(string_piece_input16));
gurl_base::UTF16ToUTF8(reinterpret_cast<const gurl_base::char16*>(data), size / 2,
&output_std_string);
}
@@ -36,10 +37,10 @@
// Test for wchar_t.
size_t wchar_t_size = sizeof(wchar_t);
if (size % wchar_t_size == 0) {
- gurl_base::WideToUTF8(output_std_wstring);
+ ignore_result(gurl_base::WideToUTF8(output_std_wstring));
gurl_base::WideToUTF8(reinterpret_cast<const wchar_t*>(data),
size / wchar_t_size, &output_std_string);
- gurl_base::WideToUTF16(output_std_wstring);
+ ignore_result(gurl_base::WideToUTF16(output_std_wstring));
gurl_base::WideToUTF16(reinterpret_cast<const wchar_t*>(data),
size / wchar_t_size, &output_string16);
}
@@ -49,7 +50,7 @@
if (gurl_base::IsStringASCII(string_piece_input)) {
output_string16 = gurl_base::ASCIIToUTF16(string_piece_input);
gurl_base::StringPiece16 string_piece_input16(output_string16);
- gurl_base::UTF16ToASCII(string_piece_input16);
+ ignore_result(gurl_base::UTF16ToASCII(string_piece_input16));
}
return 0;
diff --git a/build/build_config.h b/build/build_config.h
index 0d87d80..688b779 100644
--- a/build/build_config.h
+++ b/build/build_config.h
@@ -63,7 +63,7 @@
#define OS_QNX 1
#elif defined(_AIX)
#define OS_AIX 1
-#elif defined(__asmjs__)
+#elif defined(__asmjs__) || defined(__wasm__)
#define OS_ASMJS
#else
#error Please add support for your platform in build/build_config.h
@@ -139,7 +139,7 @@
#define ARCH_CPU_ARM64 1
#define ARCH_CPU_64_BITS 1
#define ARCH_CPU_LITTLE_ENDIAN 1
-#elif defined(__pnacl__) || defined(__asmjs__)
+#elif defined(__pnacl__) || defined(__asmjs__) || defined(__wasm__)
#define ARCH_CPU_32_BITS 1
#define ARCH_CPU_LITTLE_ENDIAN 1
#elif defined(__MIPSEL__)
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h
index def1745..41ddacd 100644
--- a/polyfills/base/logging.h
+++ b/polyfills/base/logging.h
@@ -24,10 +24,12 @@
#define GURL_CHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_CHECK(statement) GurlFakeLogSink({statement})
#define GURL_DCHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2})
+#define GURL_DCHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_DCHECK_GT(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_DCHECK_IS_ON() false
#define GURL_DCHECK_LE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_DCHECK_LT(statement, statement2) GurlFakeLogSink({statement, statement2})
+#define GURL_DCHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2})
#define GURL_DCHECK(statement) GurlFakeLogSink({statement})
#define GURL_DLOG(severity) GurlFakeLogSink(true)
#define GURL_LOG(severity) GurlFakeLogSink(true)
diff --git a/url/gurl.cc b/url/gurl.cc
index c8e424f..36c5ee2 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -396,14 +396,14 @@
return ComponentString(file_component);
}
-std::string GURL::PathForRequest() const {
+gurl_base::StringPiece GURL::PathForRequestPiece() const {
GURL_DCHECK(parsed_.path.len > 0)
<< "Canonical path for requests should be non-empty";
if (parsed_.ref.len >= 0) {
// Clip off the reference when it exists. The reference starts after the
// #-sign, so we have to subtract one to also remove it.
- return std::string(spec_, parsed_.path.begin,
- parsed_.ref.begin - parsed_.path.begin - 1);
+ return gurl_base::StringPiece(&spec_[parsed_.path.begin],
+ parsed_.ref.begin - parsed_.path.begin - 1);
}
// Compute the actual path length, rather than depending on the spec's
// terminator. If we're an inner_url, our spec continues on into our outer
@@ -412,7 +412,11 @@
if (parsed_.query.is_valid())
path_len = parsed_.query.end() - parsed_.path.begin;
- return std::string(spec_, parsed_.path.begin, path_len);
+ return gurl_base::StringPiece(&spec_[parsed_.path.begin], path_len);
+}
+
+std::string GURL::PathForRequest() const {
+ return PathForRequestPiece().as_string();
}
std::string GURL::HostNoBrackets() const {
diff --git a/url/gurl.h b/url/gurl.h
index 8c026f7..73d2b43 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -386,6 +386,9 @@
// parameter, and query portions of the URL. It is guaranteed to be ASCII.
std::string PathForRequest() const;
+ // Returns the same characters as PathForRequest(), avoiding a copy.
+ gurl_base::StringPiece PathForRequestPiece() const;
+
// Returns the host, excluding the square brackets surrounding IPv6 address
// literals. This can be useful for passing to getaddrinfo().
std::string HostNoBrackets() const;
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index 0375eae..b114920 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -289,21 +289,42 @@
bool expected_valid;
const char* expected;
} resolve_cases[] = {
- {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
- {"http://www.google.com/foo/", "bar", true, "http://www.google.com/foo/bar"},
- {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
- {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
- {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
- {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
- {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
- {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
- {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
+ {"http://www.google.com/", "foo.html", true,
+ "http://www.google.com/foo.html"},
+ {"http://www.google.com/foo/", "bar", true,
+ "http://www.google.com/foo/bar"},
+ {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
+ {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
+ {"http://www.google.com/", "http://images.google.com/foo.html", true,
+ "http://images.google.com/foo.html"},
+ {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
+ true, "http://images.google.com/foo.html"},
+ {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
+ true, "http://www.google.com/hello/world.html?a#b"},
+ {"http://www.google.com/foo#bar", "#com", true,
+ "http://www.google.com/foo#com"},
+ {"http://www.google.com/", "Https:images.google.com", true,
+ "https://images.google.com/"},
// A non-standard base can be replaced with a standard absolute URL.
- {"data:blahblah", "http://google.com/", true, "http://google.com/"},
- {"data:blahblah", "http:google.com", true, "http://google.com/"},
+ {"data:blahblah", "http://google.com/", true, "http://google.com/"},
+ {"data:blahblah", "http:google.com", true, "http://google.com/"},
// Filesystem URLs have different paths to test.
- {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
- {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
+ {"filesystem:http://www.google.com/type/", "foo.html", true,
+ "filesystem:http://www.google.com/type/foo.html"},
+ {"filesystem:http://www.google.com/type/", "../foo.html", true,
+ "filesystem:http://www.google.com/type/foo.html"},
+ // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
+ // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
+ {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
+ "file:///some/dir/10.0.0.7:8080/foo.html"},
+ {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
+ {"file:///some/dir/", "x1://host", true, "x1://host"},
+ {"file:///some/dir/", "X1://host", true, "x1://host"},
+ {"file:///some/dir/", "x.://host", true, "x.://host"},
+ {"file:///some/dir/", "x+://host", true, "x+://host"},
+ {"file:///some/dir/", "x-://host", true, "x-://host"},
+ {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
+ {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
};
for (size_t i = 0; i < gurl_base::size(resolve_cases); i++) {
@@ -539,11 +560,14 @@
for (size_t i = 0; i < gurl_base::size(cases); i++) {
GURL url(cases[i].input);
- std::string path_request = url.PathForRequest();
- EXPECT_EQ(cases[i].expected, path_request);
+ EXPECT_EQ(cases[i].expected, url.PathForRequest());
+ EXPECT_EQ(cases[i].expected, url.PathForRequestPiece());
EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL);
- if (url.inner_url() && cases[i].inner_expected)
+ if (url.inner_url() && cases[i].inner_expected) {
EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest());
+ EXPECT_EQ(cases[i].inner_expected,
+ url.inner_url()->PathForRequestPiece());
+ }
}
}
@@ -567,11 +591,6 @@
{"ftp://www.google.com:21/", 21},
{"ftp://www.google.com:80/", 80},
- // gopher
- {"gopher://www.google.com/", 70},
- {"gopher://www.google.com:70/", 70},
- {"gopher://www.google.com:80/", 80},
-
// file - no port
{"file://www.google.com/", PORT_UNSPECIFIED},
{"file://www.google.com:443/", PORT_UNSPECIFIED},
diff --git a/url/origin.cc b/url/origin.cc
index 6eda15e..16e93b0 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -7,8 +7,12 @@
#include <stdint.h>
#include <algorithm>
+#include <vector>
+#include "base/base64.h"
+#include "base/containers/span.h"
#include "polyfills/base/logging.h"
+#include "base/pickle.h"
#include "base/stl_util.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
@@ -42,12 +46,12 @@
// It's SchemeHostPort's responsibility to filter out unrecognized schemes;
// sanity check that this is happening.
- GURL_DCHECK(tuple.IsInvalid() || url.IsStandard() ||
+ GURL_DCHECK(!tuple.IsValid() || url.IsStandard() ||
gurl_base::Contains(GetLocalSchemes(), url.scheme_piece()) ||
AllowNonStandardSchemesForAndroidWebView());
}
- if (tuple.IsInvalid())
+ if (!tuple.IsValid())
return Origin();
return Origin(std::move(tuple));
}
@@ -74,7 +78,7 @@
uint16_t port) {
SchemeHostPort tuple(scheme.as_string(), host.as_string(), port,
SchemeHostPort::CHECK_CANONICALIZATION);
- if (tuple.IsInvalid())
+ if (!tuple.IsValid())
return gurl_base::nullopt;
return Origin(std::move(tuple));
}
@@ -91,7 +95,7 @@
// For opaque origins, it is okay for the SchemeHostPort to be invalid;
// however, this should only arise when the arguments indicate the
// canonical representation of the invalid SchemeHostPort.
- if (precursor.IsInvalid() &&
+ if (!precursor.IsValid() &&
!(precursor_scheme.empty() && precursor_host.empty() &&
precursor_port == 0)) {
return gurl_base::nullopt;
@@ -105,7 +109,7 @@
uint16_t port) {
SchemeHostPort tuple(std::move(scheme), std::move(host), port,
SchemeHostPort::ALREADY_CANONICALIZED);
- if (tuple.IsInvalid())
+ if (!tuple.IsValid())
return Origin();
return Origin(std::move(tuple));
}
@@ -171,7 +175,7 @@
// And if it is unique opaque origin, it definitely is fine. But if there
// is a precursor stored, we should fall through to compare the tuples.
- if (tuple_.IsInvalid())
+ if (!tuple_.IsValid())
return true;
}
@@ -198,7 +202,7 @@
// opaque origin. It is valid case, as any browser-initiated navigation
// to about:blank or data: URL will result in a document with such
// origin and it is valid for it to create blob: URLs.
- if (tuple_.IsInvalid())
+ if (!tuple_.IsValid())
return true;
url_tuple = SchemeHostPort(GURL(url.GetContent()));
@@ -221,7 +225,7 @@
// If |this| does not have valid precursor tuple, it is unique opaque origin,
// which is what we expect non-standard schemes to get.
- if (tuple_.IsInvalid())
+ if (!tuple_.IsValid())
return true;
// However, when there is precursor present, the schemes must match.
@@ -257,7 +261,7 @@
: nonce_->raw_token().ToString();
std::string out = gurl_base::StrCat({Serialize(), " [internally: (", nonce, ")"});
- if (tuple_.IsInvalid())
+ if (!tuple_.IsValid())
gurl_base::StrAppend(&out, {" anonymous]"});
else
gurl_base::StrAppend(&out, {" derived from ", tuple_.Serialize(), "]"});
@@ -266,7 +270,7 @@
Origin::Origin(SchemeHostPort tuple) : tuple_(std::move(tuple)) {
GURL_DCHECK(!opaque());
- GURL_DCHECK(!tuple_.IsInvalid());
+ GURL_DCHECK(tuple_.IsValid());
}
// Constructs an opaque origin derived from |precursor|.
@@ -279,6 +283,82 @@
GURL_DCHECK_EQ(0U, port());
}
+// The pickle is saved in the following format, in order:
+// string - tuple_.Serialize().
+// uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized.
+// uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized.
+gurl_base::Optional<std::string> Origin::SerializeWithNonce() const {
+ if (!opaque() && !tuple_.IsValid())
+ return gurl_base::nullopt;
+
+ gurl_base::Pickle pickle;
+ pickle.WriteString(tuple_.Serialize());
+ if (opaque() && !nonce_->raw_token().is_empty()) {
+ pickle.WriteUInt64(nonce_->token().GetHighForSerialization());
+ pickle.WriteUInt64(nonce_->token().GetLowForSerialization());
+ } else if (opaque()) {
+ // Nonce hasn't been initialized.
+ pickle.WriteUInt64(0);
+ pickle.WriteUInt64(0);
+ }
+
+ gurl_base::span<const uint8_t> data(
+ static_cast<const uint8_t*>(pickle.data()),
+ static_cast<const uint8_t*>(pickle.data()) + pickle.size());
+ // Base64 encode the data to make it nicer to play with.
+ return gurl_base::Base64Encode(data);
+}
+
+// static
+gurl_base::Optional<Origin> Origin::Deserialize(const std::string& value) {
+ std::string data;
+ if (!gurl_base::Base64Decode(value, &data))
+ return gurl_base::nullopt;
+ gurl_base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size());
+ gurl_base::PickleIterator reader(pickle);
+
+ std::string pickled_url;
+ if (!reader.ReadString(&pickled_url))
+ return gurl_base::nullopt;
+ GURL url(pickled_url);
+
+ // If only a tuple was serialized, then this origin is not opaque. For opaque
+ // origins, we expect two uint64's to be left in the pickle.
+ bool is_opaque = !reader.ReachedEnd();
+
+ // Opaque origins without a tuple are ok.
+ if (!is_opaque && !url.is_valid())
+ return gurl_base::nullopt;
+ SchemeHostPort tuple(url);
+
+ // Possible successful early return if the pickled Origin was not opaque.
+ if (!is_opaque) {
+ Origin origin(tuple);
+ if (origin.opaque())
+ return gurl_base::nullopt; // Something went horribly wrong.
+ return origin;
+ }
+
+ uint64_t nonce_high = 0;
+ if (!reader.ReadUInt64(&nonce_high))
+ return gurl_base::nullopt;
+
+ uint64_t nonce_low = 0;
+ if (!reader.ReadUInt64(&nonce_low))
+ return gurl_base::nullopt;
+
+ Origin::Nonce nonce;
+ if (nonce_high != 0 && nonce_low != 0) {
+ // The serialized nonce wasn't empty, so copy it here.
+ nonce = Origin::Nonce(
+ gurl_base::UnguessableToken::Deserialize(nonce_high, nonce_low));
+ }
+ Origin origin;
+ origin.nonce_ = std::move(nonce);
+ origin.tuple_ = tuple;
+ return origin;
+}
+
std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
out << origin.GetDebugString();
return out;
@@ -351,4 +431,17 @@
return !(*this == other);
}
+namespace debug {
+
+ScopedOriginCrashKey::ScopedOriginCrashKey(
+ gurl_base::debug::CrashKeyString* crash_key,
+ const url::Origin* value)
+ : gurl_base::debug::ScopedCrashKeyString(
+ crash_key,
+ value ? value->GetDebugString() : "nullptr") {}
+
+ScopedOriginCrashKey::~ScopedOriginCrashKey() = default;
+
+} // namespace debug
+
} // namespace url
diff --git a/url/origin.h b/url/origin.h
index 58c9221..351c482 100644
--- a/url/origin.h
+++ b/url/origin.h
@@ -7,21 +7,37 @@
#include <stdint.h>
+#include <memory>
#include <string>
#include "polyfills/base/component_export.h"
#include "polyfills/base/debug/alias.h"
+#include "base/debug/crash_logging.h"
#include "base/optional.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/unguessable_token.h"
+#include "build/build_config.h"
#include "ipc/ipc_param_traits.h"
#include "url/scheme_host_port.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
+#if defined(OS_ANDROID)
+#include <jni.h>
+
+namespace gurl_base {
+namespace android {
+template <typename>
+class ScopedJavaLocalRef;
+template <typename>
+class JavaRef;
+} // namespace android
+} // namespace gurl_base
+#endif // OS_ANDROID
+
class GURL;
namespace blink {
@@ -39,6 +55,11 @@
struct UrlOriginAdapter;
} // namespace mojo
+namespace net {
+class NetworkIsolationKey;
+class OpaqueNonTransientNetworkIsolationKeyTest;
+} // namespace net
+
namespace url {
namespace mojom {
@@ -130,6 +151,9 @@
// 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
// out of everything in the URL which follows the scheme).
// 3. 'file' URLs all parse as ("file", "", 0).
+ //
+ // Note that the returned Origin may have a different scheme and host from
+ // |url| (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL).
static Origin Create(const GURL& url);
// Creates an Origin for the resource |url| as if it were requested
@@ -266,8 +290,16 @@
// and precursor information.
std::string GetDebugString() const;
+#if defined(OS_ANDROID)
+ gurl_base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
+ static Origin FromJavaObject(
+ const gurl_base::android::JavaRef<jobject>& java_origin);
+#endif // OS_ANDROID
+
private:
friend class blink::SecurityOrigin;
+ friend class net::NetworkIsolationKey;
+ friend class net::OpaqueNonTransientNetworkIsolationKeyTest;
friend class OriginTest;
friend struct mojo::UrlOriginAdapter;
friend struct ipc_fuzzer::FuzzTraits<Origin>;
@@ -362,6 +394,16 @@
// used only when trying to send an Origin across an IPC pipe.
gurl_base::Optional<gurl_base::UnguessableToken> GetNonceForSerialization() const;
+ // Serializes this Origin, including its nonce if it is opaque. nullopt is
+ // returned only for a non-opaque origin with an invalid |tuple_|; an unset
+ // nonce is written as zeros. Use of this method should be limited as an
+ // opaque origin will never be matchable in future browser sessions.
+ gurl_base::Optional<std::string> SerializeWithNonce() const;
+
+ // Deserializes an origin from |SerializeWithNonce|. Returns nullopt if the
+ // value was invalid in any way.
+ static gurl_base::Optional<Origin> Deserialize(const std::string& value);
+
// The tuple is used for both tuple origins (e.g. https://example.com:80), as
// well as for opaque origins, where it tracks the tuple origin from which
// the opaque origin was initially derived (we call this the "precursor"
@@ -388,6 +430,21 @@
#define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \
DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128)
+namespace debug {
+
+class COMPONENT_EXPORT(URL) ScopedOriginCrashKey
+ : public gurl_base::debug::ScopedCrashKeyString {
+ public:
+ ScopedOriginCrashKey(gurl_base::debug::CrashKeyString* crash_key,
+ const url::Origin* value);
+ ~ScopedOriginCrashKey();
+
+ ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete;
+ ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete;
+};
+
+} // namespace debug
+
} // namespace url
#endif // URL_ORIGIN_H_
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 2754f23..514169b 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -55,7 +55,6 @@
AddStandardScheme("standard-but-noaccess", SchemeType::SCHEME_WITH_HOST);
AddNoAccessScheme("standard-but-noaccess");
}
- void TearDown() override { url::ResetForTests(); }
::testing::AssertionResult DoEqualityComparisons(const url::Origin& a,
const url::Origin& b,
@@ -96,7 +95,8 @@
return origin.GetNonceForSerialization();
}
- // Wrapper around url::Origin method to expose it to tests.
+ // Wrappers around url::Origin methods to expose them to tests.
+
gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
gurl_base::StringPiece precursor_scheme,
gurl_base::StringPiece precursor_host,
@@ -105,6 +105,17 @@
return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
precursor_scheme, precursor_host, precursor_port, nonce);
}
+
+ gurl_base::Optional<std::string> SerializeWithNonce(const Origin& origin) {
+ return origin.SerializeWithNonce();
+ }
+
+ gurl_base::Optional<Origin> Deserialize(const std::string& value) {
+ return Origin::Deserialize(value);
+ }
+
+ private:
+ ScopedSchemeRegistryForTests scoped_registry_;
};
TEST_F(OriginTest, OpaqueOriginComparison) {
@@ -116,14 +127,14 @@
EXPECT_EQ("", opaque_a.host());
EXPECT_EQ(0, opaque_a.port());
EXPECT_EQ(SchemeHostPort(), opaque_a.GetTupleOrPrecursorTupleIfOpaque());
- EXPECT_TRUE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsInvalid());
+ EXPECT_FALSE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsValid());
EXPECT_TRUE(opaque_b.opaque());
EXPECT_EQ("", opaque_b.scheme());
EXPECT_EQ("", opaque_b.host());
EXPECT_EQ(0, opaque_b.port());
EXPECT_EQ(SchemeHostPort(), opaque_b.GetTupleOrPrecursorTupleIfOpaque());
- EXPECT_TRUE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsInvalid());
+ EXPECT_FALSE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsValid());
// Two default-constructed Origins should always be cross origin to each
// other.
@@ -304,7 +315,6 @@
// Registered URLs
{"ftp://example.com/", "ftp", "example.com", 21},
- {"gopher://example.com/", "gopher", "example.com", 70},
{"ws://example.com/", "ws", "example.com", 80},
{"wss://example.com/", "wss", "example.com", 443},
{"wss://user:pass@example.com/", "wss", "example.com", 443},
@@ -354,9 +364,6 @@
123},
{"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
{"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
-
- // Gopher:
- {"gopher://8u.9.Vx6", "gopher", "8u.9.vx6", 70},
};
for (const auto& test_case : cases) {
@@ -381,7 +388,7 @@
.DeriveNewOpaqueOrigin();
EXPECT_TRUE(derived_opaque.opaque());
EXPECT_NE(origin, derived_opaque);
- EXPECT_FALSE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsInvalid());
+ EXPECT_TRUE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsValid());
EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(),
derived_opaque.GetTupleOrPrecursorTupleIfOpaque());
EXPECT_EQ(derived_opaque, derived_opaque);
@@ -390,8 +397,8 @@
Origin::Resolve(GURL("data:text/html,baz"), origin);
EXPECT_TRUE(derived_opaque_via_data_url.opaque());
EXPECT_NE(origin, derived_opaque_via_data_url);
- EXPECT_FALSE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque()
- .IsInvalid());
+ EXPECT_TRUE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque()
+ .IsValid());
EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(),
derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque());
EXPECT_NE(derived_opaque, derived_opaque_via_data_url);
@@ -627,9 +634,9 @@
};
for (const auto& test_case : kTestCases) {
- SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url
- << ", " << test_case.lower_ascii_domain
- << ")");
+ SCOPED_TRACE(testing::Message()
+ << "(url, domain): (" << test_case.url << ", "
+ << test_case.lower_ascii_domain << ")");
GURL url(test_case.url);
ASSERT_TRUE(url.is_valid());
Origin origin = Origin::Create(url);
@@ -660,6 +667,7 @@
Origin origin = Origin::Create(GURL("cow://"));
EXPECT_TRUE(origin.opaque());
}
+
TEST_F(OriginTest, NonStandardSchemeWithAndroidWebViewHack) {
EnableNonStandardSchemesForAndroidWebView();
Origin origin = Origin::Create(GURL("cow://"));
@@ -667,10 +675,10 @@
EXPECT_EQ("cow", origin.scheme());
EXPECT_EQ("", origin.host());
EXPECT_EQ(0, origin.port());
- ResetForTests();
}
TEST_F(OriginTest, CanBeDerivedFrom) {
+ AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST);
Origin opaque_unique_origin = Origin();
Origin regular_origin = Origin::Create(GURL("https://a.com/"));
@@ -688,7 +696,6 @@
non_standard_scheme_origin.DeriveNewOpaqueOrigin();
// Also, add new standard scheme that is local to the test.
- AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST);
Origin new_standard_origin = Origin::Create(GURL("new-standard://host/"));
Origin new_standard_opaque_precursor_origin =
new_standard_origin.DeriveNewOpaqueOrigin();
@@ -863,4 +870,78 @@
"file:// [internally: file://example.com]");
}
+TEST_F(OriginTest, Deserialize) {
+ std::vector<GURL> valid_urls = {
+ GURL("https://a.com"), GURL("http://a"),
+ GURL("http://a:80"), GURL("file://a.com/etc/passwd"),
+ GURL("file:///etc/passwd"), GURL("http://192.168.1.1"),
+ GURL("http://[2001:db8::1]/"),
+ };
+ for (const GURL& url : valid_urls) {
+ SCOPED_TRACE(url.spec());
+ Origin origin = Origin::Create(url);
+ gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin);
+ ASSERT_TRUE(serialized);
+
+ gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ EXPECT_TRUE(DoEqualityComparisons(origin, deserialized.value(), true));
+ EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
+ }
+}
+
+TEST_F(OriginTest, DeserializeInvalid) {
+ EXPECT_EQ(gurl_base::nullopt, Deserialize(std::string()));
+ EXPECT_EQ(gurl_base::nullopt, Deserialize("deadbeef"));
+ EXPECT_EQ(gurl_base::nullopt, Deserialize("0123456789"));
+ EXPECT_EQ(gurl_base::nullopt, Deserialize("https://a.com"));
+ EXPECT_EQ(gurl_base::nullopt, Deserialize("https://192.168.1.1"));
+}
+
+TEST_F(OriginTest, SerializeTBDNonce) {
+ std::vector<GURL> invalid_urls = {
+ GURL("data:uniqueness"), GURL("data:,"),
+ GURL("data:text/html,Hello!"), GURL("javascript:alert(1)"),
+ GURL("about:blank"), GURL("google.com"),
+ };
+ for (const GURL& url : invalid_urls) {
+ SCOPED_TRACE(url.spec());
+ Origin origin = Origin::Create(url);
+ gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin);
+ gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ // Can't use DoEqualityComparisons here since empty nonces are never ==
+ // unless they are the same object.
+ EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
+ }
+
+ // Same basic test as above, but without a GURL to create tuple_.
+ Origin opaque;
+ gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque);
+ ASSERT_TRUE(serialized);
+
+ gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ // Can't use DoEqualityComparisons here since empty nonces are never == unless
+ // they are the same object.
+ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+}
+
+TEST_F(OriginTest, DeserializeValidNonce) {
+ Origin opaque;
+ GetNonce(opaque);
+
+ gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque);
+ ASSERT_TRUE(serialized);
+
+ gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ EXPECT_TRUE(DoEqualityComparisons(opaque, deserialized.value(), true));
+ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+}
+
} // namespace url
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 290e8a6..c90e0a7 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -134,15 +134,15 @@
ConstructPolicy policy)
: port_(0) {
if (!IsValidInput(scheme, host, port, policy)) {
- GURL_DCHECK(IsInvalid());
+ GURL_DCHECK(!IsValid());
return;
}
scheme_ = std::move(scheme);
host_ = std::move(host);
port_ = port;
- GURL_DCHECK(!IsInvalid()) << "Scheme: " << scheme_ << " Host: " << host_
- << " Port: " << port;
+ GURL_DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
+ << " Port: " << port;
}
SchemeHostPort::SchemeHostPort(gurl_base::StringPiece scheme,
@@ -172,19 +172,19 @@
if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
return;
- scheme.CopyToString(&scheme_);
- host.CopyToString(&host_);
+ scheme_ = std::string(scheme);
+ host_ = std::string(host);
port_ = port;
}
SchemeHostPort::~SchemeHostPort() = default;
-bool SchemeHostPort::IsInvalid() const {
+bool SchemeHostPort::IsValid() const {
// It suffices to just check |scheme_| for emptiness; the other fields are
// never present without it.
GURL_DCHECK(!scheme_.empty() || host_.empty());
GURL_DCHECK(!scheme_.empty() || port_ == 0);
- return scheme_.empty();
+ return !scheme_.empty();
}
std::string SchemeHostPort::Serialize() const {
@@ -198,7 +198,7 @@
url::Parsed parsed;
std::string serialized = SerializeInternal(&parsed);
- if (IsInvalid())
+ if (!IsValid())
return GURL(std::move(serialized), parsed, false);
// SchemeHostPort does not have enough information to determine if an empty
@@ -223,7 +223,7 @@
std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
std::string result;
- if (IsInvalid())
+ if (!IsValid())
return result;
// Reserve enough space for the "normal" case of scheme://host/.
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h
index a2dded1..903a398 100644
--- a/url/scheme_host_port.h
+++ b/url/scheme_host_port.h
@@ -122,7 +122,7 @@
const std::string& host() const { return host_; }
const std::string& scheme() const { return scheme_; }
uint16_t port() const { return port_; }
- bool IsInvalid() const;
+ bool IsValid() const;
// Serializes the SchemeHostPort tuple to a canonical form.
//
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
index 5270c70..e3bcade 100644
--- a/url/scheme_host_port_unittest.cc
+++ b/url/scheme_host_port_unittest.cc
@@ -16,12 +16,11 @@
class SchemeHostPortTest : public testing::Test {
public:
SchemeHostPortTest() = default;
- ~SchemeHostPortTest() override {
- // Reset any added schemes.
- url::ResetForTests();
- }
+ ~SchemeHostPortTest() override = default;
private:
+ url::ScopedSchemeRegistryForTests scoped_registry_;
+
DISALLOW_COPY_AND_ASSIGN(SchemeHostPortTest);
};
@@ -52,7 +51,7 @@
EXPECT_EQ("", invalid.scheme());
EXPECT_EQ("", invalid.host());
EXPECT_EQ(0, invalid.port());
- EXPECT_TRUE(invalid.IsInvalid());
+ EXPECT_FALSE(invalid.IsValid());
EXPECT_EQ(invalid, invalid);
const char* urls[] = {
@@ -76,7 +75,7 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
+ EXPECT_FALSE(tuple.IsValid());
EXPECT_EQ(tuple, tuple);
EXPECT_EQ(tuple, invalid);
EXPECT_EQ(invalid, tuple);
@@ -105,7 +104,7 @@
EXPECT_EQ(test.scheme, tuple.scheme());
EXPECT_EQ(test.host, tuple.host());
EXPECT_EQ(test.port, tuple.port());
- EXPECT_FALSE(tuple.IsInvalid());
+ EXPECT_TRUE(tuple.IsValid());
EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
@@ -141,7 +140,7 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
+ EXPECT_FALSE(tuple.IsValid());
EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
@@ -170,7 +169,7 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
+ EXPECT_FALSE(tuple.IsValid());
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -205,7 +204,7 @@
EXPECT_EQ(test.scheme, tuple.scheme());
EXPECT_EQ(test.host, tuple.host());
EXPECT_EQ(test.port, tuple.port());
- EXPECT_FALSE(tuple.IsInvalid());
+ EXPECT_TRUE(tuple.IsValid());
EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index 47668f6..a5ec808 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -7,6 +7,7 @@
#include <algorithm>
#include "polyfills/base/logging.h"
+#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_constants.h"
@@ -62,6 +63,39 @@
#endif // WIN32
+template <typename CHAR>
+bool IsValidScheme(const CHAR* url, const Component& scheme) {
+ // Caller should ensure that the |scheme| is not empty.
+ GURL_DCHECK_NE(0, scheme.len);
+
+ // From https://url.spec.whatwg.org/#scheme-start-state:
+ // scheme start state:
+ // 1. If c is an ASCII alpha, append c, lowercased, to buffer, and set
+ // state to scheme state.
+ // 2. Otherwise, if state override is not given, set state to no scheme
+ // state, and decrease pointer by one.
+ // 3. Otherwise, validation error, return failure.
+ // Note that both step 2 and step 3 mean that the scheme was not valid.
+ if (!gurl_base::IsAsciiAlpha(url[scheme.begin]))
+ return false;
+
+ // From https://url.spec.whatwg.org/#scheme-state:
+ // scheme state:
+ // 1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E
+ // (.), append c, lowercased, to buffer.
+ // 2. Otherwise, if c is U+003A (:), then [...]
+ //
+ // We begin at |scheme.begin + 1|, because the character at |scheme.begin| has
+ // already been checked by gurl_base::IsAsciiAlpha above.
+ int scheme_end = scheme.end();
+ for (int i = scheme.begin + 1; i < scheme_end; i++) {
+ if (!CanonicalSchemeChar(url[i]))
+ return false;
+ }
+
+ return true;
+}
+
// See IsRelativeURL in the header file for usage.
template<typename CHAR>
bool DoIsRelativeURL(const char* base,
@@ -126,17 +160,14 @@
}
// If the scheme isn't valid, then it's relative.
- int scheme_end = scheme.end();
- for (int i = scheme.begin; i < scheme_end; i++) {
- if (!CanonicalSchemeChar(url[i])) {
- if (!is_base_hierarchical) {
- // Don't allow relative URLs if the base scheme doesn't support it.
- return false;
- }
- *relative_component = MakeRange(begin, url_len);
- *is_relative = true;
- return true;
+ if (!IsValidScheme(url, scheme)) {
+ if (!is_base_hierarchical) {
+ // Don't allow relative URLs if the base scheme doesn't support it.
+ return false;
}
+ *relative_component = MakeRange(begin, url_len);
+ *is_relative = true;
+ return true;
}
// If the scheme is not the same, then we can't count it as relative.
@@ -287,9 +318,8 @@
// Canonical URLs always have a path, so we can use that offset. Reserve
// enough room for the base URL, the new path, and some extra bytes for
// possible escaped characters.
- output->ReserveSizeIfNeeded(
- base_parsed.path.begin +
- std::max(path.end(), std::max(query.end(), ref.end())));
+ output->ReserveSizeIfNeeded(base_parsed.path.begin +
+ std::max({path.end(), query.end(), ref.end()}));
output->Append(base_url, base_parsed.path.begin);
if (path.len > 0) {
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc
index 78f7773..6a94f50 100644
--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc
@@ -128,10 +128,6 @@
else if (!strncmp(scheme, kWssScheme, scheme_len))
default_port = 443;
break;
- case 6:
- if (!strncmp(scheme, kGopherScheme, scheme_len))
- default_port = 70;
- break;
case 2:
if (!strncmp(scheme, kWsScheme, scheme_len))
default_port = 80;
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 9d1a458..c3f02fb 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -1393,7 +1393,7 @@
{"https://foo:80/", "https://foo:80/", true},
{"ftp://foo:21/", "ftp://foo/", true},
{"ftp://foo:80/", "ftp://foo:80/", true},
- {"gopher://foo:70/", "gopher://foo/", true},
+ {"gopher://foo:70/", "gopher://foo:70/", true},
{"gopher://foo:443/", "gopher://foo:443/", true},
{"ws://foo:80/", "ws://foo/", true},
{"ws://foo:81/", "ws://foo:81/", true},
@@ -2325,14 +2325,12 @@
{"ftp", 21},
{"ws", 80},
{"wss", 443},
- {"gopher", 70},
{"fake-scheme", PORT_UNSPECIFIED},
{"HTTP", PORT_UNSPECIFIED},
{"HTTPS", PORT_UNSPECIFIED},
{"FTP", PORT_UNSPECIFIED},
{"WS", PORT_UNSPECIFIED},
{"WSS", PORT_UNSPECIFIED},
- {"GOPHER", PORT_UNSPECIFIED},
};
for (auto& test_case : cases) {
diff --git a/url/url_constants.cc b/url/url_constants.cc
index 3540240..69399e4 100644
--- a/url/url_constants.cc
+++ b/url/url_constants.cc
@@ -20,11 +20,13 @@
const char kFileScheme[] = "file";
const char kFileSystemScheme[] = "filesystem";
const char kFtpScheme[] = "ftp";
-const char kGopherScheme[] = "gopher";
const char kHttpScheme[] = "http";
const char kHttpsScheme[] = "https";
const char kJavaScriptScheme[] = "javascript";
const char kMailToScheme[] = "mailto";
+// This is for QuicTransport (https://wicg.github.io/web-transport/).
+// See also: https://www.iana.org/assignments/uri-schemes/prov/quic-transport
+const char kQuicTransportScheme[] = "quic-transport";
const char kTelScheme[] = "tel";
const char kWsScheme[] = "ws";
const char kWssScheme[] = "wss";
diff --git a/url/url_constants.h b/url/url_constants.h
index c077b8d..3c04d68 100644
--- a/url/url_constants.h
+++ b/url/url_constants.h
@@ -26,11 +26,11 @@
COMPONENT_EXPORT(URL) extern const char kFileScheme[];
COMPONENT_EXPORT(URL) extern const char kFileSystemScheme[];
COMPONENT_EXPORT(URL) extern const char kFtpScheme[];
-COMPONENT_EXPORT(URL) extern const char kGopherScheme[];
COMPONENT_EXPORT(URL) extern const char kHttpScheme[];
COMPONENT_EXPORT(URL) extern const char kHttpsScheme[];
COMPONENT_EXPORT(URL) extern const char kJavaScriptScheme[];
COMPONENT_EXPORT(URL) extern const char kMailToScheme[];
+COMPONENT_EXPORT(URL) extern const char kQuicTransportScheme[];
COMPONENT_EXPORT(URL) extern const char kTelScheme[];
COMPONENT_EXPORT(URL) extern const char kWsScheme[];
COMPONENT_EXPORT(URL) extern const char kWssScheme[];
diff --git a/url/url_util.cc b/url/url_util.cc
index 47fc499..7c72bfc 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -6,8 +6,9 @@
#include <stddef.h>
#include <string.h>
+#include <atomic>
-#include "base/debug/leak_annotations.h"
+#include "base/compiler_specific.h"
#include "polyfills/base/logging.h"
#include "base/no_destructor.h"
#include "base/stl_util.h"
@@ -21,6 +22,12 @@
namespace {
+// A pair for representing a standard scheme name and the SchemeType for it.
+struct SchemeWithType {
+ std::string scheme;
+ SchemeType type;
+};
+
// List of currently registered schemes and associated properties.
struct SchemeRegistry {
// Standard format schemes (see header for details).
@@ -34,11 +41,11 @@
// canonicalization.
{kFileScheme, SCHEME_WITH_HOST},
{kFtpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- {kGopherScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
{kWssScheme,
SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket secure.
{kWsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket.
{kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+ {kQuicTransportScheme, SCHEME_WITH_HOST_AND_PORT},
};
// Schemes that are allowed for referrers.
@@ -49,10 +56,7 @@
// Schemes that do not trigger mixed content warning.
std::vector<std::string> secure_schemes = {
- kHttpsScheme,
- kAboutScheme,
- kDataScheme,
- kWssScheme,
+ kHttpsScheme, kAboutScheme, kDataScheme, kQuicTransportScheme, kWssScheme,
};
// Schemes that normal pages cannot link to or access (i.e., with the same
@@ -93,11 +97,26 @@
bool allow_non_standard_schemes = false;
};
-SchemeRegistry* GetSchemeRegistry() {
+// See the LockSchemeRegistries declaration in the header.
+bool scheme_registries_locked = false;
+
+// Ensure that the schemes aren't modified after first use.
+static std::atomic<bool> g_scheme_registries_used{false};
+
+// Gets the scheme registry without locking the schemes. This should *only* be
+// used for adding schemes to the registry.
+SchemeRegistry* GetSchemeRegistryWithoutLocking() {
static gurl_base::NoDestructor<SchemeRegistry> registry;
return registry.get();
}
+const SchemeRegistry& GetSchemeRegistry() {
+#if GURL_DCHECK_IS_ON()
+ g_scheme_registries_used.store(true);
+#endif
+ return *GetSchemeRegistryWithoutLocking();
+}
+
// Pass this enum through for methods which would like to know if whitespace
// removal is necessary.
enum WhitespaceRemovalPolicy {
@@ -105,9 +124,6 @@
DO_NOT_REMOVE_WHITESPACE,
};
-// See the LockSchemeRegistries declaration in the header.
-bool scheme_registries_locked = false;
-
// This template converts a given character type to the corresponding
// StringPiece type.
template<typename CHAR> struct CharToStringPiece {
@@ -157,7 +173,7 @@
template<typename CHAR>
bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
return DoIsInSchemes(spec, scheme, type,
- GetSchemeRegistry()->standard_schemes);
+ GetSchemeRegistry().standard_schemes);
}
@@ -168,7 +184,7 @@
Component* found_scheme) {
// Before extracting scheme, canonicalize the URL to remove any whitespace.
// This matches the canonicalization done in DoCanonicalize function.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
int spec_len;
const CHAR* spec =
RemoveURLWhitespace(str, str_len, &whitespace_buffer, &spec_len, nullptr);
@@ -197,7 +213,7 @@
// Remove any whitespace from the middle of the relative URL if necessary.
// Possibly this will result in copying to the new buffer.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
if (whitespace_policy == REMOVE_WHITESPACE) {
spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len,
&output_parsed->potentially_dangling_markup);
@@ -276,7 +292,7 @@
Parsed* output_parsed) {
// Remove any whitespace from the middle of the relative URL, possibly
// copying to the new buffer.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
int relative_length;
const CHAR* relative = RemoveURLWhitespace(
in_relative, in_relative_length, &whitespace_buffer, &relative_length,
@@ -317,7 +333,7 @@
Parsed base_parsed_authority;
ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);
if (base_parsed_authority.host.is_nonempty()) {
- RawCanonOutputT<char> temporary_output;
+ STACK_UNINITIALIZED RawCanonOutputT<char> temporary_output;
bool did_resolve_succeed =
ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
relative_component, charset_converter,
@@ -369,7 +385,7 @@
if (replacements.IsSchemeOverridden()) {
// Canonicalize the new scheme so it is 8-bit and can be concatenated with
// the existing spec.
- RawCanonOutput<128> scheme_replaced;
+ STACK_UNINITIALIZED RawCanonOutput<128> scheme_replaced;
Component scheme_replaced_parsed;
CanonicalizeScheme(replacements.sources().scheme,
replacements.components().scheme,
@@ -386,7 +402,7 @@
// We now need to completely re-parse the resulting string since its meaning
// may have changed with the different scheme.
- RawCanonOutput<128> recanonicalized;
+ STACK_UNINITIALIZED RawCanonOutput<128> recanonicalized;
Parsed recanonicalized_parsed;
DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
REMOVE_WHITESPACE, charset_converter, &recanonicalized,
@@ -441,8 +457,16 @@
return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
}
-void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
- GURL_DCHECK(schemes);
+void DoSchemeModificationPreamble() {
+ // If this assert triggers, it means you've called Add*Scheme after
+ // the SchemeRegistry has been used.
+ //
+ // This normally means you're trying to set up a new scheme too late or using
+ // the SchemeRegistry too early in your application's init process. Make sure
+ // that you haven't added any static GURL initializers in tests.
+ GURL_DCHECK(!g_scheme_registries_used.load())
+ << "Trying to add a scheme after the lists have been used.";
+
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
// LockSchemeRegistries for more).
@@ -452,122 +476,145 @@
// and calls LockSchemeRegistries, and add your new scheme there.
GURL_DCHECK(!scheme_registries_locked)
<< "Trying to add a scheme after the lists have been locked.";
+}
- size_t scheme_len = strlen(new_scheme);
- if (scheme_len == 0)
- return;
-
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
+ DoSchemeModificationPreamble();
+ GURL_DCHECK(schemes);
+ GURL_DCHECK(strlen(new_scheme) > 0);
GURL_DCHECK_EQ(gurl_base::ToLowerASCII(new_scheme), new_scheme);
- schemes->push_back(std::string(new_scheme));
+ GURL_DCHECK(std::find(schemes->begin(), schemes->end(), new_scheme) ==
+ schemes->end());
+ schemes->push_back(new_scheme);
}
void DoAddSchemeWithType(const char* new_scheme,
SchemeType type,
std::vector<SchemeWithType>* schemes) {
+ DoSchemeModificationPreamble();
GURL_DCHECK(schemes);
- // If this assert triggers, it means you've called Add*Scheme after
- // LockSchemeRegistries has been called (see the header file for
- // LockSchemeRegistries for more).
- //
- // This normally means you're trying to set up a new scheme too late in your
- // application's init process. Locate where your app does this initialization
- // and calls LockSchemeRegistries, and add your new scheme there.
- GURL_DCHECK(!scheme_registries_locked)
- << "Trying to add a scheme after the lists have been locked.";
-
- size_t scheme_len = strlen(new_scheme);
- if (scheme_len == 0)
- return;
-
+ GURL_DCHECK(strlen(new_scheme) > 0);
GURL_DCHECK_EQ(gurl_base::ToLowerASCII(new_scheme), new_scheme);
- // Duplicate the scheme into a new buffer and add it to the list of standard
- // schemes. This pointer will be leaked on shutdown.
- char* dup_scheme = new char[scheme_len + 1];
- ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);
- memcpy(dup_scheme, new_scheme, scheme_len + 1);
-
- SchemeWithType scheme_with_type;
- scheme_with_type.scheme = dup_scheme;
- scheme_with_type.type = type;
- schemes->push_back(scheme_with_type);
+ GURL_DCHECK(std::find_if(schemes->begin(), schemes->end(),
+ [&new_scheme](const SchemeWithType& scheme) {
+ return scheme.scheme == new_scheme;
+ }) == schemes->end());
+ schemes->push_back({new_scheme, type});
}
} // namespace
-void ResetForTests() {
- *GetSchemeRegistry() = SchemeRegistry();
+void ClearSchemesForTests() {
+ GURL_DCHECK(!g_scheme_registries_used.load())
+ << "Schemes already used "
+ << "(use ScopedSchemeRegistryForTests to relax for tests).";
+ GURL_DCHECK(!scheme_registries_locked)
+ << "Schemes already locked "
+ << "(use ScopedSchemeRegistryForTests to relax for tests).";
+ *GetSchemeRegistryWithoutLocking() = SchemeRegistry();
}
+class ScopedSchemeRegistryInternal {
+ public:
+ ScopedSchemeRegistryInternal()
+ : registry_(std::make_unique<SchemeRegistry>(
+ *GetSchemeRegistryWithoutLocking())) {
+ g_scheme_registries_used.store(false);
+ scheme_registries_locked = false;
+ }
+ ~ScopedSchemeRegistryInternal() {
+ *GetSchemeRegistryWithoutLocking() = *registry_;
+ g_scheme_registries_used.store(true);
+ scheme_registries_locked = true;
+ }
+
+ private:
+ std::unique_ptr<SchemeRegistry> registry_;
+};
+
+ScopedSchemeRegistryForTests::ScopedSchemeRegistryForTests()
+ : internal_(std::make_unique<ScopedSchemeRegistryInternal>()) {}
+
+ScopedSchemeRegistryForTests::~ScopedSchemeRegistryForTests() = default;
+
void EnableNonStandardSchemesForAndroidWebView() {
- GetSchemeRegistry()->allow_non_standard_schemes = true;
+ DoSchemeModificationPreamble();
+ GetSchemeRegistryWithoutLocking()->allow_non_standard_schemes = true;
}
bool AllowNonStandardSchemesForAndroidWebView() {
- return GetSchemeRegistry()->allow_non_standard_schemes;
+ return GetSchemeRegistry().allow_non_standard_schemes;
}
void AddStandardScheme(const char* new_scheme, SchemeType type) {
- DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistry()->standard_schemes);
+ DoAddSchemeWithType(new_scheme, type,
+ &GetSchemeRegistryWithoutLocking()->standard_schemes);
}
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
- DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistry()->referrer_schemes);
+ DoAddSchemeWithType(new_scheme, type,
+ &GetSchemeRegistryWithoutLocking()->referrer_schemes);
}
void AddSecureScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->secure_schemes);
+ DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->secure_schemes);
}
const std::vector<std::string>& GetSecureSchemes() {
- return GetSchemeRegistry()->secure_schemes;
+ return GetSchemeRegistry().secure_schemes;
}
void AddLocalScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->local_schemes);
+ DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->local_schemes);
}
const std::vector<std::string>& GetLocalSchemes() {
- return GetSchemeRegistry()->local_schemes;
+ return GetSchemeRegistry().local_schemes;
}
void AddNoAccessScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->no_access_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->no_access_schemes);
}
const std::vector<std::string>& GetNoAccessSchemes() {
- return GetSchemeRegistry()->no_access_schemes;
+ return GetSchemeRegistry().no_access_schemes;
}
void AddCorsEnabledScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->cors_enabled_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->cors_enabled_schemes);
}
const std::vector<std::string>& GetCorsEnabledSchemes() {
- return GetSchemeRegistry()->cors_enabled_schemes;
+ return GetSchemeRegistry().cors_enabled_schemes;
}
void AddWebStorageScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->web_storage_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->web_storage_schemes);
}
const std::vector<std::string>& GetWebStorageSchemes() {
- return GetSchemeRegistry()->web_storage_schemes;
+ return GetSchemeRegistry().web_storage_schemes;
}
void AddCSPBypassingScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->csp_bypassing_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->csp_bypassing_schemes);
}
const std::vector<std::string>& GetCSPBypassingSchemes() {
- return GetSchemeRegistry()->csp_bypassing_schemes;
+ return GetSchemeRegistry().csp_bypassing_schemes;
}
void AddEmptyDocumentScheme(const char* new_scheme) {
- DoAddScheme(new_scheme, &GetSchemeRegistry()->empty_document_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->empty_document_schemes);
}
const std::vector<std::string>& GetEmptyDocumentSchemes() {
- return GetSchemeRegistry()->empty_document_schemes;
+ return GetSchemeRegistry().empty_document_schemes;
}
void LockSchemeRegistries() {
@@ -599,7 +646,7 @@
bool IsReferrerScheme(const char* spec, const Component& scheme) {
SchemeType unused_scheme_type;
return DoIsInSchemes(spec, scheme, &unused_scheme_type,
- GetSchemeRegistry()->referrer_schemes);
+ GetSchemeRegistry().referrer_schemes);
}
bool FindAndCompareScheme(const char* str,
@@ -653,7 +700,7 @@
}
bool HostIsIPAddress(gurl_base::StringPiece host) {
- url::RawCanonOutputT<char, 128> ignored_output;
+ STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> ignored_output;
url::CanonHostInfo host_info;
url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
&ignored_output, &host_info);
@@ -732,7 +779,7 @@
int length,
DecodeURLMode mode,
CanonOutputW* output) {
- RawCanonOutputT<char> unescaped_chars;
+ STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
for (int i = 0; i < length; i++) {
if (input[i] == '%') {
unsigned char ch;
diff --git a/url/url_util.h b/url/url_util.h
index 473ae5f..1816637 100644
--- a/url/url_util.h
+++ b/url/url_util.h
@@ -5,6 +5,7 @@
#ifndef URL_URL_UTIL_H_
#define URL_URL_UTIL_H_
+#include <memory>
#include <string>
#include <vector>
@@ -19,8 +20,22 @@
// Init ------------------------------------------------------------------------
-// Resets all custom schemes to the default values. Not thread-safe.
-COMPONENT_EXPORT(URL) void ResetForTests();
+// Used for tests that need to reset schemes. Note that this can only be used
+// in conjunction with ScopedSchemeRegistryForTests.
+COMPONENT_EXPORT(URL) void ClearSchemesForTests();
+
+class ScopedSchemeRegistryInternal;
+
+// Stores the SchemeRegistry upon creation, allowing tests to modify a copy of
+// it, and restores the original SchemeRegistry when deleted.
+class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests {
+ public:
+ ScopedSchemeRegistryForTests();
+ ~ScopedSchemeRegistryForTests();
+
+ private:
+ std::unique_ptr<ScopedSchemeRegistryInternal> internal_;
+};
// Schemes ---------------------------------------------------------------------
@@ -37,15 +52,9 @@
// Whether or not SchemeHostPort and Origin allow non-standard schemes.
COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();
-// A pair for representing a standard scheme name and the SchemeType for it.
-struct COMPONENT_EXPORT(URL) SchemeWithType {
- const char* scheme;
- SchemeType type;
-};
-
// The following Add*Scheme method are not threadsafe and can not be called
// concurrently with any other url_util function. They will assert if the lists
-// of schemes have been locked (see LockSchemeRegistries).
+// of schemes have been locked (see LockSchemeRegistries), or used.
// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index 741c1dc..a63294f 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -17,12 +17,11 @@
class URLUtilTest : public testing::Test {
public:
URLUtilTest() = default;
- ~URLUtilTest() override {
- // Reset any added schemes.
- ResetForTests();
- }
+ ~URLUtilTest() override = default;
private:
+ ScopedSchemeRegistryForTests scoped_registry_;
+
DISALLOW_COPY_AND_ASSIGN(URLUtilTest);
};
@@ -92,21 +91,24 @@
}
TEST_F(URLUtilTest, AddReferrerScheme) {
- const char kFooScheme[] = "foo";
+ static const char kFooScheme[] = "foo";
EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+ url::ScopedSchemeRegistryForTests scoped_registry;
AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
}
TEST_F(URLUtilTest, ShutdownCleansUpSchemes) {
- const char kFooScheme[] = "foo";
+ static const char kFooScheme[] = "foo";
EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
- AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
- EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+ {
+ url::ScopedSchemeRegistryForTests scoped_registry;
+ AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
+ EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+ }
- ResetForTests();
EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
}