Update googleurl to the latest version

This updates googleurl to the bfe9d19846d916ccc17fb8c5d9e1d256fc77a243 revision of upstream from Mon Mar 16 23:52:37 2020 +0000.
diff --git a/AUTHORS b/AUTHORS index 13d16fb..a8d90ac 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -7,7 +7,10 @@ # Organization <fnmatch pattern> # # See python fnmatch module documentation for more information. +# +# Please keep the list sorted. +# BEGIN individuals section. Aaron Boushley <boushley@gmail.com> Aaron Jacobs <samusaaron3@gmail.com> Aaron Leventhal <aaronlevbugs@gmail.com> @@ -18,9 +21,11 @@ Abhishek Agarwal <abhishek.a21@samsung.com> Abhishek Kanike <abhishek.ka@samsung.com> Abhishek Singh <abhi.rathore@samsung.com> +Abul Hasan Md Osama <ahm.osama@samsung.com> Adam Bonner <abonner-chromium@solscope.com> Adam Bujalski <abujalski@gmail.com> Adam Kallai <kadam@inf.u-szeged.hu> +Adam Labuda <a.labuda@samsung.com> Adam Roben <adam@github.com> Adam Treat <adam.treat@samsung.com> Adam Yi <i@adamyi.com> @@ -41,6 +46,7 @@ Alex Scheele <alexscheele@gmail.com> Alexander Douglas <agdoug@amazon.com> Alexander Guettler <alexander@guettler.io> +Alexander Rezepkin <etu@vivaldi.net> Alexander Shalamov <alexander.shalamov@intel.com> Alexander Sulfrian <alexander@sulfrian.net> Alexandre Abreu <wiss1976@gmail.com> @@ -65,6 +71,7 @@ Amruth Raj <amruthraj@motorola.com> Amruth Raj <ckqr36@motorola.com> Anand Ratn <anand.ratn@samsung.com> +Anant Jain <anant90@gmail.com> Anastasios Cassiotis <tom.cassiotis@gmail.com> anatoly techtonik <techtonik@gmail.com> Ancil George <ancilgeorge@samsung.com> @@ -74,6 +81,7 @@ Andrei Parvu <parvu@adobe.com> Andrew Boyarshin <andrew.boyarshin@gmail.com> Andrew Brampton <me@bramp.net> +Andrew Brindamour <abrindamour@bluejeans.com> Andrew Hung <andrhung@amazon.com> Andrew Jorgensen <ajorgens@amazon.com> Andrew MacPherson <andrew.macpherson@soundtrap.com> @@ -133,6 +141,7 @@ Branden Archer <bma4@zips.uakron.edu> Brendan Kirby <brendan.kirby@imgtec.com> Brendan Long <self@brendanlong.com> +Brian Clifton <clifton@brave.com> Brian G. 
Merrell <bgmerrell@gmail.com> Brian Konzman, SJ <b.g.konzman@gmail.com> Brian Luft <brian@electroly.com> @@ -184,6 +193,7 @@ Clement Scheelfeldt Skau <clementskau@gmail.com> Clinton Staley <clintstaley@gmail.com> Connor Pearson <cjp822@gmail.com> +Conrad Irwin <conrad.irwin@gmail.com> Craig Schlenter <craig.schlenter@gmail.com> Csaba Osztrogonác <ossy.szeged@gmail.com> Daegyu Lee <na7jun8gi@gmail.com> @@ -200,6 +210,7 @@ Daniel Lockyer <thisisdaniellockyer@gmail.com> Daniel Nishi <dhnishi@gmail.com> Daniel Platz <daplatz@googlemail.com> +Daniel Playfair Cal <daniel.playfair.cal@gmail.com> Daniel Shaulov <dshaulov@ptc.com> Daniel Trebbien <dtrebbien@gmail.com> Daniel Waxweiler <daniel.waxweiler@gmail.com> @@ -213,6 +224,7 @@ David Fox <david@davidjfox.com> David Futcher <david.mike.futcher@gmail.com> David Leen <davileen@amazon.com> +David Manouchehri <david@davidmanouchehri.com> David McAllister <mcdavid@amazon.com> David Michael Barr <david.barr@samsung.com> David Spellman <dspell@amazon.com> @@ -222,6 +234,7 @@ Debug Wang <debugwang@tencent.com> Deepak Dilip Borade <deepak.db@samsung.com> Deepak Mittal <deepak.m1@samsung.com> +Deepak Mohan <hop2deep@gmail.com> Deepak Sharma <deepak.sharma@amd.com> Deepak Singla <deepak.s@samsung.com> Deokjin Kim <deokjin81.kim@samsung.com> @@ -273,13 +286,16 @@ Evangelos Foutras <evangelos@foutrelis.com> Evgeniy Dushistov <dushistov@gmail.com> Evgeny Agafonchikov <evgeny.agafonchikov@akvelon.com> +Fabian Henneke <fabian.henneke@gmail.com> Fabien Tassin <fta@sofaraway.org> +Felipe Erias Morandeira <felipeerias@gmail.com> Felix H. 
Dahlke <fhd@ubercode.de> Fengrong Fang <fr.fang@samsung.com> Fernando Jiménez Moreno <ferjmoreno@gmail.com> Finbar Crago <finbar.crago@gmail.com> François Beaufort <beaufort.francois@gmail.com> Francois Kritzinger <francoisk777@gmail.com> +Francois Marier <francois@brave.com> Francois Rauch <leopardb@gmail.com> Frankie Dintino <fdintino@theatlantic.com> Franklin Ta <fta2012@gmail.com> @@ -347,6 +363,7 @@ Huayong Xu <huayong.xu@samsung.com> Hugo Holgersson <hugo.holgersson@sonymobile.com> Hui Wang <wanghui07050707@gmail.com> +Hui Wang <wanghui210@huawei.com> Huiwon Jo <jhwon0415@gmail.com> Huy Duong <huy.duongdinh@gmail.com> Hwanseung Lee <hs1217.lee@gmail.com> @@ -365,6 +382,8 @@ Ilia Demianenko <ilia.demianenko@gmail.com> Ilia K <ki.stfu@gmail.com> Ilya Konstantinov <ilya.konstantinov@gmail.com> +Imam Mohammad Bokhary <imam.bokhary@samsung.com> +Imranur Rahman <i.rahman@samsung.com> Imranur Rahman <ir.shimul@gmail.com> Ion Rosca <rosca@adobe.com> Irmak Kavasoglu <irmakkavasoglu@gmail.com> @@ -415,12 +434,16 @@ Jesse Miller <jesse@jmiller.biz> Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com> Jiadong Zhu <jiadong.zhu@linaro.org> +Jiahe Zhang <jiahe.zhang@intel.com> Jiajia Qin <jiajia.qin@intel.com> Jiajie Hu <jiajie.hu@intel.com> Jianjun Zhu <jianjun.zhu@intel.com> Jianneng Zhong <muzuiget@gmail.com> Jiawei Shao <jiawei.shao@intel.com> +Jiaxun Wei <leuisken@gmail.com> +Jiaxun Yang <jiaxun.yang@flygoat.com> Jie Chen <jie.a.chen@intel.com> +Jihan Chao <jihan@bluejeans.com> Jihoon Chung <j.c@navercorp.com> Jihoon Chung <jihoon@gmail.com> Jihun Brent Kim <devgrapher@gmail.com> @@ -451,9 +474,11 @@ Jonathan Frazer <listedegarde@gmail.com> Jonathan Garbee <jonathan@garbee.me> Jonathan Hacker <jhacker@arcanefour.com> +Jonathan Kingston <kingstonmailbox@gmail.com> Jongdeok Kim <jongdeok.kim@navercorp.com> Jongheon Kim <sapzape@gmail.com> JongKwon Lee <jongkwon.lee@navercorp.com> +Jongmok Kim <jongmok.kim@navercorp.com> Jongsoo Lee 
<leejongsoo@gmail.com> Joone Hur <joone.hur@intel.com> Joonghun Park <pjh0718@gmail.com> @@ -466,6 +491,7 @@ Joshua Roesslein <jroesslein@gmail.com> Josué Ratelle <jorat1346@gmail.com> Josyula Venkat Narasimham <venkat.nj@samsung.com> +Joyer Huang <collger@gmail.com> Juan Jose Lopez Jaimez <jj.lopezjaimez@gmail.com> Juhui Lee <juhui24.lee@samsung.com> Julien Brianceau <jbriance@cisco.com> @@ -479,6 +505,7 @@ JungJik Lee <jungjik.lee@samsung.com> Jungkee Song <jungkee.song@samsung.com> Junmin Zhu <junmin.zhu@intel.com> +Junsong Li <ljs.darkfish@gmail.com> Justin Okamoto <justmoto@amazon.com> Justin Ribeiro <justin@justinribeiro.com> Jüri Valdmann <juri.valdmann@qt.io> @@ -503,12 +530,14 @@ Kenneth Rohde Christiansen <kenneth.r.christiansen@intel.com> Kenneth Strickland <ken.strickland@gmail.com> Kenneth Zhou <knthzh@gmail.com> +Kenny Levinsen <kl@kl.wtf> Keonho Kim <keonho07.kim@samsung.com> Ketan Goyal <ketan.goyal@samsung.com> Kevin Gibbons <bakkot@gmail.com> Kevin Lee Helpingstine <sig11@reprehensible.net> Kevin M. 
McCormick <mckev@amazon.com> Khasim Syed Mohammed <khasim.mohammed@linaro.org> +Khem Raj <raj.khem@gmail.com> Kihong Kwon <kihong.kwon@samsung.com> Kihoon Ko <rhrlgns777@gmail.com> Kihwang Kim <pwangkk@gmail.com> @@ -614,6 +643,7 @@ Md Jobed Hossain <jrony15@gmail.com> Md Sami Uddin <md.sami@samsung.com> Michael Cirone <mikecirone@gmail.com> +Michael Forney <mforney@mforney.org> Michael Gilbert <floppymaster@gmail.com> Michael Lopez <lopes92290@gmail.com> Michael Morrison <codebythepound@gmail.com> @@ -630,6 +660,7 @@ Mikhail Pozdnyakov <mikhail.pozdnyakov@intel.com> Milko Leporis <milko.leporis@imgtec.com> Milton Chiang <milton.chiang@mediatek.com> +Milutin Smiljanic <msmiljanic.gm@gmail.com> Minggang Wang <minggang.wang@intel.com> Mingmin Xie <melvinxie@gmail.com> Minjeong Lee <apenr1234@gmail.com> @@ -679,6 +710,7 @@ Oliver Dunk <oliver@oliverdunk.com> Olli Raula (Old name Olli Syrjälä) <olli.raula@intel.com> Omar Sandoval <osandov@osandov.com> +Owen Yuwono <owenyuwono@gmail.com> Pan Deng <pan.deng@intel.com> Parag Radke <nrqv63@motorola.com> Paritosh Kumar <paritosh.in@samsung.com> @@ -701,6 +733,7 @@ Pavel Ivanov <paivanof@gmail.com> Pawel Forysiuk <p.forysiuk@samsung.com> Paweł Hajdan jr <phajdan.jr@gmail.com> +Piotr Zarycki <piotr.zarycki@gmail.com> Payal Pandey <payal.pandey@samsung.com> Peng Hu <penghu@tencent.com> Peng Jiang <leiyi.jp@gmail.com> @@ -735,12 +768,14 @@ Qi Yang <qi1988.yang@samsung.com> Qiankun Miao <qiankun.miao@intel.com> Qing Zhang <qing.zhang@intel.com> +Qingmei Li <qingmei.li@vivo.com> Radu Stavila <stavila@adobe.com> Radu Velea <radu.velea@intel.com> Rafael Antognolli <rafael.antognolli@intel.com> Raghavendra Ghatage <r.ghatage@samsung.com> Raghu Ram Nagaraj <r.nagaraj@samsung.com> Rahul Gupta <rahul.g@samsung.com> +Rajesh Mahindra <rmahindra@uber.com> Rajneesh Rana <rajneesh.r@samsung.com> Raman Tenneti <raman.tenneti@gmail.com> Ramkumar Gokarnesan <ramkumar.gokarnesan@gmail.com> @@ -824,6 +859,7 @@ Sergey Kipet 
<sergey.kipet@gmail.com> Sergey Putilin <p.sergey@samsung.com> Sergey Shekyan <shekyan@gmail.com> +Sergey Talantov <sergey.talantov@gmail.com> Sergio Carlos Morales Angeles <carloschilazo@gmail.com> Sergiy Belozorov <rryk.ua@gmail.com> Seshadri Mahalingam <seshadri.mahalingam@gmail.com> @@ -848,6 +884,7 @@ Shirish S <shirish.s@amd.com> Shiva Kumar <shiva.k1@samsung.com> Shivakumar JM <shiva.jm@samsung.com> +Shobhit Goel <shobhit.goel@samsung.com> Shouqun Liu <liushouqun@xiaomi.com> Shouqun Liu <shouqun.liu@intel.com> Shreeram Kushwaha <shreeram.k@samsung.com> @@ -857,6 +894,7 @@ Siba Samal <siba.samal@samsung.com> Siddharth Bagai <b.siddharth@samsung.com> Siddharth Shankar <funkysidd@gmail.com> +Simeon Kuran <simeon.kuran@gmail.com> Simon Arlott <simon.arlott@gmail.com> Simon La Macchia <smacchia@amazon.com> Siva Kumar Gunturi <siva.gunturi@samsung.com> @@ -870,6 +908,7 @@ Sreerenj Balachandran <sreerenj.balachandran@intel.com> Srirama Chandra Sekhar Mogali <srirama.m@samsung.com> Staphany Park <stapark008@gmail.com> +Stephan Hartmann <stha09@googlemail.com> Stephen Searles <stephen.searles@gmail.com> Steve Sanders <steve@zanderz.com> Steven Pennington <spenn@engr.uvic.ca> @@ -898,6 +937,7 @@ Szabolcs David <davidsz@inf.u-szeged.hu> Szymon Piechowicz <szymonpiechowicz@o2.pl> Taeheon Kim <skyrabbits1@gmail.com> +Taeho Nam <thn7440@gmail.com> Taehoon Lee <taylor.hoon@gmail.com> Takashi Fujita <tgfjt.mail@gmail.com> Takeshi Kurosawa <taken.spc@gmail.com> @@ -915,6 +955,7 @@ Thomas White <im.toms.inbox@gmail.com> Tiago Vignatti <tiago.vignatti@intel.com> Tibor Dusnoki <tibor.dusnoki.91@gmail.com> +Tibor Dusnoki <tdusnoki@inf.u-szeged.hu> Tim Ansell <mithro@mithis.com> Tim Niederhausen <tim@rnc-ag.de> Timo Gurr <timo.gurr@gmail.com> @@ -925,9 +966,11 @@ Tom Harwood <tfh@skip.org> Tomas Popela <tomas.popela@gmail.com> Torsten Kurbad <google@tk-webart.de> +Toshihito Kikuchi <leamovret@gmail.com> Trent Willis <trentmwillis@gmail.com> Trevor Perrin <unsafe@trevp.net> Tripta 
Gupta <tripta.g@samsung.com> +Tuukka Toivonen <tuukka.toivonen@intel.com> U. Artie Eoff <ullysses.a.eoff@intel.com> Umar Hansa <umar.hansa@gmail.com> Upendra Gowda <upendrag.gowda@gmail.com> @@ -937,14 +980,16 @@ Valentin Ilie <valentin.ilie@intel.com> Vamshikrishna Yellenki <vamshi@motorola.com> Vani Hegde <vani.hegde@samsung.com> -Varun Chowdhary Paturi <v.paturi@samsung.com> Vartul Katiyar <vartul.k@samsung.com> +Varun Chowdhary Paturi <v.paturi@samsung.com> +Varun Varada <varuncvarada@gmail.com> Vedran Šajatović <vedran.sajatovic@gmail.com> Vernon Tang <vt@foilhead.net> Viatcheslav Ostapenko <sl.ostapenko@samsung.com> Victor Costan <costan@gmail.com> Viet-Trung Luu <viettrungluu@gmail.com> Vinay Anantharaman <vinaya@adobe.com> +Vinoth Chandar <vinoth@uber.com> Vipul Bhasin <vipul.bhasin@gmail.com> Visa Putkinen <v.putkinen@partner.samsung.com> Vishal Bhatnagar <vishal.b@samsung.com> @@ -954,8 +999,10 @@ Waihung Fu <fufranci@amazon.com> Wanming Lin <wanming.lin@intel.com> Wei Li <wei.c.li@intel.com> +Wenxiang Qian <leonwxqian@gmail.com> WenSheng He <wensheng.he@samsung.com> Wesley Lancel <wesleylancel@gmail.com> +Wei Wang <wei4.wang@intel.com> Wesley Wigham <wwigham@gmail.com> Will Hirsch <chromium@willhirsch.co.uk> Will Shackleton <w.shackleton@gmail.com> @@ -979,7 +1026,7 @@ Yael Aharon <yael.aharon@intel.com> Yan Wang <yan0422.wang@samsung.com> Yang Gu <yang.gu@intel.com> -Yannic Bonenberger <contact@yannic-bonenberger.com> +Yannic Bonenberger <yannic.bonenberger@gmail.com> Yarin Kaul <yarin.kaul@gmail.com> Yash Vempati <vempatiy@amazon.com> Ye Liu <cbakgly@gmail.com> @@ -1005,7 +1052,9 @@ Youngmin Yoo <youngmin.yoo@samsung.com> Youngsoo Choi <kenshin.choi@samsung.com> Youngsun Suh <zard17@gmail.com> +Yuan-Pin Yu <yjames@uber.com> Yuhong Sha <yuhong.sha@samsung.com> +Yuki Tsuchiya <Yuki.Tsuchiya@sony.com> Yumikiyo Osanai <yumios.art@gmail.com> Yunchao He <yunchao.he@intel.com> Yupei Lin <yplam@yplam.com> @@ -1013,12 +1062,14 @@ Yura Yaroshevich 
<yura.yaroshevich@gmail.com> Yuri Gorobets <yuri.gorobets@gmail.com> Yuriy Taraday <yorik.sar@gmail.com> +Yuta Kasai <kasai.yuta0810@gmail.com> Yuvanesh Natarajan <yuvanesh.n1@samsung.com> Zeno Albisser <zeno.albisser@digia.com> Zeqin Chen <talonchen@tencent.com> Zhaoze Zhou <zhaoze.zhou@partner.samsung.com> Zheda Chen <zheda.chen@intel.com> Zheng Chuang <zhengchuangscu@gmail.com> +Zheng Xu <zxu@kobo.com> Zhengkun Li <zhengkli@amazon.com> Zhenyu Liang <zhenyu.liang@intel.com> Zhenyu Shan <zhenyu.shan@intel.com> @@ -1029,12 +1080,9 @@ Zoltan Kuscsik <zoltan.kuscsik@linaro.org> Zsolt Borbely <zsborbely.u-szeged@partner.samsung.com> 方觉 (Fang Jue) <fangjue23303@gmail.com> -Rajesh Mahindra <rmahindra@uber.com> -Yuan-Pin Yu <yjames@uber.com> -Vinoth Chandar <vinoth@uber.com> -Zheng Xu <zxu@kobo.com> -Junsong Li <ljs.darkfish@gmail.com> +# END individuals section. +# BEGIN organizations section. ACCESS CO., LTD. <*@access-company.com> Akamai Inc. <*@akamai.com> ARM Holdings <*@arm.com> @@ -1042,10 +1090,12 @@ Bocoup <*@bocoup.com> Canonical Limited <*@canonical.com> Cloudflare, Inc. <*@cloudflare.com> +CloudMosa, Inc. <*@cloudmosa.com> Code Aurora Forum <*@codeaurora.org> Collabora Limited <*@collabora.com> Comodo CA Limited Cosium <*@cosium.com> +Dell Technologies Inc. <*@dell.corp-partner.google.com> Duck Duck Go, Inc. <*@duckduckgo.com> Endless Mobile, Inc. <*@endlessm.com> Estimote, Inc. <*@estimote.com> @@ -1062,6 +1112,8 @@ LG Electronics, Inc. <*@lge.com> Loongson Technology Corporation Limited. <*@loongson.cn> Macadamian <*@macadamian.com> +Mail.ru Group <*@corp.mail.ru> +Make Positive Provar Limited <*@provartesting.com> Mediatek <*@mediatek.com> Microsoft <*@microsoft.com> MIPS Technologies, Inc. <*@mips.com> @@ -1087,4 +1139,4 @@ Vewd Software AS <*@vewd.com> Vivaldi Technologies AS <*@vivaldi.com> Yandex LLC <*@yandex-team.ru> -Make Positive Provar Limited <*@provartesting.com> +# END organizations section.
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index 7e2c510..2962537 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -7,58 +7,10 @@ #include "build/build_config.h" -#if defined(COMPILER_MSVC) - -#if !defined(__clang__) +#if defined(COMPILER_MSVC) && !defined(__clang__) #error "Only clang-cl is supported on Windows, see https://crbug.com/988071" #endif -// Macros for suppressing and disabling warnings on MSVC. -// -// Warning numbers are enumerated at: -// http://msdn.microsoft.com/en-us/library/8x5x43k7(VS.80).aspx -// -// The warning pragma: -// http://msdn.microsoft.com/en-us/library/2c8f766e(VS.80).aspx -// -// Using __pragma instead of #pragma inside macros: -// http://msdn.microsoft.com/en-us/library/d9x1s805.aspx - -// MSVC_PUSH_DISABLE_WARNING pushes |n| onto a stack of warnings to be disabled. -// The warning remains disabled until popped by MSVC_POP_WARNING. -#define MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \ - __pragma(warning(disable:n)) - -// Pop effects of innermost MSVC_PUSH_* macro. -#define MSVC_POP_WARNING() __pragma(warning(pop)) - -#else // Not MSVC - -#define MSVC_PUSH_DISABLE_WARNING(n) -#define MSVC_POP_WARNING() -#define MSVC_DISABLE_OPTIMIZE() -#define MSVC_ENABLE_OPTIMIZE() - -#endif // COMPILER_MSVC - -// These macros can be helpful when investigating compiler bugs or when -// investigating issues in local optimized builds, by temporarily disabling -// optimizations for a single function or file. These macros should never be -// used to permanently work around compiler bugs or other mysteries, and should -// not be used in landed changes. -#if !defined(OFFICIAL_BUILD) -#if defined(__clang__) -#define DISABLE_OPTIMIZE() __pragma(clang optimize off) -#define ENABLE_OPTIMIZE() __pragma(clang optimize on) -#elif defined(COMPILER_MSVC) -#define DISABLE_OPTIMIZE() __pragma(optimize("", off)) -#define ENABLE_OPTIMIZE() __pragma(optimize("", on)) -#else -// These macros are not currently available for other compiler options. -#endif -// These macros are not available in official builds. 
-#endif // !defined(OFFICIAL_BUILD) - // Annotate a variable indicating it's ok if the variable is not used. // (Typically used to silence a compiler warning when the assignment // is important for some other reason.) @@ -141,7 +93,7 @@ // For member functions, the implicit this parameter counts as index 1. #if defined(COMPILER_GCC) || defined(__clang__) #define PRINTF_FORMAT(format_param, dots_param) \ - __attribute__((format(printf, format_param, dots_param))) + __attribute__((format(printf, format_param, dots_param))) #else #define PRINTF_FORMAT(format_param, dots_param) #endif @@ -170,14 +122,14 @@ // Mark a memory region fully initialized. // Use this to annotate code that deliberately reads uninitialized data, for // example a GC scavenging root set pointers from the stack. -#define MSAN_UNPOISON(p, size) __msan_unpoison(p, size) +#define MSAN_UNPOISON(p, size) __msan_unpoison(p, size) // Check a memory region for initializedness, as if it was being used here. // If any bits are uninitialized, crash with an MSan report. // Use this to sanitize data which MSan won't be able to track, e.g. before // passing data to another process via shared memory. #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) \ - __msan_check_mem_is_initialized(p, size) + __msan_check_mem_is_initialized(p, size) #else // MEMORY_SANITIZER #define MSAN_UNPOISON(p, size) #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) @@ -260,4 +212,39 @@ #endif #endif +#if defined(__clang__) && __has_attribute(uninitialized) +// Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for +// the specified variable. +// Library-wide alternative is +// 'configs -= [ "//build/config/compiler:default_init_stack_vars" ]' in .gn +// file. +// +// See "init_stack_vars" in build/config/compiler/BUILD.gn and +// http://crbug.com/977230 +// "init_stack_vars" is enabled for non-official builds and we hope to enable it +// in official build in 2020 as well. 
The flag writes fixed pattern into +// uninitialized parts of all local variables. In rare cases such initialization +// is undesirable and attribute can be used: +// 1. Degraded performance +// In most cases compiler is able to remove additional stores. E.g. if memory is +// never accessed or properly initialized later. Preserved stores mostly will +// not affect program performance. However if compiler failed on some +// performance critical code we can get a visible regression in a benchmark. +// 2. memset, memcpy calls +// Compiler may replaces some memory writes with memset or memcpy calls. This is +// not -ftrivial-auto-var-init specific, but it can happen more likely with the +// flag. It can be a problem if code is not linked with C run-time library. +// +// Note: The flag is security risk mitigation feature. So in future the +// attribute uses should be avoided when possible. However to enable this +// mitigation on the most of the code we need to be less strict now and minimize +// number of exceptions later. So if in doubt feel free to use attribute, but +// please document the problem for someone who is going to cleanup it later. +// E.g. platform, bot, benchmark or test name in patch description or next to +// the attribute. +#define STACK_UNINITIALIZED __attribute__((uninitialized)) +#else +#define STACK_UNINITIALIZED +#endif + #endif // BASE_COMPILER_SPECIFIC_H_
diff --git a/base/macros.h b/base/macros.h index cda8e3a..c67bdbd 100644 --- a/base/macros.h +++ b/base/macros.h
@@ -10,6 +10,10 @@ #ifndef BASE_MACROS_H_ #define BASE_MACROS_H_ +// ALL DISALLOW_xxx MACROS ARE DEPRECATED; DO NOT USE IN NEW CODE. +// Use explicit deletions instead. See the section on copyability/movability in +// //styleguide/c++/c++-dos-and-donts.md for more information. + // Put this in the declarations for a class to be uncopyable. #define DISALLOW_COPY(TypeName) \ TypeName(const TypeName&) = delete
diff --git a/base/optional.h b/base/optional.h index 345147c..a043122 100644 --- a/base/optional.h +++ b/base/optional.h
@@ -30,11 +30,13 @@ namespace internal { +struct DummyUnionMember {}; + template <typename T, bool = std::is_trivially_destructible<T>::value> struct OptionalStorageBase { - // Initializing |empty_| here instead of using default member initializing - // to avoid errors in g++ 4.8. - constexpr OptionalStorageBase() : empty_('\0') {} + // Provide non-defaulted default ctor to make sure it's not deleted by + // non-trivial T::T() in the union. + constexpr OptionalStorageBase() : dummy_() {} template <class... Args> constexpr explicit OptionalStorageBase(in_place_t, Args&&... args) @@ -65,19 +67,28 @@ bool is_populated_ = false; union { - // |empty_| exists so that the union will always be initialized, even when + // |dummy_| exists so that the union will always be initialized, even when // it doesn't contain a value. Union members must be initialized for the - // constructor to be 'constexpr'. - char empty_; + // constructor to be 'constexpr'. Having a special trivial class for it is + // better than e.g. using char, because the latter will have to be + // zero-initialized, and the compiler can't optimize this write away, since + // it assumes this might be a programmer's invariant. This can also cause + // problems for conservative GC in Oilpan. Compiler is free to split shared + // and non-shared parts of the union in separate memory locations (or + // registers). If conservative GC is triggered at this moment, the stack + // scanning routine won't find the correct object pointed from + // Optional<HeapObject*>. This dummy valueless struct lets the compiler know + // that we don't care about the value of this union member. + DummyUnionMember dummy_; T value_; }; }; template <typename T> struct OptionalStorageBase<T, true /* trivially destructible */> { - // Initializing |empty_| here instead of using default member initializing - // to avoid errors in g++ 4.8. 
- constexpr OptionalStorageBase() : empty_('\0') {} + // Provide non-defaulted default ctor to make sure it's not deleted by + // non-trivial T::T() in the union. + constexpr OptionalStorageBase() : dummy_() {} template <class... Args> constexpr explicit OptionalStorageBase(in_place_t, Args&&... args) @@ -106,10 +117,19 @@ bool is_populated_ = false; union { - // |empty_| exists so that the union will always be initialized, even when + // |dummy_| exists so that the union will always be initialized, even when // it doesn't contain a value. Union members must be initialized for the - // constructor to be 'constexpr'. - char empty_; + // constructor to be 'constexpr'. Having a special trivial class for it is + // better than e.g. using char, because the latter will have to be + // zero-initialized, and the compiler can't optimize this write away, since + // it assumes this might be a programmer's invariant. This can also cause + // problems for conservative GC in Oilpan. Compiler is free to split shared + // and non-shared parts of the union in separate memory locations (or + // registers). If conservative GC is triggered at this moment, the stack + // scanning routine won't find the correct object pointed from + // Optional<HeapObject*>. This dummy valueless struct lets the compiler know + // that we don't care about the value of this union member. + DummyUnionMember dummy_; T value_; }; };
diff --git a/base/stl_util.h b/base/stl_util.h index d6ca464..7fc8108 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -31,15 +31,18 @@ namespace internal { -// Calls erase on iterators of matching elements. +// Calls erase on iterators of matching elements and returns the number of +// removed elements. template <typename Container, typename Predicate> -void IterateAndEraseIf(Container& container, Predicate pred) { - for (auto it = container.begin(); it != container.end();) { +size_t IterateAndEraseIf(Container& container, Predicate pred) { + size_t old_size = container.size(); + for (auto it = container.begin(), last = container.end(); it != last;) { if (pred(*it)) it = container.erase(it); else ++it; } + return old_size - container.size(); } template <typename Iter> @@ -144,6 +147,30 @@ return il.begin(); } +// std::array::data() was not constexpr prior to C++17 [1]. +// Hence these overloads are provided. +// +// [1] https://en.cppreference.com/w/cpp/container/array/data +template <typename T, size_t N> +constexpr T* data(std::array<T, N>& array) noexcept { + return !array.empty() ? &array[0] : nullptr; +} + +template <typename T, size_t N> +constexpr const T* data(const std::array<T, N>& array) noexcept { + return !array.empty() ? &array[0] : nullptr; +} + +// C++14 implementation of C++17's std::as_const(): +// https://en.cppreference.com/w/cpp/utility/as_const +template <typename T> +constexpr std::add_const_t<T>& as_const(T& t) noexcept { + return t; +} + +template <typename T> +void as_const(const T&& t) = delete; + // Returns a const reference to the underlying container of a container adapter. // Works for std::priority_queue, std::queue, and std::stack. 
template <class A> @@ -473,8 +500,9 @@ a2.begin(), a2.end()); } -// Erase/EraseIf are based on library fundamentals ts v2 erase/erase_if -// http://en.cppreference.com/w/cpp/experimental/lib_extensions_2 +// Erase/EraseIf are based on C++20's uniform container erasure API: +// - https://eel.is/c++draft/libraryindex#:erase +// - https://eel.is/c++draft/libraryindex#:erase_if // They provide a generic way to erase elements from a container. // The functions here implement these for the standard containers until those // functions are available in the C++ standard. @@ -484,89 +512,109 @@ // have it either. template <typename CharT, typename Traits, typename Allocator, typename Value> -void Erase(std::basic_string<CharT, Traits, Allocator>& container, - const Value& value) { - container.erase(std::remove(container.begin(), container.end(), value), - container.end()); +size_t Erase(std::basic_string<CharT, Traits, Allocator>& container, + const Value& value) { + auto it = std::remove(container.begin(), container.end(), value); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <typename CharT, typename Traits, typename Allocator, class Predicate> -void EraseIf(std::basic_string<CharT, Traits, Allocator>& container, - Predicate pred) { - container.erase(std::remove_if(container.begin(), container.end(), pred), - container.end()); +size_t EraseIf(std::basic_string<CharT, Traits, Allocator>& container, + Predicate pred) { + auto it = std::remove_if(container.begin(), container.end(), pred); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <class T, class Allocator, class Value> -void Erase(std::deque<T, Allocator>& container, const Value& value) { - container.erase(std::remove(container.begin(), container.end(), value), - container.end()); +size_t Erase(std::deque<T, Allocator>& container, const Value& value) { + auto it = 
std::remove(container.begin(), container.end(), value); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <class T, class Allocator, class Predicate> -void EraseIf(std::deque<T, Allocator>& container, Predicate pred) { - container.erase(std::remove_if(container.begin(), container.end(), pred), - container.end()); +size_t EraseIf(std::deque<T, Allocator>& container, Predicate pred) { + auto it = std::remove_if(container.begin(), container.end(), pred); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <class T, class Allocator, class Value> -void Erase(std::vector<T, Allocator>& container, const Value& value) { - container.erase(std::remove(container.begin(), container.end(), value), - container.end()); +size_t Erase(std::vector<T, Allocator>& container, const Value& value) { + auto it = std::remove(container.begin(), container.end(), value); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <class T, class Allocator, class Predicate> -void EraseIf(std::vector<T, Allocator>& container, Predicate pred) { - container.erase(std::remove_if(container.begin(), container.end(), pred), - container.end()); +size_t EraseIf(std::vector<T, Allocator>& container, Predicate pred) { + auto it = std::remove_if(container.begin(), container.end(), pred); + size_t removed = std::distance(it, container.end()); + container.erase(it, container.end()); + return removed; } template <class T, class Allocator, class Value> -void Erase(std::forward_list<T, Allocator>& container, const Value& value) { +size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) { // Unlike std::forward_list::remove, this function template accepts // heterogeneous types and does not force a conversion to the container's // value type before invoking the == operator. 
- container.remove_if([&](const T& cur) { return cur == value; }); + return EraseIf(container, [&](const T& cur) { return cur == value; }); } template <class T, class Allocator, class Predicate> -void EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) { +size_t EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) { + // Note: std::forward_list does not have a size() API, thus we need to use the + // O(n) std::distance work-around. However, given that EraseIf is O(n) + // already, this should not make a big difference. + size_t old_size = std::distance(container.begin(), container.end()); container.remove_if(pred); + return old_size - std::distance(container.begin(), container.end()); } template <class T, class Allocator, class Value> -void Erase(std::list<T, Allocator>& container, const Value& value) { +size_t Erase(std::list<T, Allocator>& container, const Value& value) { // Unlike std::list::remove, this function template accepts heterogeneous // types and does not force a conversion to the container's value type before // invoking the == operator. 
- container.remove_if([&](const T& cur) { return cur == value; }); + return EraseIf(container, [&](const T& cur) { return cur == value; }); } template <class T, class Allocator, class Predicate> -void EraseIf(std::list<T, Allocator>& container, Predicate pred) { +size_t EraseIf(std::list<T, Allocator>& container, Predicate pred) { + size_t old_size = container.size(); container.remove_if(pred); + return old_size - container.size(); } template <class Key, class T, class Compare, class Allocator, class Predicate> -void EraseIf(std::map<Key, T, Compare, Allocator>& container, Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::map<Key, T, Compare, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, class T, class Compare, class Allocator, class Predicate> -void EraseIf(std::multimap<Key, T, Compare, Allocator>& container, - Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::multimap<Key, T, Compare, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, class Compare, class Allocator, class Predicate> -void EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, class Compare, class Allocator, class Predicate> -void EraseIf(std::multiset<Key, Compare, Allocator>& container, - Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::multiset<Key, Compare, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, @@ -575,9 +623,9 @@ class KeyEqual, class Allocator, class Predicate> -void EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& 
container, - Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, @@ -586,10 +634,10 @@ class KeyEqual, class Allocator, class Predicate> -void EraseIf( +size_t EraseIf( std::unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& container, Predicate pred) { - internal::IterateAndEraseIf(container, pred); + return internal::IterateAndEraseIf(container, pred); } template <class Key, @@ -597,9 +645,9 @@ class KeyEqual, class Allocator, class Predicate> -void EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& container, - Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } template <class Key, @@ -607,9 +655,10 @@ class KeyEqual, class Allocator, class Predicate> -void EraseIf(std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container, - Predicate pred) { - internal::IterateAndEraseIf(container, pred); +size_t EraseIf( + std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container, + Predicate pred) { + return internal::IterateAndEraseIf(container, pred); } // A helper class to be used as the predicate with |EraseIf| to implement
diff --git a/base/strings/strcat.cc b/base/strings/strcat.cc index 1774a15..db41a69 100644 --- a/base/strings/strcat.cc +++ b/base/strings/strcat.cc
@@ -8,8 +8,8 @@ namespace { -// Reserves an additional amount of size in the given string, growing by at -// least 2x. Used by StrAppend(). +// Reserves an additional amount of capacity in the given string, growing by at +// least 2x if necessary. Used by StrAppendT(). // // The "at least 2x" growing rule duplicates the exponential growth of // std::string. The problem is that most implementations of reserve() will grow @@ -18,11 +18,15 @@ // call to StrAppend() would definitely cause a reallocation, and loops with // StrAppend() calls would have O(n^2) complexity to execute. Instead, we want // StrAppend() to have the same semantics as std::string::append(). -// -// If the string is empty, we assume that exponential growth is not necessary. template <typename String> -void ReserveAdditional(String* str, typename String::size_type additional) { - str->reserve(std::max(str->size() + additional, str->size() * 2)); +void ReserveAdditionalIfNeeded(String* str, + typename String::size_type additional) { + const size_t required = str->size() + additional; + // Check whether we need to reserve additional capacity at all. + if (required <= str->capacity()) + return; + + str->reserve(std::max(required, str->capacity() * 2)); } template <typename DestString, typename InputString> @@ -30,7 +34,7 @@ size_t additional_size = 0; for (const auto& cur : pieces) additional_size += cur.size(); - ReserveAdditional(dest, additional_size); + ReserveAdditionalIfNeeded(dest, additional_size); for (const auto& cur : pieces) dest->append(cur.data(), cur.size());
diff --git a/base/strings/strcat.h b/base/strings/strcat.h index bcdfe17..220fa24 100644 --- a/base/strings/strcat.h +++ b/base/strings/strcat.h
@@ -59,10 +59,13 @@ // for this call and generate slightly less code. This is something we can // explore more in the future. -BASE_EXPORT std::string StrCat(span<const StringPiece> pieces); -BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces); -BASE_EXPORT std::string StrCat(span<const std::string> pieces); -BASE_EXPORT string16 StrCat(span<const string16> pieces); +BASE_EXPORT std::string StrCat(span<const StringPiece> pieces) + WARN_UNUSED_RESULT; +BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces) + WARN_UNUSED_RESULT; +BASE_EXPORT std::string StrCat(span<const std::string> pieces) + WARN_UNUSED_RESULT; +BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT; // Initializer list forwards to the array version. inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
diff --git a/base/strings/strcat_unittest.cc b/base/strings/strcat_unittest.cc index d51b840..9374c39 100644 --- a/base/strings/strcat_unittest.cc +++ b/base/strings/strcat_unittest.cc
@@ -64,4 +64,22 @@ EXPECT_EQ(ASCIIToUTF16("foo122333"), result); } +TEST(StrAppendT, ReserveAdditionalIfNeeded) { + std::string str = "foo"; + const char* prev_data = str.data(); + size_t prev_capacity = str.capacity(); + // Fully exhaust current capacity. + StrAppend(&str, {std::string(str.capacity() - str.size(), 'o')}); + // Expect that we hit capacity, but didn't require a re-alloc. + EXPECT_EQ(str.capacity(), str.size()); + EXPECT_EQ(prev_data, str.data()); + EXPECT_EQ(prev_capacity, str.capacity()); + + // Force a re-alloc by appending another character. + StrAppend(&str, {"o"}); + + // Expect at least 2x growth in capacity. + EXPECT_LE(2 * prev_capacity, str.capacity()); +} + } // namespace base
diff --git a/base/strings/string_number_conversions.cc b/base/strings/string_number_conversions.cc index 2bf6142..701d71c 100644 --- a/base/strings/string_number_conversions.cc +++ b/base/strings/string_number_conversions.cc
@@ -13,10 +13,11 @@ #include <type_traits> #include "polyfills/base/logging.h" +#include "base/no_destructor.h" #include "base/numerics/safe_math.h" -#include "base/scoped_clear_last_error.h" +#include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" -#include "base/third_party/dmg_fp/dmg_fp.h" +#include "base/third_party/double_conversion/double-conversion/double-conversion.h" namespace gurl_base { @@ -360,21 +361,29 @@ return IntToStringT<string16, unsigned long long>::IntToString(value); } +static const double_conversion::DoubleToStringConverter* +GetDoubleToStringConverter() { + static NoDestructor<double_conversion::DoubleToStringConverter> converter( + double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + nullptr, nullptr, 'e', -6, 12, 0, 0); + return converter.get(); +} + std::string NumberToString(double value) { - // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. char buffer[32]; - dmg_fp::g_fmt(buffer, value); - return std::string(buffer); + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); + return std::string(buffer, builder.position()); } gurl_base::string16 NumberToString16(double value) { - // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. char buffer[32]; - dmg_fp::g_fmt(buffer, value); + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); // The number will be ASCII. This creates the string using the "input // iterator" variant which promotes from 8-bit to 16-bit via "=". 
- return gurl_base::string16(&buffer[0], &buffer[strlen(buffer)]); + return gurl_base::string16(&buffer[0], &buffer[builder.position()]); } bool StringToInt(StringPiece input, int* output) { @@ -417,35 +426,37 @@ return String16ToIntImpl(input, output); } -bool StringToDouble(const std::string& input, double* output) { - // Thread-safe? It is on at least Mac, Linux, and Windows. - internal::ScopedClearLastError clear_errno; +template <typename STRING, typename CHAR> +bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) { + static NoDestructor<double_conversion::StringToDoubleConverter> converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, + 0.0, 0, nullptr, nullptr); - char* endptr = nullptr; - *output = dmg_fp::strtod(input.c_str(), &endptr); + int processed_characters_count; + *output = converter->StringToDouble(data, input.size(), + &processed_characters_count); // Cases to return false: - // - If errno is ERANGE, there was an overflow or underflow. // - If the input string is empty, there was nothing to parse. - // - If endptr does not point to the end of the string, there are either - // characters remaining in the string after a parsed number, or the string - // does not begin with a parseable number. endptr is compared to the - // expected end given the string's stated length to correctly catch cases - // where the string contains embedded NUL characters. + // - If the value saturated to HUGE_VAL. + // - If the entire string was not processed, there are either characters + // remaining in the string after a parsed number, or the string does not + // begin with a parseable number. 
// - If the first character is a space, there was leading whitespace - return errno == 0 && - !input.empty() && - input.c_str() + input.length() == endptr && - !isspace(input[0]); + return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL && + static_cast<size_t>(processed_characters_count) == input.size() && + !IsUnicodeWhitespace(input[0]); } -// Note: if you need to add String16ToDouble, first ask yourself if it's -// really necessary. If it is, probably the best implementation here is to -// convert to 8-bit and then use the 8-bit version. +bool StringToDouble(StringPiece input, double* output) { + return StringToDoubleImpl(input, input.data(), output); +} -// Note: if you need to add an iterator range version of StringToDouble, first -// ask yourself if it's really necessary. If it is, probably the best -// implementation here is to instantiate a string and use the string version. +bool StringToDouble(StringPiece16 input, double* output) { + return StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), output); +} std::string HexEncode(const void* bytes, size_t size) { static const char kHexChars[] = "0123456789ABCDEF"; @@ -485,7 +496,8 @@ input.begin(), input.end(), output); } -bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { +template <typename Container> +static bool HexStringToByteContainer(StringPiece input, Container* output) { GURL_DCHECK_EQ(output->size(), 0u); size_t count = input.size(); if (count == 0 || (count % 2) != 0) @@ -502,4 +514,32 @@ return true; } +bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { + return HexStringToByteContainer(input, output); +} + +bool HexStringToString(StringPiece input, std::string* output) { + return HexStringToByteContainer(input, output); +} + +bool HexStringToSpan(StringPiece input, gurl_base::span<uint8_t> output) { + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + + if (count / 2 != 
output.size()) + return false; + + for (uintptr_t i = 0; i < count / 2; ++i) { + uint8_t msb = 0; // most significant 4 bits + uint8_t lsb = 0; // least significant 4 bits + if (!CharToDigit<16>(input[i * 2], &msb) || + !CharToDigit<16>(input[i * 2 + 1], &lsb)) { + return false; + } + output[i] = (msb << 4) | lsb; + } + return true; +} + } // namespace base
diff --git a/base/strings/string_number_conversions.h b/base/strings/string_number_conversions.h index a3acab8..872ead2 100644 --- a/base/strings/string_number_conversions.h +++ b/base/strings/string_number_conversions.h
@@ -98,7 +98,8 @@ // If your input is locale specific, use ICU to read the number. // WARNING: Will write to |output| even when returning false. // Read the comments here and above StringToInt() carefully. -BASE_EXPORT bool StringToDouble(const std::string& input, double* output); +BASE_EXPORT bool StringToDouble(StringPiece input, double* output); +BASE_EXPORT bool StringToDouble(StringPiece16 input, double* output); // Hex encoding ---------------------------------------------------------------- @@ -140,6 +141,17 @@ BASE_EXPORT bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output); +// Same as HexStringToBytes, but for an std::string. +BASE_EXPORT bool HexStringToString(StringPiece input, std::string* output); + +// Decodes the hex string |input| into a presized |output|. The output buffer +// must be sized exactly to |input.size() / 2| or decoding will fail and no +// bytes will be written to |output|. Decoding an empty input is also +// considered a failure. When decoding fails due to encountering invalid input +// characters, |output| will have been filled with the decoded bytes up until +// the failure. +BASE_EXPORT bool HexStringToSpan(StringPiece input, gurl_base::span<uint8_t> output); + } // namespace base #endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc index 93405e2..6f8d171 100644 --- a/base/strings/string_number_conversions_unittest.cc +++ b/base/strings/string_number_conversions_unittest.cc
@@ -672,7 +672,8 @@ EXPECT_EQ(0xc0ffeeU, output); } -TEST(StringNumberConversionsTest, HexStringToBytes) { +// Tests for HexStringToBytes, HexStringToString, HexStringToSpan. +TEST(StringNumberConversionsTest, HexStringToBytesStringSpan) { static const struct { const std::string input; const char* output; @@ -698,16 +699,65 @@ "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true}, }; - for (size_t i = 0; i < gurl_base::size(cases); ++i) { - std::vector<uint8_t> output; - std::vector<uint8_t> compare; - EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) << - i << ": " << cases[i].input; - for (size_t j = 0; j < cases[i].output_len; ++j) - compare.push_back(static_cast<uint8_t>(cases[i].output[j])); - ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input; - EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) << - i << ": " << cases[i].input; + for (size_t test_i = 0; test_i < gurl_base::size(cases); ++test_i) { + const auto& test = cases[test_i]; + + std::string expected_output(test.output, test.output_len); + + // Test HexStringToBytes(). + { + std::vector<uint8_t> output; + EXPECT_EQ(test.success, HexStringToBytes(test.input, &output)) + << test_i << ": " << test.input; + EXPECT_EQ(expected_output, std::string(output.begin(), output.end())); + } + + // Test HexStringToString(). + { + std::string output; + EXPECT_EQ(test.success, HexStringToString(test.input, &output)) + << test_i << ": " << test.input; + EXPECT_EQ(expected_output, output) << test_i << ": " << test.input; + } + + // Test HexStringToSpan() with a properly sized output. + { + std::vector<uint8_t> output; + output.resize(test.input.size() / 2); + + EXPECT_EQ(test.success, HexStringToSpan(test.input, output)) + << test_i << ": " << test.input; + + // On failure the output will only have been partially written (with + // everything after the failure being 0). 
+ for (size_t i = 0; i < test.output_len; ++i) { + EXPECT_EQ(test.output[i], static_cast<char>(output[i])) + << test_i << ": " << test.input; + } + for (size_t i = test.output_len; i < output.size(); ++i) { + EXPECT_EQ('\0', static_cast<char>(output[i])) + << test_i << ": " << test.input; + } + } + + // Test HexStringToSpan() with an output that is 1 byte too small. + { + std::vector<uint8_t> output; + if (test.input.size() > 1) + output.resize(test.input.size() / 2 - 1); + + EXPECT_FALSE(HexStringToSpan(test.input, output)) + << test_i << ": " << test.input; + } + + // Test HexStringToSpan() with an output that is 1 byte too large. + { + std::vector<uint8_t> output; + output.resize(test.input.size() / 2 + 1); + + EXPECT_FALSE(HexStringToSpan(test.input, output)) + << test_i << ": " << test.input; + } } } @@ -791,6 +841,8 @@ }; for (size_t i = 0; i < gurl_base::size(cases); ++i) { + SCOPED_TRACE( + StringPrintf("case %" PRIuS " \"%s\"", i, cases[i].input.c_str())); double output; errno = 1; EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output)); @@ -814,13 +866,14 @@ double input; const char* expected; } cases[] = { - {0.0, "0"}, - {1.25, "1.25"}, - {1.33518e+012, "1.33518e+12"}, - {1.33489e+012, "1.33489e+12"}, - {1.33505e+012, "1.33505e+12"}, - {1.33545e+009, "1335450000"}, - {1.33503e+009, "1335030000"}, + {0.0, "0"}, + {0.5, "0.5"}, + {1.25, "1.25"}, + {1.33518e+012, "1.33518e+12"}, + {1.33489e+012, "1.33489e+12"}, + {1.33505e+012, "1.33505e+12"}, + {1.33545e+009, "1335450000"}, + {1.33503e+009, "1335030000"}, }; for (const auto& i : cases) { @@ -832,12 +885,12 @@ const char input_bytes[8] = {0, 0, 0, 0, '\xee', '\x6d', '\x73', '\x42'}; double input = 0; memcpy(&input, input_bytes, gurl_base::size(input_bytes)); - EXPECT_EQ("1335179083776", NumberToString(input)); + EXPECT_EQ("1.335179083776e+12", NumberToString(input)); const char input_bytes2[8] = {0, 0, 0, '\xa0', '\xda', '\x6c', '\x73', '\x42'}; input = 0; memcpy(&input, input_bytes2, 
gurl_base::size(input_bytes2)); - EXPECT_EQ("1334890332160", NumberToString(input)); + EXPECT_EQ("1.33489033216e+12", NumberToString(input)); } TEST(StringNumberConversionsTest, HexEncode) { @@ -892,6 +945,7 @@ }; for (const auto& test : cases) { + SCOPED_TRACE(StringPrintf("input: \"%s\"", test.input)); double output; EXPECT_TRUE(StringToDouble(test.input, &output)); EXPECT_EQ(bit_cast<uint64_t>(output), test.expected);
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc index 68f3efc..74f3335 100644 --- a/base/strings/string_piece.cc +++ b/base/strings/string_piece.cc
@@ -53,36 +53,6 @@ namespace internal { template<typename STR> -void CopyToStringT(const BasicStringPiece<STR>& self, STR* target) { - if (self.empty()) - target->clear(); - else - target->assign(self.data(), self.size()); -} - -void CopyToString(const StringPiece& self, std::string* target) { - CopyToStringT(self, target); -} - -void CopyToString(const StringPiece16& self, string16* target) { - CopyToStringT(self, target); -} - -template<typename STR> -void AppendToStringT(const BasicStringPiece<STR>& self, STR* target) { - if (!self.empty()) - target->append(self.data(), self.size()); -} - -void AppendToString(const StringPiece& self, std::string* target) { - AppendToStringT(self, target); -} - -void AppendToString(const StringPiece16& self, string16* target) { - AppendToStringT(self, target); -} - -template<typename STR> size_t copyT(const BasicStringPiece<STR>& self, typename STR::value_type* buf, size_t n, @@ -219,8 +189,11 @@ size_t find_first_of(const StringPiece16& self, const StringPiece16& s, size_t pos) { + // Use the faster std::find() if searching for a single character. StringPiece16::const_iterator found = - std::find_first_of(self.begin() + pos, self.end(), s.begin(), s.end()); + s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0]) + : std::find_first_of(self.begin() + pos, self.end(), + s.begin(), s.end()); if (found == self.end()) return StringPiece16::npos; return found - self.begin(); @@ -435,16 +408,5 @@ return substrT(self, pos, n); } -#if GURL_DCHECK_IS_ON() -void AssertIteratorsInOrder(std::string::const_iterator begin, - std::string::const_iterator end) { - GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; -} -void AssertIteratorsInOrder(string16::const_iterator begin, - string16::const_iterator end) { - GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; -} -#endif - } // namespace internal } // namespace base
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h index 5359af6..964ec67 100644 --- a/base/strings/string_piece.h +++ b/base/strings/string_piece.h
@@ -47,12 +47,6 @@ // template internal to the .cc file. namespace internal { -BASE_EXPORT void CopyToString(const StringPiece& self, std::string* target); -BASE_EXPORT void CopyToString(const StringPiece16& self, string16* target); - -BASE_EXPORT void AppendToString(const StringPiece& self, std::string* target); -BASE_EXPORT void AppendToString(const StringPiece16& self, string16* target); - BASE_EXPORT size_t copy(const StringPiece& self, char* buf, size_t n, @@ -141,21 +135,12 @@ size_t pos, size_t n); -#if GURL_DCHECK_IS_ON() -// Asserts that begin <= end to catch some errors with iterator usage. -BASE_EXPORT void AssertIteratorsInOrder(std::string::const_iterator begin, - std::string::const_iterator end); -BASE_EXPORT void AssertIteratorsInOrder(string16::const_iterator begin, - string16::const_iterator end); -#endif - } // namespace internal // BasicStringPiece ------------------------------------------------------------ // Defines the types, methods, operators, and data members common to both -// StringPiece and StringPiece16. Do not refer to this class directly, but -// rather to BasicStringPiece, StringPiece, or StringPiece16. +// StringPiece and StringPiece16. // // This is templatized by string class type rather than character type, so // BasicStringPiece<std::string> or BasicStringPiece<gurl_base::string16>. @@ -178,23 +163,32 @@ // in a "const char*" or a "string" wherever a "StringPiece" is // expected (likewise for char16, string16, StringPiece16). constexpr BasicStringPiece() : ptr_(NULL), length_(0) {} - // TODO(dcheng): Construction from nullptr is not allowed for + // TODO(crbug.com/1049498): Construction from nullptr is not allowed for // std::basic_string_view, so remove the special handling for it. // Note: This doesn't just use STRING_TYPE::traits_type::length(), since that // isn't constexpr until C++17. constexpr BasicStringPiece(const value_type* str) : ptr_(str), length_(!str ? 
0 : CharTraits<value_type>::length(str)) {} + // Explicitly disallow construction from nullptr. Note that this does not + // catch construction from runtime strings that might be null. + // Note: The following is just a more elaborate way of spelling + // `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is + // not supported by the PNaCl toolchain. + // TODO(crbug.com/1049498): Remove once we GURL_CHECK(str) in the constructor + // above. + template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>> + BasicStringPiece(T) { + static_assert(sizeof(T) == 0, // Always false. + "StringPiece does not support construction from nullptr, use " + "the default constructor instead."); + } BasicStringPiece(const STRING_TYPE& str) : ptr_(str.data()), length_(str.size()) {} constexpr BasicStringPiece(const value_type* offset, size_type len) : ptr_(offset), length_(len) {} BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, const typename STRING_TYPE::const_iterator& end) { -#if GURL_DCHECK_IS_ON() - // This assertion is done out-of-line to avoid bringing in logging.h and - // instantiating logging macros for every instantiation. - internal::AssertIteratorsInOrder(begin, end); -#endif + GURL_DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; length_ = static_cast<size_t>(std::distance(begin, end)); // The length test before assignment is to avoid dereferencing an iterator @@ -211,19 +205,6 @@ constexpr size_type length() const noexcept { return length_; } bool empty() const { return length_ == 0; } - void clear() { - ptr_ = NULL; - length_ = 0; - } - void set(const value_type* data, size_type len) { - ptr_ = data; - length_ = len; - } - void set(const value_type* str) { - ptr_ = str; - length_ = str ? 
STRING_TYPE::traits_type::length(str) : 0; - } - constexpr value_type operator[](size_type i) const { GURL_CHECK(i < length_); return ptr_[i]; @@ -280,16 +261,6 @@ size_type max_size() const { return length_; } size_type capacity() const { return length_; } - // Sets the value of the given string target type to be the current string. - // This saves a temporary over doing |a = b.as_string()| - void CopyToString(STRING_TYPE* target) const { - internal::CopyToString(*this, target); - } - - void AppendToString(STRING_TYPE* target) const { - internal::AppendToString(*this, target); - } - size_type copy(value_type* buf, size_type n, size_type pos = 0) const { return internal::copy(*this, buf, n, pos); }
diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc index 8e245e6..0777549 100644 --- a/base/strings/string_piece_unittest.cc +++ b/base/strings/string_piece_unittest.cc
@@ -190,7 +190,7 @@ ASSERT_TRUE(e.empty()); ASSERT_EQ(e.begin(), e.end()); - d.clear(); + d = BasicStringPiece<TypeParam>(); ASSERT_EQ(d.size(), 0U); ASSERT_TRUE(d.empty()); ASSERT_EQ(d.data(), nullptr); @@ -213,7 +213,7 @@ BasicStringPiece<TypeParam> c(xyz); BasicStringPiece<TypeParam> d(foobar); - d.clear(); + d = Piece(); Piece e; TypeParam temp(TestFixture::as_string("123")); temp.push_back('\0'); @@ -511,14 +511,14 @@ c.remove_suffix(c.size()); ASSERT_EQ(c, e); - // set - c.set(foobar.c_str()); + // assignment + c = foobar.c_str(); ASSERT_EQ(c, a); - c.set(foobar.c_str(), 6); + c = {foobar.c_str(), 6}; ASSERT_EQ(c, a); - c.set(foobar.c_str(), 0); + c = {foobar.c_str(), 0}; ASSERT_EQ(c, e); - c.set(foobar.c_str(), 7); // Note, has an embedded NULL + c = {foobar.c_str(), 7}; // Note, has an embedded NULL ASSERT_NE(c, a); // as_string @@ -543,25 +543,6 @@ StringPiece e; std::string s2; - // CopyToString - a.CopyToString(&s2); - ASSERT_EQ(s2.size(), 6U); - ASSERT_EQ(s2, "foobar"); - b.CopyToString(&s2); - ASSERT_EQ(s2.size(), 7U); - ASSERT_EQ(s1, s2); - e.CopyToString(&s2); - ASSERT_TRUE(s2.empty()); - - // AppendToString - s2.erase(); - a.AppendToString(&s2); - ASSERT_EQ(s2.size(), 6U); - ASSERT_EQ(s2, "foobar"); - a.AppendToString(&s2); - ASSERT_EQ(s2.size(), 12U); - ASSERT_EQ(s2, "foobarfoobar"); - // starts_with ASSERT_TRUE(a.starts_with(a)); ASSERT_TRUE(a.starts_with("foo")); @@ -587,21 +568,16 @@ ASSERT_TRUE(!e.ends_with(a)); StringPiece c; - c.set("foobar", 6); + c = {"foobar", 6}; ASSERT_EQ(c, a); - c.set("foobar", 0); + c = {"foobar", 0}; ASSERT_EQ(c, e); - c.set("foobar", 7); + c = {"foobar", 7}; ASSERT_NE(c, a); } TYPED_TEST(CommonStringPieceTest, CheckNULL) { - // we used to crash here, but now we don't. 
- BasicStringPiece<TypeParam> s(nullptr); - ASSERT_EQ(s.data(), nullptr); - ASSERT_EQ(s.size(), 0U); - - s.set(nullptr); + BasicStringPiece<TypeParam> s; ASSERT_EQ(s.data(), nullptr); ASSERT_EQ(s.size(), 0U); @@ -699,7 +675,7 @@ BasicStringPiece<TypeParam>( str.c_str(), static_cast<typename BasicStringPiece<TypeParam>::size_type>(0))); - ASSERT_EQ(empty, BasicStringPiece<TypeParam>(nullptr)); + ASSERT_EQ(empty, BasicStringPiece<TypeParam>()); ASSERT_TRUE( empty == BasicStringPiece<TypeParam>(
diff --git a/base/strings/string_split.cc b/base/strings/string_split.cc index ef9c74d..3816501 100644 --- a/base/strings/string_split.cc +++ b/base/strings/string_split.cc
@@ -14,27 +14,15 @@ namespace { -// PieceToOutputType converts a StringPiece as needed to a given output type, -// which is either the same type of StringPiece (a NOP) or the corresponding -// non-piece string type. -// -// The default converter is a NOP, it works when the OutputType is the -// correct StringPiece. -template<typename Str, typename OutputType> -OutputType PieceToOutputType(BasicStringPiece<Str> piece) { - return piece; -} -template<> // Convert StringPiece to std::string -std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { - return piece.as_string(); -} -template<> // Convert StringPiece16 to string16. -string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { - return piece.as_string(); -} - // Returns either the ASCII or UTF-16 whitespace. template<typename Str> BasicStringPiece<Str> WhitespaceForType(); +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +template <> +WStringPiece WhitespaceForType<std::wstring>() { + return kWhitespaceWide; +} +#endif + template<> StringPiece16 WhitespaceForType<string16>() { return kWhitespaceUTF16; } @@ -42,37 +30,12 @@ return kWhitespaceASCII; } -// Optimize the single-character case to call find() on the string instead, -// since this is the common case and can be made faster. This could have been -// done with template specialization too, but would have been less clear. -// -// There is no corresponding FindFirstNotOf because StringPiece already -// implements these different versions that do the optimized searching. 
-size_t FindFirstOf(StringPiece piece, char c, size_t pos) { - return piece.find(c, pos); -} -size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { - return piece.find(c, pos); -} -size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { - return piece.find_first_of(one_of, pos); -} -size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { - return piece.find_first_of(one_of, pos); -} - // General string splitter template. Can take 8- or 16-bit input, can produce -// the corresponding string or StringPiece output, and can take single- or -// multiple-character delimiters. -// -// DelimiterType is either a character (Str::value_type) or a string piece of -// multiple characters (BasicStringPiece<Str>). StringPiece has a version of -// find for both of these cases, and the single-character version is the most -// common and can be implemented faster, which is why this is a template. -template<typename Str, typename OutputStringType, typename DelimiterType> +// the corresponding string or StringPiece output. +template <typename OutputStringType, typename Str> static std::vector<OutputStringType> SplitStringT( BasicStringPiece<Str> str, - DelimiterType delimiter, + BasicStringPiece<Str> delimiter, WhitespaceHandling whitespace, SplitResult result_type) { std::vector<OutputStringType> result; @@ -81,7 +44,7 @@ size_t start = 0; while (start != Str::npos) { - size_t end = FindFirstOf(str, delimiter, start); + size_t end = str.find_first_of(delimiter, start); BasicStringPiece<Str> piece; if (end == Str::npos) { @@ -96,7 +59,7 @@ piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); if (result_type == SPLIT_WANT_ALL || !piece.empty()) - result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); + result.emplace_back(piece); } return result; } @@ -115,7 +78,7 @@ DVLOG(1) << "cannot find delimiter in: " << input; return false; // No delimiter. 
} - input.substr(0, end_key_pos).CopyToString(&result_pair.first); + result_pair.first = std::string(input.substr(0, end_key_pos)); // Find the value string. StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos); @@ -124,22 +87,23 @@ DVLOG(1) << "cannot parse value from input: " << input; return false; // No value. } - remains.substr(begin_value_pos, remains.size() - begin_value_pos) - .CopyToString(&result_pair.second); + + result_pair.second = std::string( + remains.substr(begin_value_pos, remains.size() - begin_value_pos)); return true; } -template <typename Str, typename OutputStringType> -void SplitStringUsingSubstrT(BasicStringPiece<Str> input, - BasicStringPiece<Str> delimiter, - WhitespaceHandling whitespace, - SplitResult result_type, - std::vector<OutputStringType>* result) { +template <typename OutputStringType, typename Str> +std::vector<OutputStringType> SplitStringUsingSubstrT( + BasicStringPiece<Str> input, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { using Piece = BasicStringPiece<Str>; using size_type = typename Piece::size_type; - result->clear(); + std::vector<OutputStringType> result; for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; begin_index = end_index + delimiter.size()) { end_index = input.find(delimiter, begin_index); @@ -151,8 +115,10 @@ term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); if (result_type == SPLIT_WANT_ALL || !term.empty()) - result->push_back(PieceToOutputType<Str, OutputStringType>(term)); + result.emplace_back(term); } + + return result; } } // namespace @@ -161,48 +127,29 @@ StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - if (separators.size() == 1) { - return SplitStringT<std::string, std::string, char>( - input, separators[0], whitespace, result_type); - } - return SplitStringT<std::string, std::string, StringPiece>( - input, separators, whitespace, result_type); + return 
SplitStringT<std::string>(input, separators, whitespace, result_type); } std::vector<string16> SplitString(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - if (separators.size() == 1) { - return SplitStringT<string16, string16, char16>( - input, separators[0], whitespace, result_type); - } - return SplitStringT<string16, string16, StringPiece16>( - input, separators, whitespace, result_type); + return SplitStringT<string16>(input, separators, whitespace, result_type); } std::vector<StringPiece> SplitStringPiece(StringPiece input, StringPiece separators, WhitespaceHandling whitespace, SplitResult result_type) { - if (separators.size() == 1) { - return SplitStringT<std::string, StringPiece, char>( - input, separators[0], whitespace, result_type); - } - return SplitStringT<std::string, StringPiece, StringPiece>( - input, separators, whitespace, result_type); + return SplitStringT<StringPiece>(input, separators, whitespace, result_type); } std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, SplitResult result_type) { - if (separators.size() == 1) { - return SplitStringT<string16, StringPiece16, char16>( - input, separators[0], whitespace, result_type); - } - return SplitStringT<string16, StringPiece16, StringPiece16>( - input, separators, whitespace, result_type); + return SplitStringT<StringPiece16>(input, separators, whitespace, + result_type); } bool SplitStringIntoKeyValuePairs(StringPiece input, @@ -240,18 +187,16 @@ StringPiece16 delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - std::vector<string16> result; - SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); - return result; + return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace, + result_type); } std::vector<std::string> SplitStringUsingSubstr(StringPiece input, StringPiece delimiter, WhitespaceHandling whitespace, 
SplitResult result_type) { - std::vector<std::string> result; - SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); - return result; + return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace, + result_type); } std::vector<StringPiece16> SplitStringPieceUsingSubstr( @@ -260,8 +205,8 @@ WhitespaceHandling whitespace, SplitResult result_type) { std::vector<StringPiece16> result; - SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); - return result; + return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace, + result_type); } std::vector<StringPiece> SplitStringPieceUsingSubstr( @@ -269,9 +214,41 @@ StringPiece delimiter, WhitespaceHandling whitespace, SplitResult result_type) { - std::vector<StringPiece> result; - SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); - return result; + return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace, + result_type); } +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<std::wstring>(input, separators, whitespace, result_type); +} + +std::vector<WStringPiece> SplitStringPiece(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<WStringPiece>(input, separators, whitespace, result_type); +} + +std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace, + result_type); +} + +std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return 
SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace, + result_type); +} +#endif + } // namespace base
diff --git a/base/strings/string_split.h b/base/strings/string_split.h index 1894d05..02c2c59 100644 --- a/base/strings/string_split.h +++ b/base/strings/string_split.h
@@ -12,6 +12,7 @@ #include "polyfills/base/base_export.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" +#include "build/build_config.h" namespace gurl_base { @@ -39,26 +40,31 @@ // Split the given string on ANY of the given separators, returning copies of // the result. // +// Note this is the inverse of JoinString() defined in string_util.h. +// // To split on either commas or semicolons, keeping all whitespace: // // std::vector<std::string> tokens = gurl_base::SplitString( -// input, ",;", gurl_base::KEEP_WHITESPACE, gurl_base::SPLIT_WANT_ALL); -BASE_EXPORT std::vector<std::string> SplitString( - StringPiece input, - StringPiece separators, - WhitespaceHandling whitespace, - SplitResult result_type); -BASE_EXPORT std::vector<string16> SplitString( - StringPiece16 input, - StringPiece16 separators, - WhitespaceHandling whitespace, - SplitResult result_type); +// input, ",;", gurl_base::KEEP_WHITESPACE, +// gurl_base::SPLIT_WANT_ALL); +BASE_EXPORT std::vector<std::string> SplitString(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; // Like SplitString above except it returns a vector of StringPieces which // reference the original buffer without copying. Although you have to be // careful to keep the original string unmodified, this provides an efficient // way to iterate through tokens in a string. // +// Note this is the inverse of JoinString() defined in string_util.h. 
+// // To iterate through all whitespace-separated tokens in an input string: // // for (const auto& cur : @@ -70,12 +76,12 @@ StringPiece input, StringPiece separators, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( StringPiece16 input, StringPiece16 separators, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; using StringPairs = std::vector<std::pair<std::string, std::string>>; @@ -102,12 +108,12 @@ StringPiece16 input, StringPiece16 delimiter, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr( StringPiece input, StringPiece delimiter, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; // Like SplitStringUsingSubstr above except it returns a vector of StringPieces // which reference the original buffer without copying. 
Although you have to be @@ -125,12 +131,38 @@ StringPiece16 input, StringPiece16 delimiter, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr( StringPiece input, StringPiece delimiter, WhitespaceHandling whitespace, - SplitResult result_type); + SplitResult result_type) WARN_UNUSED_RESULT; + +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( + WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +#endif } // namespace base
diff --git a/base/strings/string_split_unittest.cc b/base/strings/string_split_unittest.cc index 993450a..f84d4b8 100644 --- a/base/strings/string_split_unittest.cc +++ b/base/strings/string_split_unittest.cc
@@ -47,7 +47,7 @@ } TEST_F(SplitStringIntoKeyValuePairsUsingSubstrTest, - MissingKeyValuePairDelimeter) { + MissingKeyValuePairDelimiter) { EXPECT_TRUE(SplitStringIntoKeyValuePairsUsingSubstr( "key1:value1,,key3:value3", ':', // Key-value delimiter
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index 2b2591d..742319a 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc
@@ -51,21 +51,6 @@ return elem1.parameter < elem2.parameter; } -// Overloaded function to append one string onto the end of another. Having a -// separate overload for |source| as both string and StringPiece allows for more -// efficient usage from functions templated to work with either type (avoiding a -// redundant call to the BasicStringPiece constructor in both cases). -template <typename string_type> -inline void AppendToString(string_type* target, const string_type& source) { - target->append(source); -} - -template <typename string_type> -inline void AppendToString(string_type* target, - const BasicStringPiece<string_type>& source) { - source.AppendToString(target); -} - // Assuming that a pointer is the size of a "machine word", then // uintptr_t is an integer type that is also a machine word. using MachineWord = uintptr_t; @@ -237,17 +222,16 @@ bool ReplaceChars(const string16& input, StringPiece16 replace_chars, - const string16& replace_with, + StringPiece16 replace_with, string16* output) { - return ReplaceCharsT(input, replace_chars, StringPiece16(replace_with), - output); + return ReplaceCharsT(input, replace_chars, replace_with, output); } bool ReplaceChars(const std::string& input, StringPiece replace_chars, - const std::string& replace_with, + StringPiece replace_with, std::string* output) { - return ReplaceCharsT(input, replace_chars, StringPiece(replace_with), output); + return ReplaceCharsT(input, replace_chars, replace_with, output); } bool RemoveChars(const string16& input, @@ -262,8 +246,8 @@ return ReplaceCharsT(input, remove_chars, StringPiece(), output); } -template<typename Str> -TrimPositions TrimStringT(const Str& input, +template <typename Str> +TrimPositions TrimStringT(BasicStringPiece<Str> input, BasicStringPiece<Str> trim_chars, TrimPositions positions, Str* output) { @@ -271,40 +255,40 @@ // a StringPiece version of input to be able to call find* on it with the // StringPiece version of trim_chars (normally the trim_chars will be 
a // constant so avoid making a copy). - BasicStringPiece<Str> input_piece(input); const size_t last_char = input.length() - 1; - const size_t first_good_char = (positions & TRIM_LEADING) ? - input_piece.find_first_not_of(trim_chars) : 0; - const size_t last_good_char = (positions & TRIM_TRAILING) ? - input_piece.find_last_not_of(trim_chars) : last_char; + const size_t first_good_char = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + const size_t last_good_char = (positions & TRIM_TRAILING) + ? input.find_last_not_of(trim_chars) + : last_char; // When the string was all trimmed, report that we stripped off characters // from whichever position the caller was interested in. For empty input, we // stripped no characters, but we still need to clear |output|. - if (input.empty() || - (first_good_char == Str::npos) || (last_good_char == Str::npos)) { + if (input.empty() || first_good_char == Str::npos || + last_good_char == Str::npos) { bool input_was_empty = input.empty(); // in case output == &input output->clear(); return input_was_empty ? TRIM_NONE : positions; } // Trim. - *output = - input.substr(first_good_char, last_good_char - first_good_char + 1); + output->assign(input.data() + first_good_char, + last_good_char - first_good_char + 1); // Return where we trimmed from. return static_cast<TrimPositions>( - ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | - ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); + (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | + (last_good_char == last_char ? 
TRIM_NONE : TRIM_TRAILING)); } -bool TrimString(const string16& input, +bool TrimString(StringPiece16 input, StringPiece16 trim_chars, string16* output) { return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; } -bool TrimString(const std::string& input, +bool TrimString(StringPiece input, StringPiece trim_chars, std::string* output) { return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; @@ -370,7 +354,7 @@ output->clear(); } -TrimPositions TrimWhitespace(const string16& input, +TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, string16* output) { return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); @@ -381,7 +365,7 @@ return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); } -TrimPositions TrimWhitespaceASCII(const std::string& input, +TrimPositions TrimWhitespaceASCII(StringPiece input, TrimPositions positions, std::string* output) { return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); @@ -506,20 +490,29 @@ } #endif -bool IsStringUTF8(StringPiece str) { - const char *src = str.data(); +template <bool (*Validator)(uint32_t)> +inline static bool DoIsStringUTF8(StringPiece str) { + const char* src = str.data(); int32_t src_len = static_cast<int32_t>(str.length()); int32_t char_index = 0; while (char_index < src_len) { int32_t code_point; CBU8_NEXT(src, char_index, src_len, code_point); - if (!IsValidCharacter(code_point)) + if (!Validator(code_point)) return false; } return true; } +bool IsStringUTF8(StringPiece str) { + return DoIsStringUTF8<IsValidCharacter>(str); +} + +bool IsStringUTF8AllowingNoncharacters(StringPiece str) { + return DoIsStringUTF8<IsValidCodepoint>(str); +} + // Implementation note: Normally this function will be called with a hardcoded // constant for the lowercase_ascii parameter. 
Constructing a StringPiece from // a C constant requires running strlen, so the result will be two passes @@ -913,7 +906,7 @@ template <class string_type> inline typename string_type::value_type* WriteIntoT(string_type* str, size_t length_with_null) { - GURL_DCHECK_GT(length_with_null, 1u); + GURL_DCHECK_GE(length_with_null, 1u); str->reserve(length_with_null); str->resize(length_with_null - 1); return &((*str)[0]); @@ -927,11 +920,6 @@ return WriteIntoT(str, length_with_null); } -#if defined(_MSC_VER) && !defined(__clang__) -// Work around VC++ code-gen bug. https://crbug.com/804884 -#pragma optimize("", off) -#endif - // Generic version for all JoinString overloads. |list_type| must be a sequence // (std::vector or std::initializer_list) of strings/StringPieces (std::string, // string16, StringPiece or StringPiece16). |string_type| is either std::string @@ -939,7 +927,7 @@ template <typename list_type, typename string_type> static string_type JoinStringT(const list_type& parts, BasicStringPiece<string_type> sep) { - if (parts.size() == 0) + if (gurl_base::empty(parts)) return string_type(); // Pre-allocate the eventual size of the string. Start with the size of all of @@ -952,15 +940,12 @@ auto iter = parts.begin(); GURL_DCHECK(iter != parts.end()); - AppendToString(&result, *iter); + result.append(iter->data(), iter->size()); ++iter; for (; iter != parts.end(); ++iter) { - sep.AppendToString(&result); - // Using the overloaded AppendToString allows this template function to work - // on both strings and StringPieces without creating an intermediate - // StringPiece object. - AppendToString(&result, *iter); + result.append(sep.data(), sep.size()); + result.append(iter->data(), iter->size()); } // Sanity-check that we pre-allocated correctly. @@ -979,11 +964,6 @@ return JoinStringT(parts, separator); } -#if defined(_MSC_VER) && !defined(__clang__) -// Work around VC++ code-gen bug. 
https://crbug.com/804884 -#pragma optimize("", on) -#endif - std::string JoinString(const std::vector<StringPiece>& parts, StringPiece separator) { return JoinStringT(parts, separator); @@ -1085,6 +1065,36 @@ return result; } +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) + +TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output) { + return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output); +} + +WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { + return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions); +} + +bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +#endif + // The following code is compatible with the OpenBSD lcpy interface. See: // http://www.gratisoft.us/todd/papers/strlcpy.html // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
diff --git a/base/strings/string_util.h b/base/strings/string_util.h index 5a8cb02..ed3118d 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h
@@ -160,6 +160,7 @@ // by HTML5, and don't include control characters. BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. +BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF. BASE_EXPORT extern const char kWhitespaceASCII[]; BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. @@ -183,11 +184,11 @@ // NOTE: Safe to use the same variable for both |input| and |output|. BASE_EXPORT bool ReplaceChars(const string16& input, StringPiece16 replace_chars, - const string16& replace_with, + StringPiece16 replace_with, string16* output); BASE_EXPORT bool ReplaceChars(const std::string& input, StringPiece replace_chars, - const std::string& replace_with, + StringPiece replace_with, std::string* output); enum TrimPositions { @@ -203,10 +204,10 @@ // // It is safe to use the same variable for both |input| and |output| (this is // the normal usage to trim in-place). -BASE_EXPORT bool TrimString(const string16& input, +BASE_EXPORT bool TrimString(StringPiece16 input, StringPiece16 trim_chars, string16* output); -BASE_EXPORT bool TrimString(const std::string& input, +BASE_EXPORT bool TrimString(StringPiece input, StringPiece trim_chars, std::string* output); @@ -268,6 +269,24 @@ inline const char16* as_u16cstr(WStringPiece str) { return reinterpret_cast<const char16*>(str.data()); } + +// Utility functions to convert between gurl_base::WStringPiece and +// gurl_base::StringPiece16. 
+inline WStringPiece AsWStringPiece(StringPiece16 str) { + return WStringPiece(as_wcstr(str.data()), str.size()); +} + +inline StringPiece16 AsStringPiece16(WStringPiece str) { + return StringPiece16(as_u16cstr(str.data()), str.size()); +} + +inline std::wstring AsWString(StringPiece16 str) { + return std::wstring(as_wcstr(str.data()), str.size()); +} + +inline string16 AsString16(WStringPiece str) { + return string16(as_u16cstr(str.data()), str.size()); +} #endif // defined(WCHAR_T_IS_UTF16) // Trims any whitespace from either end of the input string. @@ -277,12 +296,12 @@ // // The std::string versions return where whitespace was found. // NOTE: Safe to use the same variable for both input and output. -BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, +BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input, TrimPositions positions, string16* output); BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input, TrimPositions positions); -BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, +BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input, TrimPositions positions, std::string* output); BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input, @@ -309,21 +328,23 @@ BASE_EXPORT bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters); -// Returns true if the specified string matches the criteria. How can a wide -// string be 8-bit or UTF8? It contains only characters that are < 256 (in the -// first case) or characters that use only 8-bits and whose 8-bit -// representation looks like a UTF-8 string (the second case). -// -// Note that IsStringUTF8 checks not only if the input is structurally -// valid but also if it doesn't contain any non-character codepoint -// (e.g. U+FFFE). It's done on purpose because all the existing callers want -// to have the maximum 'discriminating' power from other encodings. 
If -// there's a use case for just checking the structural validity, we have to -// add a new function for that. -// -// IsStringASCII assumes the input is likely all ASCII, and does not leave early -// if it is not the case. +// Returns true if |str| is structurally valid UTF-8 and also doesn't +// contain any non-character code point (e.g. U+10FFFE). Prohibiting +// non-characters increases the likelihood of detecting non-UTF-8 in +// real-world text, for callers which do not need to accept +// non-characters in strings. BASE_EXPORT bool IsStringUTF8(StringPiece str); + +// Returns true if |str| contains valid UTF-8, allowing non-character +// code points. +BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str); + +// Returns true if |str| contains only valid ASCII character values. +// Note 1: IsStringASCII executes in time determined solely by the +// length of the string, not by its contents, so it is robust against +// timing attacks for all strings of equal length. +// Note 2: IsStringASCII assumes the input is likely all ASCII, and +// does not leave early if it is not the case. BASE_EXPORT bool IsStringASCII(StringPiece str); BASE_EXPORT bool IsStringASCII(StringPiece16 str); #if defined(WCHAR_T_IS_UTF32) @@ -456,10 +477,6 @@ // convenient in that is can be used inline in the call, and fast in that it // avoids copying the results of the call from a char* into a string. // -// |length_with_null| must be at least 2, since otherwise the underlying string -// would have size 0, and trying to access &((*str)[0]) in that case can result -// in a number of problems. -// // Internally, this takes linear time because the resize() call 0-fills the // underlying array for potentially all // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. 
Ideally we @@ -471,9 +488,11 @@ BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); -// Does the opposite of SplitString()/SplitStringPiece(). Joins a vector or list -// of strings into a single string, inserting |separator| (which may be empty) -// in between all elements. +// Joins a vector or list of strings into a single string, inserting |separator| +// (which may be empty) in between all elements. +// +// Note this is inverse of SplitString()/SplitStringPiece() defined in +// string_split.h. // // If possible, callers should build a vector of StringPieces and use the // StringPiece variant, so that they do not create unnecessary copies of @@ -517,6 +536,25 @@ const string16& a, size_t* offset); +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input, + TrimPositions positions); + +BASE_EXPORT bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions); + +BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); +#endif + } // namespace base #if defined(OS_WIN)
diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc index 3ca29b7..e9e4d93 100644 --- a/base/strings/string_util_constants.cc +++ b/base/strings/string_util_constants.cc
@@ -6,61 +6,48 @@ namespace gurl_base { -#define WHITESPACE_UNICODE \ - 0x0009, /* CHARACTER TABULATION */ \ - 0x000A, /* LINE FEED (LF) */ \ - 0x000B, /* LINE TABULATION */ \ - 0x000C, /* FORM FEED (FF) */ \ - 0x000D, /* CARRIAGE RETURN (CR) */ \ - 0x0020, /* SPACE */ \ - 0x0085, /* NEXT LINE (NEL) */ \ - 0x00A0, /* NO-BREAK SPACE */ \ - 0x1680, /* OGHAM SPACE MARK */ \ - 0x2000, /* EN QUAD */ \ - 0x2001, /* EM QUAD */ \ - 0x2002, /* EN SPACE */ \ - 0x2003, /* EM SPACE */ \ - 0x2004, /* THREE-PER-EM SPACE */ \ - 0x2005, /* FOUR-PER-EM SPACE */ \ - 0x2006, /* SIX-PER-EM SPACE */ \ - 0x2007, /* FIGURE SPACE */ \ - 0x2008, /* PUNCTUATION SPACE */ \ - 0x2009, /* THIN SPACE */ \ - 0x200A, /* HAIR SPACE */ \ - 0x2028, /* LINE SEPARATOR */ \ - 0x2029, /* PARAGRAPH SEPARATOR */ \ - 0x202F, /* NARROW NO-BREAK SPACE */ \ - 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \ - 0x3000, /* IDEOGRAPHIC SPACE */ \ - 0 +#define WHITESPACE_ASCII_NO_CR_LF \ + 0x09, /* CHARACTER TABULATION */ \ + 0x0B, /* LINE TABULATION */ \ + 0x0C, /* FORM FEED (FF) */ \ + 0x20 /* SPACE */ -const wchar_t kWhitespaceWide[] = { - WHITESPACE_UNICODE -}; +#define WHITESPACE_ASCII \ + WHITESPACE_ASCII_NO_CR_LF, /* Comment to make clang-format linebreak */ \ + 0x0A, /* LINE FEED (LF) */ \ + 0x0D /* CARRIAGE RETURN (CR) */ -const char16 kWhitespaceUTF16[] = { - WHITESPACE_UNICODE -}; +#define WHITESPACE_UNICODE_NON_ASCII \ + 0x0085, /* NEXT LINE (NEL) */ \ + 0x00A0, /* NO-BREAK SPACE */ \ + 0x1680, /* OGHAM SPACE MARK */ \ + 0x2000, /* EN QUAD */ \ + 0x2001, /* EM QUAD */ \ + 0x2002, /* EN SPACE */ \ + 0x2003, /* EM SPACE */ \ + 0x2004, /* THREE-PER-EM SPACE */ \ + 0x2005, /* FOUR-PER-EM SPACE */ \ + 0x2006, /* SIX-PER-EM SPACE */ \ + 0x2007, /* FIGURE SPACE */ \ + 0x2008, /* PUNCTUATION SPACE */ \ + 0x2009, /* THIN SPACE */ \ + 0x200A, /* HAIR SPACE */ \ + 0x2028, /* LINE SEPARATOR */ \ + 0x2029, /* PARAGRAPH SEPARATOR */ \ + 0x202F, /* NARROW NO-BREAK SPACE */ \ + 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \ + 
0x3000 /* IDEOGRAPHIC SPACE */ -const char kWhitespaceASCII[] = { - 0x09, // CHARACTER TABULATION - 0x0A, // LINE FEED (LF) - 0x0B, // LINE TABULATION - 0x0C, // FORM FEED (FF) - 0x0D, // CARRIAGE RETURN (CR) - 0x20, // SPACE - 0 -}; +#define WHITESPACE_UNICODE_NO_CR_LF \ + WHITESPACE_ASCII_NO_CR_LF, WHITESPACE_UNICODE_NON_ASCII -const char16 kWhitespaceASCIIAs16[] = { - 0x09, // CHARACTER TABULATION - 0x0A, // LINE FEED (LF) - 0x0B, // LINE TABULATION - 0x0C, // FORM FEED (FF) - 0x0D, // CARRIAGE RETURN (CR) - 0x20, // SPACE - 0 -}; +#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII + +const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0}; +const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0}; +const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0}; +const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0}; +const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0}; const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc index 51b4ee1..a2aab42 100644 --- a/base/strings/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc
@@ -69,6 +69,128 @@ return prev != output->length(); } +using TestFunction = bool (*)(StringPiece str); + +// Helper used to test IsStringUTF8{,AllowingNoncharacters}. +void TestStructurallyValidUtf8(TestFunction fn) { + EXPECT_TRUE(fn("abc")); + EXPECT_TRUE(fn("\xC2\x81")); + EXPECT_TRUE(fn("\xE1\x80\xBF")); + EXPECT_TRUE(fn("\xF1\x80\xA0\xBF")); + EXPECT_TRUE(fn("\xF1\x80\xA0\xBF")); + EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF")); + + // U+FEFF used as UTF-8 BOM. + // clang-format off + EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc")); + // clang-format on + + // Embedded nulls in canonical UTF-8 representation. + using std::string_literals::operator""s; + const std::string kEmbeddedNull = "embedded\0null"s; + EXPECT_TRUE(fn(kEmbeddedNull)); +} + +// Helper used to test IsStringUTF8{,AllowingNoncharacters}. +void TestStructurallyInvalidUtf8(TestFunction fn) { + // Invalid encoding of U+1FFFE (0x8F instead of 0x9F) + EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE")); + + // Surrogate code points + EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF")); + EXPECT_FALSE(fn("\xED\xA0\x8F")); + EXPECT_FALSE(fn("\xED\xBF\xBF")); + + // Overlong sequences + EXPECT_FALSE(fn("\xC0\x80")); // U+0000 + EXPECT_FALSE(fn("\xC1\x80\xC1\x81")); // "AB" + EXPECT_FALSE(fn("\xE0\x80\x80")); // U+0000 + EXPECT_FALSE(fn("\xE0\x82\x80")); // U+0080 + EXPECT_FALSE(fn("\xE0\x9F\xBF")); // U+07FF + EXPECT_FALSE(fn("\xF0\x80\x80\x8D")); // U+000D + EXPECT_FALSE(fn("\xF0\x80\x82\x91")); // U+0091 + EXPECT_FALSE(fn("\xF0\x80\xA0\x80")); // U+0800 + EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF")); // U+FEFF (BOM) + EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF")); // U+003F + EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5")); // U+00A5 + + // Beyond U+10FFFF (the upper limit of Unicode codespace) + EXPECT_FALSE(fn("\xF4\x90\x80\x80")); // U+110000 + EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF")); // 5 bytes + EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80")); // 6 bytes + + // BOM in UTF-16(BE|LE) + EXPECT_FALSE(fn("\xFE\xFF")); + 
EXPECT_FALSE(fn("\xFF\xFE")); + + // Strings in legacy encodings. We can certainly make up strings + // in a legacy encoding that are valid in UTF-8, but in real data, + // most of them are invalid as UTF-8. + + // cafe with U+00E9 in ISO-8859-1 + EXPECT_FALSE(fn("caf\xE9")); + // U+AC00, U+AC01 in EUC-KR + EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2")); + // U+4F60 U+597D in Big5 + EXPECT_FALSE(fn("\xA7\x41\xA6\x6E")); + // "abc" with U+201[CD] in windows-125[0-8] + // clang-format off + EXPECT_FALSE(fn("\x93" "abc\x94")); + // clang-format on + // U+0639 U+064E U+0644 U+064E in ISO-8859-6 + EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE")); + // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 + EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC")); + + // BOM in UTF-32(BE|LE) + using std::string_literals::operator""s; + const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s; + EXPECT_FALSE(fn(kUtf32BeBom)); + const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s; + EXPECT_FALSE(fn(kUtf32LeBom)); +} + +// Helper used to test IsStringUTF8{,AllowingNoncharacters}. 
+void TestNoncharacters(TestFunction fn, bool expected_result) { + EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result); // U+FDD0 + EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result); // U+FDDF + EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result); // U+FDEF + EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result); // U+FFFE + EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result); // U+FFFF + EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result); // U+01FFFE + EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result); // U+01FFFF + EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result); // U+02FFFE + EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result); // U+02FFFF + EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result); // U+03FFFE + EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result); // U+03FFFF + EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result); // U+04FFFE + EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result); // U+04FFFF + EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result); // U+05FFFE + EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result); // U+05FFFF + EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result); // U+06FFFE + EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result); // U+06FFFF + EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result); // U+07FFFE + EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result); // U+07FFFF + EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result); // U+08FFFE + EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result); // U+08FFFF + EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result); // U+09FFFE + EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result); // U+09FFFF + EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result); // U+0AFFFE + EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result); // U+0AFFFF + EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result); // U+0BFFFE + EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result); // U+0BFFFF + EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result); // U+0CFFFE + EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result); // U+0CFFFF + EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), 
expected_result); // U+0DFFFE + EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result); // U+0DFFFF + EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result); // U+0EFFFE + EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result); // U+0EFFFF + EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result); // U+0FFFFE + EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result); // U+0FFFFF + EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result); // U+10FFFE + EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result); // U+10FFFF +} + } // namespace TEST(StringUtilTest, TruncateUTF8ToByteSize) { @@ -380,69 +502,19 @@ } TEST(StringUtilTest, IsStringUTF8) { - EXPECT_TRUE(IsStringUTF8("abc")); - EXPECT_TRUE(IsStringUTF8("\xc2\x81")); - EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); - EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); - EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); - EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM + { + SCOPED_TRACE("IsStringUTF8"); + TestStructurallyValidUtf8(&IsStringUTF8); + TestStructurallyInvalidUtf8(&IsStringUTF8); + TestNoncharacters(&IsStringUTF8, false); + } - // surrogate code points - EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); - EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); - EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); - - // overlong sequences - EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 - EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" - EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 - EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 - EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff - EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D - EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 - EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 - EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) - EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F - EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 - - // Beyond U+10FFFF (the upper limit of 
Unicode codespace) - EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 - EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes - EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes - - // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) - EXPECT_FALSE(IsStringUTF8("\xfe\xff")); - EXPECT_FALSE(IsStringUTF8("\xff\xfe")); - EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); - EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); - - // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> - EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) - EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE - EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF - EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 - EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF - // Strings in legacy encodings. We can certainly make up strings - // in a legacy encoding that are valid in UTF-8, but in real data, - // most of them are invalid as UTF-8. - EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 - EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR - EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 - // "abc" with U+201[CD] in windows-125[0-8] - EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); - // U+0639 U+064E U+0644 U+064E in ISO-8859-6 - EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); - // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 - EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); - - // Check that we support Embedded Nulls. The first uses the canonical UTF-8 - // representation, and the second uses a 2-byte sequence. The second version - // is invalid UTF-8 since UTF-8 states that the shortest encoding for a - // given codepoint must be used. 
- static const char kEmbeddedNull[] = "embedded\0null"; - EXPECT_TRUE(IsStringUTF8( - std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); - EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); + { + SCOPED_TRACE("IsStringUTF8AllowingNoncharacters"); + TestStructurallyValidUtf8(&IsStringUTF8AllowingNoncharacters); + TestStructurallyInvalidUtf8(&IsStringUTF8AllowingNoncharacters); + TestNoncharacters(&IsStringUTF8AllowingNoncharacters, true); + } } TEST(StringUtilTest, IsStringASCII) { @@ -1414,6 +1486,13 @@ WritesCorrectly(2); WritesCorrectly(5000); + // Validate that WriteInto handles 0-length strings + std::string empty; + const char kOriginal[] = "original"; + strncpy(WriteInto(&empty, 1), kOriginal, 0); + EXPECT_STREQ("", empty.c_str()); + EXPECT_EQ(0u, empty.size()); + // Validate that WriteInto doesn't modify other strings // when using a Copy-on-Write implementation. const char kLive[] = "live";
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc index 1a08ffb..cc58087 100644 --- a/base/strings/stringprintf.cc +++ b/base/strings/stringprintf.cc
@@ -39,18 +39,25 @@ va_list argptr) { return gurl_base::vswprintf(buffer, buf_size, format, argptr); } +inline int vsnprintfT(char16_t* buffer, + size_t buf_size, + const char16_t* format, + va_list argptr) { + return gurl_base::vswprintf(reinterpret_cast<wchar_t*>(buffer), buf_size, + reinterpret_cast<const wchar_t*>(format), argptr); +} #endif // Templatized backend for StringPrintF/StringAppendF. This does not finalize // the va_list, the caller is expected to do that. -template <class StringType> -static void StringAppendVT(StringType* dst, - const typename StringType::value_type* format, +template <class CharT> +static void StringAppendVT(std::basic_string<CharT>* dst, + const CharT* format, va_list ap) { // First try with a small fixed size buffer. // This buffer size should be kept in sync with StringUtilTest.GrowBoundary // and StringUtilTest.StringPrintfBounds. - typename StringType::value_type stack_buf[1024]; + CharT stack_buf[1024]; va_list ap_copy; va_copy(ap_copy, ap); @@ -93,7 +100,7 @@ return; } - std::vector<typename StringType::value_type> mem_buf(mem_length); + std::vector<CharT> mem_buf(mem_length); // NOTE: You can only use a va_list once. Since we're in a while loop, we // need to make a new copy each time so we don't use up the original. @@ -129,6 +136,15 @@ va_end(ap); return result; } + +std::u16string StringPrintf(const char16_t* format, ...) { + va_list ap; + va_start(ap, format); + std::u16string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} #endif std::string StringPrintV(const char* format, va_list ap) { @@ -156,6 +172,17 @@ va_end(ap); return *dst; } + +const std::u16string& SStringPrintf(std::u16string* dst, + const char16_t* format, + ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} #endif void StringAppendF(std::string* dst, const char* format, ...) 
{ @@ -172,6 +199,13 @@ StringAppendV(dst, format, ap); va_end(ap); } + +void StringAppendF(std::u16string* dst, const char16_t* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} #endif void StringAppendV(std::string* dst, const char* format, va_list ap) { @@ -182,6 +216,10 @@ void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { StringAppendVT(dst, format, ap); } + +void StringAppendV(std::u16string* dst, const char16_t* format, va_list ap) { + StringAppendVT(dst, format, ap); +} #endif } // namespace base
diff --git a/base/strings/stringprintf.h b/base/strings/stringprintf.h index 2abdb68..5768bcc 100644 --- a/base/strings/stringprintf.h +++ b/base/strings/stringprintf.h
@@ -19,8 +19,14 @@ BASE_EXPORT std::string StringPrintf(const char* format, ...) PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; #if defined(OS_WIN) +// Note: Unfortunately compile time checking of the format string for UTF-16 +// strings is not supported by any compiler, thus these functions should be used +// carefully and sparingly. Also applies to SStringPrintf and StringAppendV +// below. BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...) WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string StringPrintf(const char16_t* format, ...) + WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; #endif // Return a C++ string given vprintf-like input. @@ -35,6 +41,9 @@ BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst, const wchar_t* format, ...) WPRINTF_FORMAT(2, 3); +BASE_EXPORT const std::u16string& SStringPrintf(std::u16string* dst, + const char16_t* format, + ...) WPRINTF_FORMAT(2, 3); #endif // Append result to a supplied string. @@ -43,6 +52,8 @@ #if defined(OS_WIN) BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...) WPRINTF_FORMAT(2, 3); +BASE_EXPORT void StringAppendF(std::u16string* dst, const char16_t* format, ...) + WPRINTF_FORMAT(2, 3); #endif // Lower-level routine that takes a va_list and appends to a specified @@ -53,6 +64,9 @@ BASE_EXPORT void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) WPRINTF_FORMAT(2, 0); +BASE_EXPORT void StringAppendV(std::u16string* dst, + const char16_t* format, + va_list ap) WPRINTF_FORMAT(2, 0); #endif } // namespace base
diff --git a/base/strings/stringprintf_unittest.cc b/base/strings/stringprintf_unittest.cc index 59e3403..c2e8707 100644 --- a/base/strings/stringprintf_unittest.cc +++ b/base/strings/stringprintf_unittest.cc
@@ -18,7 +18,10 @@ // A helper for the StringAppendV test that follows. // // Just forwards its args to StringAppendV. -static void StringAppendVTestHelper(std::string* out, const char* format, ...) { +template <class CharT> +static void StringAppendVTestHelper(std::basic_string<CharT>* out, + const CharT* format, + ...) { va_list ap; va_start(ap, format); StringAppendV(out, format, ap); @@ -35,6 +38,7 @@ EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w')); #if defined(OS_WIN) EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w')); + EXPECT_EQ(u"123hello w", StringPrintf(u"%3d%2ls %1lc", 123, u"hello", 'w')); #endif } @@ -47,6 +51,10 @@ std::wstring valuew(L"Hello"); StringAppendF(&valuew, L"%ls", L""); EXPECT_EQ(L"Hello", valuew); + + std::u16string value16(u"Hello"); + StringAppendF(&value16, u"%ls", u""); + EXPECT_EQ(u"Hello", value16); #endif } @@ -59,6 +67,10 @@ std::wstring valuew(L"Hello"); StringAppendF(&valuew, L" %ls", L"World"); EXPECT_EQ(L"Hello World", valuew); + + std::u16string value16(u"Hello"); + StringAppendF(&value16, u" %ls", u"World"); + EXPECT_EQ(u"Hello World", value16); #endif } @@ -71,6 +83,10 @@ std::wstring valuew(L"Hello"); StringAppendF(&valuew, L" %d", 123); EXPECT_EQ(L"Hello 123", valuew); + + std::u16string value16(u"Hello"); + StringAppendF(&value16, u" %d", 123); + EXPECT_EQ(u"Hello 123", value16); #endif } @@ -79,12 +95,13 @@ TEST(StringPrintfTest, StringPrintfBounds) { const int kSrcLen = 1026; char src[kSrcLen]; - for (auto& i : src) - i = 'A'; + std::fill_n(src, kSrcLen, 'A'); wchar_t srcw[kSrcLen]; - for (auto& i : srcw) - i = 'A'; + std::fill_n(srcw, kSrcLen, 'A'); + + char16_t src16[kSrcLen]; + std::fill_n(src16, kSrcLen, 'A'); for (int i = 1; i < 3; i++) { src[kSrcLen - i] = 0; @@ -97,6 +114,14 @@ std::wstring outw; SStringPrintf(&outw, L"%ls", srcw); EXPECT_STREQ(srcw, outw.c_str()); + + src16[kSrcLen - i] = 0; + std::u16string out16; + SStringPrintf(&out16, u"%ls", src16); + // 
EXPECT_STREQ does not support const char16_t* strings yet. + // Dispatch to the const wchar_t* overload instead. + EXPECT_STREQ(reinterpret_cast<const wchar_t*>(src16), + reinterpret_cast<const wchar_t*>(out16.c_str())); #endif } } @@ -129,6 +154,16 @@ std::string out; StringAppendVTestHelper(&out, "%d foo %s", 1, "bar"); EXPECT_EQ("1 foo bar", out); + +#if defined(OS_WIN) + std::wstring outw; + StringAppendVTestHelper(&outw, L"%d foo %ls", 1, L"bar"); + EXPECT_EQ(L"1 foo bar", outw); + + std::u16string out16; + StringAppendVTestHelper(&out16, u"%d foo %ls", 1, u"bar"); + EXPECT_EQ(u"1 foo bar", out16); +#endif } // Test the boundary condition for the size of the string_util's @@ -151,9 +186,6 @@ } #if defined(OS_WIN) -// vswprintf in Visual Studio 2013 fails when given U+FFFF. This tests that the -// failure case is gracefuly handled. In Visual Studio 2015 the bad character -// is passed through. TEST(StringPrintfTest, Invalid) { wchar_t invalid[2]; invalid[0] = 0xffff; @@ -161,11 +193,7 @@ std::wstring out; SStringPrintf(&out, L"%ls", invalid); -#if _MSC_VER >= 1900 EXPECT_STREQ(invalid, out.c_str()); -#else - EXPECT_STREQ(L"", out.c_str()); -#endif } #endif
diff --git a/base/strings/sys_string_conversions.h b/base/strings/sys_string_conversions.h index 08082ae..7c3c575 100644 --- a/base/strings/sys_string_conversions.h +++ b/base/strings/sys_string_conversions.h
@@ -31,14 +31,17 @@ // Converts between wide and UTF-8 representations of a string. On error, the // result is system-dependent. -BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide); -BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8); +BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide) + WARN_UNUSED_RESULT; +BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT; // Converts between wide and the system multi-byte representations of a string. // DANGER: This will lose information and can change (on Windows, this can // change between reboots). -BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide); -BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb); +BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide) + WARN_UNUSED_RESULT; +BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb) + WARN_UNUSED_RESULT; // Windows-specific ------------------------------------------------------------ @@ -47,9 +50,11 @@ // Converts between 8-bit and wide strings, using the given code page. The // code page identifier is one accepted by the Windows function // MultiByteToWideChar(). -BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page); +BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page) + WARN_UNUSED_RESULT; BASE_EXPORT std::string SysWideToMultiByte(const std::wstring& wide, - uint32_t code_page); + uint32_t code_page) + WARN_UNUSED_RESULT; #endif // defined(OS_WIN) @@ -61,21 +66,25 @@ // Creates a string, and returns it with a refcount of 1. You are responsible // for releasing it. Returns NULL on failure. 
-BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8); -BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16); +BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8) + WARN_UNUSED_RESULT; +BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16) + WARN_UNUSED_RESULT; // Same, but returns an autoreleased NSString. -BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8); -BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16); +BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8) WARN_UNUSED_RESULT; +BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16) + WARN_UNUSED_RESULT; // Converts a CFStringRef to an STL string. Returns an empty string on failure. -BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref); -BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref); +BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref) + WARN_UNUSED_RESULT; +BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT; // Same, but accepts NSString input. Converts nil NSString* to the appropriate // string type of length 0. -BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref); -BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref); +BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT; +BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT; #endif // defined(OS_MACOSX)
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc index 5bf7967..7d00bb4 100644 --- a/base/strings/utf_offset_string_conversions.cc +++ b/base/strings/utf_offset_string_conversions.cc
@@ -90,16 +90,22 @@ auto adjusted_iter = adjustments_on_adjusted_string->begin(); auto first_iter = first_adjustments.begin(); // Simultaneously iterate over all |adjustments_on_adjusted_string| and - // |first_adjustments|, adding adjustments to or correcting the adjustments - // in |adjustments_on_adjusted_string| as we go. |shift| keeps track of the - // current number of characters collapsed by |first_adjustments| up to this - // point. |currently_collapsing| keeps track of the number of characters - // collapsed by |first_adjustments| into the current |adjusted_iter|'s - // length. These are characters that will change |shift| as soon as we're - // done processing the current |adjusted_iter|; they are not yet reflected in - // |shift|. + // |first_adjustments|, pushing adjustments at the end of + // |adjustments_builder| as we go. |shift| keeps track of the current number + // of characters collapsed by |first_adjustments| up to this point. + // |currently_collapsing| keeps track of the number of characters collapsed by + // |first_adjustments| into the current |adjusted_iter|'s length. These are + // characters that will change |shift| as soon as we're done processing the + // current |adjusted_iter|; they are not yet reflected in |shift|. size_t shift = 0; size_t currently_collapsing = 0; + // While we *could* update |adjustments_on_adjusted_string| in place by + // inserting new adjustments into the middle, we would be repeatedly calling + // |std::vector::insert|. That would cost O(n) time per insert, relative to + // distance from end of the string. By instead allocating + // |adjustments_builder| and calling |std::vector::push_back|, we only pay + // amortized constant time per push. We are trading space for time. 
+ Adjustments adjustments_builder; while (adjusted_iter != adjustments_on_adjusted_string->end()) { if ((first_iter == first_adjustments.end()) || ((adjusted_iter->original_offset + shift + @@ -112,6 +118,7 @@ adjusted_iter->original_offset += shift; shift += currently_collapsing; currently_collapsing = 0; + adjustments_builder.push_back(*adjusted_iter); ++adjusted_iter; } else if ((adjusted_iter->original_offset + shift) > first_iter->original_offset) { @@ -127,15 +134,9 @@ GURL_DCHECK_LE(first_iter->original_offset + first_iter->output_length, adjusted_iter->original_offset + shift); - // Add the |first_adjustment_iter| to the full set of adjustments while - // making sure |adjusted_iter| continues pointing to the same element. - // We do this by inserting the |first_adjustment_iter| right before - // |adjusted_iter|, then incrementing |adjusted_iter| so it points to - // the following element. + // Add the |first_iter| to the full set of adjustments. shift += first_iter->original_length - first_iter->output_length; - adjusted_iter = adjustments_on_adjusted_string->insert( - adjusted_iter, *first_iter); - ++adjusted_iter; + adjustments_builder.push_back(*first_iter); ++first_iter; } else { // The first adjustment adjusted something that then got further adjusted @@ -168,10 +169,10 @@ // (Their offsets are already correct with respect to the original string.) // Append them all. GURL_DCHECK(adjusted_iter == adjustments_on_adjusted_string->end()); - adjustments_on_adjusted_string->insert( - adjustments_on_adjusted_string->end(), first_iter, - first_adjustments.end()); + adjustments_builder.insert(adjustments_builder.end(), first_iter, + first_adjustments.end()); } + *adjustments_on_adjusted_string = std::move(adjustments_builder); } // Converts the given source Unicode character type to the given destination
diff --git a/base/strings/utf_offset_string_conversions.h b/base/strings/utf_offset_string_conversions.h index 8902ee5..c2e2ba7 100644 --- a/base/strings/utf_offset_string_conversions.h +++ b/base/strings/utf_offset_string_conversions.h
@@ -98,7 +98,7 @@ gurl_base::OffsetAdjuster::Adjustments* adjustments); BASE_EXPORT string16 UTF8ToUTF16WithAdjustments( const gurl_base::StringPiece& utf8, - gurl_base::OffsetAdjuster::Adjustments* adjustments); + gurl_base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT; // As above, but instead internally examines the adjustments and applies them // to |offsets_for_adjustment|. Input offsets greater than the length of the // input string will be set to string16::npos. See comments by AdjustOffsets().
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h index 84d18f7..075832e 100644 --- a/base/strings/utf_string_conversion_utils.h +++ b/base/strings/utf_string_conversion_utils.h
@@ -17,16 +17,19 @@ namespace gurl_base { inline bool IsValidCodepoint(uint32_t code_point) { - // Excludes the surrogate code points ([0xD800, 0xDFFF]) and - // codepoints larger than 0x10FFFF (the highest codepoint allowed). - // Non-characters and unassigned codepoints are allowed. + // Excludes code points that are not Unicode scalar values, i.e. + // surrogate code points ([0xD800, 0xDFFF]). Additionally, excludes + // code points larger than 0x10FFFF (the highest codepoint allowed). + // Non-characters and unassigned code points are allowed. + // https://unicode.org/glossary/#unicode_scalar_value return code_point < 0xD800u || (code_point >= 0xE000u && code_point <= 0x10FFFFu); } inline bool IsValidCharacter(uint32_t code_point) { - // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in - // 0xFFFE or 0xFFFF) from the set of valid code points. + // Excludes non-characters (U+FDD0..U+FDEF, and all code points + // ending in 0xFFFE or 0xFFFF) from the set of valid code points. + // https://unicode.org/faq/private_use.html#nonchar1 return code_point < 0xD800u || (code_point >= 0xE000u && code_point < 0xFDD0u) || (code_point > 0xFDEFu && code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu);
diff --git a/base/strings/utf_string_conversions.h b/base/strings/utf_string_conversions.h index e64f420..745372c 100644 --- a/base/strings/utf_string_conversions.h +++ b/base/strings/utf_string_conversions.h
@@ -23,31 +23,31 @@ // possible. BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output); -BASE_EXPORT std::string WideToUTF8(WStringPiece wide); +BASE_EXPORT std::string WideToUTF8(WStringPiece wide) WARN_UNUSED_RESULT; BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output); -BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8); +BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT; BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output); -BASE_EXPORT string16 WideToUTF16(WStringPiece wide); +BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT; BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output); -BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16); +BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT; BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); -BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8); +BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT; BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output); -BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16); +BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT; // This converts an ASCII string, typically a hardcoded constant, to a UTF16 // string. -BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii); +BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT; // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII // beforehand. -BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16); +BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT; } // namespace base
diff --git a/base/strings/utf_string_conversions_fuzzer.cc b/base/strings/utf_string_conversions_fuzzer.cc index 96bccda..55e75f7 100644 --- a/base/strings/utf_string_conversions_fuzzer.cc +++ b/base/strings/utf_string_conversions_fuzzer.cc
@@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "base/macros.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" @@ -14,10 +15,10 @@ gurl_base::StringPiece string_piece_input(reinterpret_cast<const char*>(data), size); - gurl_base::UTF8ToWide(string_piece_input); + ignore_result(gurl_base::UTF8ToWide(string_piece_input)); gurl_base::UTF8ToWide(reinterpret_cast<const char*>(data), size, &output_std_wstring); - gurl_base::UTF8ToUTF16(string_piece_input); + ignore_result(gurl_base::UTF8ToUTF16(string_piece_input)); gurl_base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size, &output_string16); @@ -25,10 +26,10 @@ if (size % 2 == 0) { gurl_base::StringPiece16 string_piece_input16( reinterpret_cast<const gurl_base::char16*>(data), size / 2); - gurl_base::UTF16ToWide(output_string16); + ignore_result(gurl_base::UTF16ToWide(output_string16)); gurl_base::UTF16ToWide(reinterpret_cast<const gurl_base::char16*>(data), size / 2, &output_std_wstring); - gurl_base::UTF16ToUTF8(string_piece_input16); + ignore_result(gurl_base::UTF16ToUTF8(string_piece_input16)); gurl_base::UTF16ToUTF8(reinterpret_cast<const gurl_base::char16*>(data), size / 2, &output_std_string); } @@ -36,10 +37,10 @@ // Test for wchar_t. 
size_t wchar_t_size = sizeof(wchar_t); if (size % wchar_t_size == 0) { - gurl_base::WideToUTF8(output_std_wstring); + ignore_result(gurl_base::WideToUTF8(output_std_wstring)); gurl_base::WideToUTF8(reinterpret_cast<const wchar_t*>(data), size / wchar_t_size, &output_std_string); - gurl_base::WideToUTF16(output_std_wstring); + ignore_result(gurl_base::WideToUTF16(output_std_wstring)); gurl_base::WideToUTF16(reinterpret_cast<const wchar_t*>(data), size / wchar_t_size, &output_string16); } @@ -49,7 +50,7 @@ if (gurl_base::IsStringASCII(string_piece_input)) { output_string16 = gurl_base::ASCIIToUTF16(string_piece_input); gurl_base::StringPiece16 string_piece_input16(output_string16); - gurl_base::UTF16ToASCII(string_piece_input16); + ignore_result(gurl_base::UTF16ToASCII(string_piece_input16)); } return 0;
diff --git a/build/build_config.h b/build/build_config.h index 0d87d80..688b779 100644 --- a/build/build_config.h +++ b/build/build_config.h
@@ -63,7 +63,7 @@ #define OS_QNX 1 #elif defined(_AIX) #define OS_AIX 1 -#elif defined(__asmjs__) +#elif defined(__asmjs__) || defined(__wasm__) #define OS_ASMJS #else #error Please add support for your platform in build/build_config.h @@ -139,7 +139,7 @@ #define ARCH_CPU_ARM64 1 #define ARCH_CPU_64_BITS 1 #define ARCH_CPU_LITTLE_ENDIAN 1 -#elif defined(__pnacl__) || defined(__asmjs__) +#elif defined(__pnacl__) || defined(__asmjs__) || defined(__wasm__) #define ARCH_CPU_32_BITS 1 #define ARCH_CPU_LITTLE_ENDIAN 1 #elif defined(__MIPSEL__)
diff --git a/polyfills/base/logging.h b/polyfills/base/logging.h index def1745..41ddacd 100644 --- a/polyfills/base/logging.h +++ b/polyfills/base/logging.h
@@ -24,10 +24,12 @@ #define GURL_CHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_CHECK(statement) GurlFakeLogSink({statement}) #define GURL_DCHECK_EQ(statement, statement2) GurlFakeLogSink({statement, statement2}) +#define GURL_DCHECK_GE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_DCHECK_GT(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_DCHECK_IS_ON() false #define GURL_DCHECK_LE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_DCHECK_LT(statement, statement2) GurlFakeLogSink({statement, statement2}) +#define GURL_DCHECK_NE(statement, statement2) GurlFakeLogSink({statement, statement2}) #define GURL_DCHECK(statement) GurlFakeLogSink({statement}) #define GURL_DLOG(severity) GurlFakeLogSink(true) #define GURL_LOG(severity) GurlFakeLogSink(true)
diff --git a/url/gurl.cc b/url/gurl.cc index c8e424f..36c5ee2 100644 --- a/url/gurl.cc +++ b/url/gurl.cc
@@ -396,14 +396,14 @@ return ComponentString(file_component); } -std::string GURL::PathForRequest() const { +gurl_base::StringPiece GURL::PathForRequestPiece() const { GURL_DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty"; if (parsed_.ref.len >= 0) { // Clip off the reference when it exists. The reference starts after the // #-sign, so we have to subtract one to also remove it. - return std::string(spec_, parsed_.path.begin, - parsed_.ref.begin - parsed_.path.begin - 1); + return gurl_base::StringPiece(&spec_[parsed_.path.begin], + parsed_.ref.begin - parsed_.path.begin - 1); } // Compute the actual path length, rather than depending on the spec's // terminator. If we're an inner_url, our spec continues on into our outer @@ -412,7 +412,11 @@ if (parsed_.query.is_valid()) path_len = parsed_.query.end() - parsed_.path.begin; - return std::string(spec_, parsed_.path.begin, path_len); + return gurl_base::StringPiece(&spec_[parsed_.path.begin], path_len); +} + +std::string GURL::PathForRequest() const { + return PathForRequestPiece().as_string(); } std::string GURL::HostNoBrackets() const {
diff --git a/url/gurl.h b/url/gurl.h index 8c026f7..73d2b43 100644 --- a/url/gurl.h +++ b/url/gurl.h
@@ -386,6 +386,9 @@ // parameter, and query portions of the URL. It is guaranteed to be ASCII. std::string PathForRequest() const; + // Returns the same characters as PathForRequest(), avoiding a copy. + gurl_base::StringPiece PathForRequestPiece() const; + // Returns the host, excluding the square brackets surrounding IPv6 address // literals. This can be useful for passing to getaddrinfo(). std::string HostNoBrackets() const;
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index 0375eae..b114920 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -289,21 +289,42 @@ bool expected_valid; const char* expected; } resolve_cases[] = { - {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, - {"http://www.google.com/foo/", "bar", true, "http://www.google.com/foo/bar"}, - {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"}, - {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"}, - {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, - {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"}, - {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, - {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, - {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, + {"http://www.google.com/", "foo.html", true, + "http://www.google.com/foo.html"}, + {"http://www.google.com/foo/", "bar", true, + "http://www.google.com/foo/bar"}, + {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"}, + {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"}, + {"http://www.google.com/", "http://images.google.com/foo.html", true, + "http://images.google.com/foo.html"}, + {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", + true, "http://images.google.com/foo.html"}, + {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", + true, "http://www.google.com/hello/world.html?a#b"}, + {"http://www.google.com/foo#bar", "#com", true, + "http://www.google.com/foo#com"}, + {"http://www.google.com/", "Https:images.google.com", true, + "https://images.google.com/"}, // A non-standard base can be replaced with a standard absolute URL. 
- {"data:blahblah", "http://google.com/", true, "http://google.com/"}, - {"data:blahblah", "http:google.com", true, "http://google.com/"}, + {"data:blahblah", "http://google.com/", true, "http://google.com/"}, + {"data:blahblah", "http:google.com", true, "http://google.com/"}, // Filesystem URLs have different paths to test. - {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, - {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, + {"filesystem:http://www.google.com/type/", "foo.html", true, + "filesystem:http://www.google.com/type/foo.html"}, + {"filesystem:http://www.google.com/type/", "../foo.html", true, + "filesystem:http://www.google.com/type/foo.html"}, + // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:" + // or "x1:" valid schemes) when deciding if |relative| is an absolute url. + {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true, + "file:///some/dir/10.0.0.7:8080/foo.html"}, + {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"}, + {"file:///some/dir/", "x1://host", true, "x1://host"}, + {"file:///some/dir/", "X1://host", true, "x1://host"}, + {"file:///some/dir/", "x.://host", true, "x.://host"}, + {"file:///some/dir/", "x+://host", true, "x+://host"}, + {"file:///some/dir/", "x-://host", true, "x-://host"}, + {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"}, + {"file:///some/dir/", "://host", true, "file:///some/dir/://host"}, }; for (size_t i = 0; i < gurl_base::size(resolve_cases); i++) { @@ -539,11 +560,14 @@ for (size_t i = 0; i < gurl_base::size(cases); i++) { GURL url(cases[i].input); - std::string path_request = url.PathForRequest(); - EXPECT_EQ(cases[i].expected, path_request); + EXPECT_EQ(cases[i].expected, url.PathForRequest()); + EXPECT_EQ(cases[i].expected, url.PathForRequestPiece()); EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == 
NULL); - if (url.inner_url() && cases[i].inner_expected) + if (url.inner_url() && cases[i].inner_expected) { EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest()); + EXPECT_EQ(cases[i].inner_expected, + url.inner_url()->PathForRequestPiece()); + } } } @@ -567,11 +591,6 @@ {"ftp://www.google.com:21/", 21}, {"ftp://www.google.com:80/", 80}, - // gopher - {"gopher://www.google.com/", 70}, - {"gopher://www.google.com:70/", 70}, - {"gopher://www.google.com:80/", 80}, - // file - no port {"file://www.google.com/", PORT_UNSPECIFIED}, {"file://www.google.com:443/", PORT_UNSPECIFIED},
diff --git a/url/origin.cc b/url/origin.cc index 6eda15e..16e93b0 100644 --- a/url/origin.cc +++ b/url/origin.cc
@@ -7,8 +7,12 @@ #include <stdint.h> #include <algorithm> +#include <vector> +#include "base/base64.h" +#include "base/containers/span.h" #include "polyfills/base/logging.h" +#include "base/pickle.h" #include "base/stl_util.h" #include "base/strings/strcat.h" #include "base/strings/string_number_conversions.h" @@ -42,12 +46,12 @@ // It's SchemeHostPort's responsibility to filter out unrecognized schemes; // sanity check that this is happening. - GURL_DCHECK(tuple.IsInvalid() || url.IsStandard() || + GURL_DCHECK(!tuple.IsValid() || url.IsStandard() || gurl_base::Contains(GetLocalSchemes(), url.scheme_piece()) || AllowNonStandardSchemesForAndroidWebView()); } - if (tuple.IsInvalid()) + if (!tuple.IsValid()) return Origin(); return Origin(std::move(tuple)); } @@ -74,7 +78,7 @@ uint16_t port) { SchemeHostPort tuple(scheme.as_string(), host.as_string(), port, SchemeHostPort::CHECK_CANONICALIZATION); - if (tuple.IsInvalid()) + if (!tuple.IsValid()) return gurl_base::nullopt; return Origin(std::move(tuple)); } @@ -91,7 +95,7 @@ // For opaque origins, it is okay for the SchemeHostPort to be invalid; // however, this should only arise when the arguments indicate the // canonical representation of the invalid SchemeHostPort. - if (precursor.IsInvalid() && + if (!precursor.IsValid() && !(precursor_scheme.empty() && precursor_host.empty() && precursor_port == 0)) { return gurl_base::nullopt; @@ -105,7 +109,7 @@ uint16_t port) { SchemeHostPort tuple(std::move(scheme), std::move(host), port, SchemeHostPort::ALREADY_CANONICALIZED); - if (tuple.IsInvalid()) + if (!tuple.IsValid()) return Origin(); return Origin(std::move(tuple)); } @@ -171,7 +175,7 @@ // And if it is unique opaque origin, it definitely is fine. But if there // is a precursor stored, we should fall through to compare the tuples. - if (tuple_.IsInvalid()) + if (!tuple_.IsValid()) return true; } @@ -198,7 +202,7 @@ // opaque origin. 
It is valid case, as any browser-initiated navigation // to about:blank or data: URL will result in a document with such // origin and it is valid for it to create blob: URLs. - if (tuple_.IsInvalid()) + if (!tuple_.IsValid()) return true; url_tuple = SchemeHostPort(GURL(url.GetContent())); @@ -221,7 +225,7 @@ // If |this| does not have valid precursor tuple, it is unique opaque origin, // which is what we expect non-standard schemes to get. - if (tuple_.IsInvalid()) + if (!tuple_.IsValid()) return true; // However, when there is precursor present, the schemes must match. @@ -257,7 +261,7 @@ : nonce_->raw_token().ToString(); std::string out = gurl_base::StrCat({Serialize(), " [internally: (", nonce, ")"}); - if (tuple_.IsInvalid()) + if (!tuple_.IsValid()) gurl_base::StrAppend(&out, {" anonymous]"}); else gurl_base::StrAppend(&out, {" derived from ", tuple_.Serialize(), "]"}); @@ -266,7 +270,7 @@ Origin::Origin(SchemeHostPort tuple) : tuple_(std::move(tuple)) { GURL_DCHECK(!opaque()); - GURL_DCHECK(!tuple_.IsInvalid()); + GURL_DCHECK(tuple_.IsValid()); } // Constructs an opaque origin derived from |precursor|. @@ -279,6 +283,82 @@ GURL_DCHECK_EQ(0U, port()); } +// The pickle is saved in the following format, in order: +// string - tuple_.Serialize(). +// uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized. +// uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized. +gurl_base::Optional<std::string> Origin::SerializeWithNonce() const { + if (!opaque() && !tuple_.IsValid()) + return gurl_base::nullopt; + + gurl_base::Pickle pickle; + pickle.WriteString(tuple_.Serialize()); + if (opaque() && !nonce_->raw_token().is_empty()) { + pickle.WriteUInt64(nonce_->token().GetHighForSerialization()); + pickle.WriteUInt64(nonce_->token().GetLowForSerialization()); + } else if (opaque()) { + // Nonce hasn't been initialized. 
+ pickle.WriteUInt64(0); + pickle.WriteUInt64(0); + } + + gurl_base::span<const uint8_t> data( + static_cast<const uint8_t*>(pickle.data()), + static_cast<const uint8_t*>(pickle.data()) + pickle.size()); + // Base64 encode the data to make it nicer to play with. + return gurl_base::Base64Encode(data); +} + +// static +gurl_base::Optional<Origin> Origin::Deserialize(const std::string& value) { + std::string data; + if (!gurl_base::Base64Decode(value, &data)) + return gurl_base::nullopt; + gurl_base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size()); + gurl_base::PickleIterator reader(pickle); + + std::string pickled_url; + if (!reader.ReadString(&pickled_url)) + return gurl_base::nullopt; + GURL url(pickled_url); + + // If only a tuple was serialized, then this origin is not opaque. For opaque + // origins, we expect two uint64's to be left in the pickle. + bool is_opaque = !reader.ReachedEnd(); + + // Opaque origins without a tuple are ok. + if (!is_opaque && !url.is_valid()) + return gurl_base::nullopt; + SchemeHostPort tuple(url); + + // Possible successful early return if the pickled Origin was not opaque. + if (!is_opaque) { + Origin origin(tuple); + if (origin.opaque()) + return gurl_base::nullopt; // Something went horribly wrong. + return origin; + } + + uint64_t nonce_high = 0; + if (!reader.ReadUInt64(&nonce_high)) + return gurl_base::nullopt; + + uint64_t nonce_low = 0; + if (!reader.ReadUInt64(&nonce_low)) + return gurl_base::nullopt; + + Origin::Nonce nonce; + if (nonce_high != 0 && nonce_low != 0) { + // The serialized nonce wasn't empty, so copy it here. 
+ nonce = Origin::Nonce( + gurl_base::UnguessableToken::Deserialize(nonce_high, nonce_low)); + } + Origin origin; + origin.nonce_ = std::move(nonce); + origin.tuple_ = tuple; + return origin; +} + std::ostream& operator<<(std::ostream& out, const url::Origin& origin) { out << origin.GetDebugString(); return out; @@ -351,4 +431,17 @@ return !(*this == other); } +namespace debug { + +ScopedOriginCrashKey::ScopedOriginCrashKey( + gurl_base::debug::CrashKeyString* crash_key, + const url::Origin* value) + : gurl_base::debug::ScopedCrashKeyString( + crash_key, + value ? value->GetDebugString() : "nullptr") {} + +ScopedOriginCrashKey::~ScopedOriginCrashKey() = default; + +} // namespace debug + } // namespace url
diff --git a/url/origin.h b/url/origin.h index 58c9221..351c482 100644 --- a/url/origin.h +++ b/url/origin.h
@@ -7,21 +7,37 @@ #include <stdint.h> +#include <memory> #include <string> #include "polyfills/base/component_export.h" #include "polyfills/base/debug/alias.h" +#include "base/debug/crash_logging.h" #include "base/optional.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/unguessable_token.h" +#include "build/build_config.h" #include "ipc/ipc_param_traits.h" #include "url/scheme_host_port.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_constants.h" +#if defined(OS_ANDROID) +#include <jni.h> + +namespace gurl_base { +namespace android { +template <typename> +class ScopedJavaLocalRef; +template <typename> +class JavaRef; +} // namespace android +} // namespace gurl_base +#endif // OS_ANDROID + class GURL; namespace blink { @@ -39,6 +55,11 @@ struct UrlOriginAdapter; } // namespace mojo +namespace net { +class NetworkIsolationKey; +class OpaqueNonTransientNetworkIsolationKeyTest; +} // namespace net + namespace url { namespace mojom { @@ -130,6 +151,9 @@ // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed // out of everything in the URL which follows the scheme). // 3. 'file' URLs all parse as ("file", "", 0). + // + // Note that the returned Origin may have a different scheme and host from + // |url| (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL). static Origin Create(const GURL& url); // Creates an Origin for the resource |url| as if it were requested @@ -266,8 +290,16 @@ // and precursor information. 
std::string GetDebugString() const; +#if defined(OS_ANDROID) + gurl_base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const; + static Origin FromJavaObject( + const gurl_base::android::JavaRef<jobject>& java_origin); +#endif // OS_ANDROID + private: friend class blink::SecurityOrigin; + friend class net::NetworkIsolationKey; + friend class net::OpaqueNonTransientNetworkIsolationKeyTest; friend class OriginTest; friend struct mojo::UrlOriginAdapter; friend struct ipc_fuzzer::FuzzTraits<Origin>; @@ -362,6 +394,16 @@ // used only when trying to send an Origin across an IPC pipe. gurl_base::Optional<gurl_base::UnguessableToken> GetNonceForSerialization() const; + // Serializes this Origin, including its nonce if it is opaque. Returns + // nullopt only if the origin is not opaque and its |tuple_| is invalid. Use + // of this method should be limited as an opaque origin will never be + // matchable in future browser sessions. + gurl_base::Optional<std::string> SerializeWithNonce() const; + + // Deserializes an origin from |SerializeWithNonce|. Returns nullopt if the + // value was invalid in any way. + static gurl_base::Optional<Origin> Deserialize(const std::string& value); + // The tuple is used for both tuple origins (e.g. 
https://example.com:80), as // well as for opaque origins, where it tracks the tuple origin from which // the opaque origin was initially derived (we call this the "precursor" @@ -388,6 +430,21 @@ #define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \ DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128) +namespace debug { + +class COMPONENT_EXPORT(URL) ScopedOriginCrashKey + : public gurl_base::debug::ScopedCrashKeyString { + public: + ScopedOriginCrashKey(gurl_base::debug::CrashKeyString* crash_key, + const url::Origin* value); + ~ScopedOriginCrashKey(); + + ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete; + ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete; +}; + +} // namespace debug + } // namespace url #endif // URL_ORIGIN_H_
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc index 2754f23..514169b 100644 --- a/url/origin_unittest.cc +++ b/url/origin_unittest.cc
@@ -55,7 +55,6 @@ AddStandardScheme("standard-but-noaccess", SchemeType::SCHEME_WITH_HOST); AddNoAccessScheme("standard-but-noaccess"); } - void TearDown() override { url::ResetForTests(); } ::testing::AssertionResult DoEqualityComparisons(const url::Origin& a, const url::Origin& b, @@ -96,7 +95,8 @@ return origin.GetNonceForSerialization(); } - // Wrapper around url::Origin method to expose it to tests. + // Wrappers around url::Origin methods to expose it to tests. + gurl_base::Optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization( gurl_base::StringPiece precursor_scheme, gurl_base::StringPiece precursor_host, @@ -105,6 +105,17 @@ return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization( precursor_scheme, precursor_host, precursor_port, nonce); } + + gurl_base::Optional<std::string> SerializeWithNonce(const Origin& origin) { + return origin.SerializeWithNonce(); + } + + gurl_base::Optional<Origin> Deserialize(const std::string& value) { + return Origin::Deserialize(value); + } + + private: + ScopedSchemeRegistryForTests scoped_registry_; }; TEST_F(OriginTest, OpaqueOriginComparison) { @@ -116,14 +127,14 @@ EXPECT_EQ("", opaque_a.host()); EXPECT_EQ(0, opaque_a.port()); EXPECT_EQ(SchemeHostPort(), opaque_a.GetTupleOrPrecursorTupleIfOpaque()); - EXPECT_TRUE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsInvalid()); + EXPECT_FALSE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsValid()); EXPECT_TRUE(opaque_b.opaque()); EXPECT_EQ("", opaque_b.scheme()); EXPECT_EQ("", opaque_b.host()); EXPECT_EQ(0, opaque_b.port()); EXPECT_EQ(SchemeHostPort(), opaque_b.GetTupleOrPrecursorTupleIfOpaque()); - EXPECT_TRUE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsInvalid()); + EXPECT_FALSE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsValid()); // Two default-constructed Origins should always be cross origin to each // other. 
@@ -304,7 +315,6 @@ // Registered URLs {"ftp://example.com/", "ftp", "example.com", 21}, - {"gopher://example.com/", "gopher", "example.com", 70}, {"ws://example.com/", "ws", "example.com", 80}, {"wss://example.com/", "wss", "example.com", 443}, {"wss://user:pass@example.com/", "wss", "example.com", 443}, @@ -354,9 +364,6 @@ 123}, {"blob:https://example.com/guid-goes-here", "https", "example.com", 443}, {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80}, - - // Gopher: - {"gopher://8u.9.Vx6", "gopher", "8u.9.vx6", 70}, }; for (const auto& test_case : cases) { @@ -381,7 +388,7 @@ .DeriveNewOpaqueOrigin(); EXPECT_TRUE(derived_opaque.opaque()); EXPECT_NE(origin, derived_opaque); - EXPECT_FALSE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsInvalid()); + EXPECT_TRUE(derived_opaque.GetTupleOrPrecursorTupleIfOpaque().IsValid()); EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(), derived_opaque.GetTupleOrPrecursorTupleIfOpaque()); EXPECT_EQ(derived_opaque, derived_opaque); @@ -390,8 +397,8 @@ Origin::Resolve(GURL("data:text/html,baz"), origin); EXPECT_TRUE(derived_opaque_via_data_url.opaque()); EXPECT_NE(origin, derived_opaque_via_data_url); - EXPECT_FALSE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque() - .IsInvalid()); + EXPECT_TRUE(derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque() + .IsValid()); EXPECT_EQ(origin.GetTupleOrPrecursorTupleIfOpaque(), derived_opaque_via_data_url.GetTupleOrPrecursorTupleIfOpaque()); EXPECT_NE(derived_opaque, derived_opaque_via_data_url); @@ -627,9 +634,9 @@ }; for (const auto& test_case : kTestCases) { - SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url - << ", " << test_case.lower_ascii_domain - << ")"); + SCOPED_TRACE(testing::Message() + << "(url, domain): (" << test_case.url << ", " + << test_case.lower_ascii_domain << ")"); GURL url(test_case.url); ASSERT_TRUE(url.is_valid()); Origin origin = Origin::Create(url); @@ -660,6 +667,7 @@ Origin origin = 
Origin::Create(GURL("cow://")); EXPECT_TRUE(origin.opaque()); } + TEST_F(OriginTest, NonStandardSchemeWithAndroidWebViewHack) { EnableNonStandardSchemesForAndroidWebView(); Origin origin = Origin::Create(GURL("cow://")); @@ -667,10 +675,10 @@ EXPECT_EQ("cow", origin.scheme()); EXPECT_EQ("", origin.host()); EXPECT_EQ(0, origin.port()); - ResetForTests(); } TEST_F(OriginTest, CanBeDerivedFrom) { + AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST); Origin opaque_unique_origin = Origin(); Origin regular_origin = Origin::Create(GURL("https://a.com/")); @@ -688,7 +696,6 @@ non_standard_scheme_origin.DeriveNewOpaqueOrigin(); // Also, add new standard scheme that is local to the test. - AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST); Origin new_standard_origin = Origin::Create(GURL("new-standard://host/")); Origin new_standard_opaque_precursor_origin = new_standard_origin.DeriveNewOpaqueOrigin(); @@ -863,4 +870,78 @@ "file:// [internally: file://example.com]"); } +TEST_F(OriginTest, Deserialize) { + std::vector<GURL> valid_urls = { + GURL("https://a.com"), GURL("http://a"), + GURL("http://a:80"), GURL("file://a.com/etc/passwd"), + GURL("file:///etc/passwd"), GURL("http://192.168.1.1"), + GURL("http://[2001:db8::1]/"), + }; + for (const GURL& url : valid_urls) { + SCOPED_TRACE(url.spec()); + Origin origin = Origin::Create(url); + gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin); + ASSERT_TRUE(serialized); + + gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + ASSERT_TRUE(deserialized.has_value()); + + EXPECT_TRUE(DoEqualityComparisons(origin, deserialized.value(), true)); + EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString()); + } +} + +TEST_F(OriginTest, DeserializeInvalid) { + EXPECT_EQ(gurl_base::nullopt, Deserialize(std::string())); + EXPECT_EQ(gurl_base::nullopt, Deserialize("deadbeef")); + EXPECT_EQ(gurl_base::nullopt, Deserialize("0123456789")); + 
EXPECT_EQ(gurl_base::nullopt, Deserialize("https://a.com")); + EXPECT_EQ(gurl_base::nullopt, Deserialize("https://192.168.1.1")); +} + +TEST_F(OriginTest, SerializeTBDNonce) { + std::vector<GURL> invalid_urls = { + GURL("data:uniqueness"), GURL("data:,"), + GURL("data:text/html,Hello!"), GURL("javascript:alert(1)"), + GURL("about:blank"), GURL("google.com"), + }; + for (const GURL& url : invalid_urls) { + SCOPED_TRACE(url.spec()); + Origin origin = Origin::Create(url); + gurl_base::Optional<std::string> serialized = SerializeWithNonce(origin); + gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + ASSERT_TRUE(deserialized.has_value()); + + // Can't use DoEqualityComparisons here since empty nonces are never == + // unless they are the same object. + EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString()); + } + + // Same basic test as above, but without a GURL to create tuple_. + Origin opaque; + gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque); + ASSERT_TRUE(serialized); + + gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + ASSERT_TRUE(deserialized.has_value()); + + // Can't use DoEqualityComparisons here since empty nonces are never == unless + // they are the same object. + EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString()); +} + +TEST_F(OriginTest, DeserializeValidNonce) { + Origin opaque; + GetNonce(opaque); + + gurl_base::Optional<std::string> serialized = SerializeWithNonce(opaque); + ASSERT_TRUE(serialized); + + gurl_base::Optional<Origin> deserialized = Deserialize(std::move(*serialized)); + ASSERT_TRUE(deserialized.has_value()); + + EXPECT_TRUE(DoEqualityComparisons(opaque, deserialized.value(), true)); + EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString()); +} + } // namespace url
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc index 290e8a6..c90e0a7 100644 --- a/url/scheme_host_port.cc +++ b/url/scheme_host_port.cc
@@ -134,15 +134,15 @@ ConstructPolicy policy) : port_(0) { if (!IsValidInput(scheme, host, port, policy)) { - GURL_DCHECK(IsInvalid()); + GURL_DCHECK(!IsValid()); return; } scheme_ = std::move(scheme); host_ = std::move(host); port_ = port; - GURL_DCHECK(!IsInvalid()) << "Scheme: " << scheme_ << " Host: " << host_ - << " Port: " << port; + GURL_DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_ + << " Port: " << port; } SchemeHostPort::SchemeHostPort(gurl_base::StringPiece scheme, @@ -172,19 +172,19 @@ if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED)) return; - scheme.CopyToString(&scheme_); - host.CopyToString(&host_); + scheme_ = std::string(scheme); + host_ = std::string(host); port_ = port; } SchemeHostPort::~SchemeHostPort() = default; -bool SchemeHostPort::IsInvalid() const { +bool SchemeHostPort::IsValid() const { // It suffices to just check |scheme_| for emptiness; the other fields are // never present without it. GURL_DCHECK(!scheme_.empty() || host_.empty()); GURL_DCHECK(!scheme_.empty() || port_ == 0); - return scheme_.empty(); + return !scheme_.empty(); } std::string SchemeHostPort::Serialize() const { @@ -198,7 +198,7 @@ url::Parsed parsed; std::string serialized = SerializeInternal(&parsed); - if (IsInvalid()) + if (!IsValid()) return GURL(std::move(serialized), parsed, false); // SchemeHostPort does not have enough information to determine if an empty @@ -223,7 +223,7 @@ std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const { std::string result; - if (IsInvalid()) + if (!IsValid()) return result; // Reserve enough space for the "normal" case of scheme://host/.
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h index a2dded1..903a398 100644 --- a/url/scheme_host_port.h +++ b/url/scheme_host_port.h
@@ -122,7 +122,7 @@ const std::string& host() const { return host_; } const std::string& scheme() const { return scheme_; } uint16_t port() const { return port_; } - bool IsInvalid() const; + bool IsValid() const; // Serializes the SchemeHostPort tuple to a canonical form. //
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc index 5270c70..e3bcade 100644 --- a/url/scheme_host_port_unittest.cc +++ b/url/scheme_host_port_unittest.cc
@@ -16,12 +16,11 @@ class SchemeHostPortTest : public testing::Test { public: SchemeHostPortTest() = default; - ~SchemeHostPortTest() override { - // Reset any added schemes. - url::ResetForTests(); - } + ~SchemeHostPortTest() override = default; private: + url::ScopedSchemeRegistryForTests scoped_registry_; + DISALLOW_COPY_AND_ASSIGN(SchemeHostPortTest); }; @@ -52,7 +51,7 @@ EXPECT_EQ("", invalid.scheme()); EXPECT_EQ("", invalid.host()); EXPECT_EQ(0, invalid.port()); - EXPECT_TRUE(invalid.IsInvalid()); + EXPECT_FALSE(invalid.IsValid()); EXPECT_EQ(invalid, invalid); const char* urls[] = { @@ -76,7 +75,7 @@ EXPECT_EQ("", tuple.scheme()); EXPECT_EQ("", tuple.host()); EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); + EXPECT_FALSE(tuple.IsValid()); EXPECT_EQ(tuple, tuple); EXPECT_EQ(tuple, invalid); EXPECT_EQ(invalid, tuple); @@ -105,7 +104,7 @@ EXPECT_EQ(test.scheme, tuple.scheme()); EXPECT_EQ(test.host, tuple.host()); EXPECT_EQ(test.port, tuple.port()); - EXPECT_FALSE(tuple.IsInvalid()); + EXPECT_TRUE(tuple.IsValid()); EXPECT_EQ(tuple, tuple); ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); } @@ -141,7 +140,7 @@ EXPECT_EQ("", tuple.scheme()); EXPECT_EQ("", tuple.host()); EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); + EXPECT_FALSE(tuple.IsValid()); EXPECT_EQ(tuple, tuple); ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); } @@ -170,7 +169,7 @@ EXPECT_EQ("", tuple.scheme()); EXPECT_EQ("", tuple.host()); EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); + EXPECT_FALSE(tuple.IsValid()); ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); } } @@ -205,7 +204,7 @@ EXPECT_EQ(test.scheme, tuple.scheme()); EXPECT_EQ(test.host, tuple.host()); EXPECT_EQ(test.port, tuple.port()); - EXPECT_FALSE(tuple.IsInvalid()); + EXPECT_TRUE(tuple.IsValid()); EXPECT_EQ(tuple, tuple); ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); }
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc index 47668f6..a5ec808 100644 --- a/url/url_canon_relative.cc +++ b/url/url_canon_relative.cc
@@ -7,6 +7,7 @@ #include <algorithm> #include "polyfills/base/logging.h" +#include "base/strings/string_util.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" #include "url/url_constants.h" @@ -62,6 +63,39 @@ #endif // WIN32 +template <typename CHAR> +bool IsValidScheme(const CHAR* url, const Component& scheme) { + // Caller should ensure that the |scheme| is not empty. + GURL_DCHECK_NE(0, scheme.len); + + // From https://url.spec.whatwg.org/#scheme-start-state: + // scheme start state: + // 1. If c is an ASCII alpha, append c, lowercased, to buffer, and set + // state to scheme state. + // 2. Otherwise, if state override is not given, set state to no scheme + // state, and decrease pointer by one. + // 3. Otherwise, validation error, return failure. + // Note that both step 2 and step 3 mean that the scheme was not valid. + if (!gurl_base::IsAsciiAlpha(url[scheme.begin])) + return false; + + // From https://url.spec.whatwg.org/#scheme-state: + // scheme state: + // 1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E + // (.), append c, lowercased, to buffer. + // 2. Otherwise, if c is U+003A (:), then [...] + // + // We begin at |scheme.begin + 1|, because the character at |scheme.begin| has + // already been checked by gurl_base::IsAsciiAlpha above. + int scheme_end = scheme.end(); + for (int i = scheme.begin + 1; i < scheme_end; i++) { + if (!CanonicalSchemeChar(url[i])) + return false; + } + + return true; +} + // See IsRelativeURL in the header file for usage. template<typename CHAR> bool DoIsRelativeURL(const char* base, @@ -126,17 +160,14 @@ } // If the scheme isn't valid, then it's relative. - int scheme_end = scheme.end(); - for (int i = scheme.begin; i < scheme_end; i++) { - if (!CanonicalSchemeChar(url[i])) { - if (!is_base_hierarchical) { - // Don't allow relative URLs if the base scheme doesn't support it. 
- return false; - } - *relative_component = MakeRange(begin, url_len); - *is_relative = true; - return true; + if (!IsValidScheme(url, scheme)) { + if (!is_base_hierarchical) { + // Don't allow relative URLs if the base scheme doesn't support it. + return false; } + *relative_component = MakeRange(begin, url_len); + *is_relative = true; + return true; } // If the scheme is not the same, then we can't count it as relative. @@ -287,9 +318,8 @@ // Canonical URLs always have a path, so we can use that offset. Reserve // enough room for the base URL, the new path, and some extra bytes for // possible escaped characters. - output->ReserveSizeIfNeeded( - base_parsed.path.begin + - std::max(path.end(), std::max(query.end(), ref.end()))); + output->ReserveSizeIfNeeded(base_parsed.path.begin + + std::max({path.end(), query.end(), ref.end()})); output->Append(base_url, base_parsed.path.begin); if (path.len > 0) {
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc index 78f7773..6a94f50 100644 --- a/url/url_canon_stdurl.cc +++ b/url/url_canon_stdurl.cc
@@ -128,10 +128,6 @@ else if (!strncmp(scheme, kWssScheme, scheme_len)) default_port = 443; break; - case 6: - if (!strncmp(scheme, kGopherScheme, scheme_len)) - default_port = 70; - break; case 2: if (!strncmp(scheme, kWsScheme, scheme_len)) default_port = 80;
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index 9d1a458..c3f02fb 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -1393,7 +1393,7 @@ {"https://foo:80/", "https://foo:80/", true}, {"ftp://foo:21/", "ftp://foo/", true}, {"ftp://foo:80/", "ftp://foo:80/", true}, - {"gopher://foo:70/", "gopher://foo/", true}, + {"gopher://foo:70/", "gopher://foo:70/", true}, {"gopher://foo:443/", "gopher://foo:443/", true}, {"ws://foo:80/", "ws://foo/", true}, {"ws://foo:81/", "ws://foo:81/", true}, @@ -2325,14 +2325,12 @@ {"ftp", 21}, {"ws", 80}, {"wss", 443}, - {"gopher", 70}, {"fake-scheme", PORT_UNSPECIFIED}, {"HTTP", PORT_UNSPECIFIED}, {"HTTPS", PORT_UNSPECIFIED}, {"FTP", PORT_UNSPECIFIED}, {"WS", PORT_UNSPECIFIED}, {"WSS", PORT_UNSPECIFIED}, - {"GOPHER", PORT_UNSPECIFIED}, }; for (auto& test_case : cases) {
diff --git a/url/url_constants.cc b/url/url_constants.cc index 3540240..69399e4 100644 --- a/url/url_constants.cc +++ b/url/url_constants.cc
@@ -20,11 +20,13 @@ const char kFileScheme[] = "file"; const char kFileSystemScheme[] = "filesystem"; const char kFtpScheme[] = "ftp"; -const char kGopherScheme[] = "gopher"; const char kHttpScheme[] = "http"; const char kHttpsScheme[] = "https"; const char kJavaScriptScheme[] = "javascript"; const char kMailToScheme[] = "mailto"; +// This is for QuicTransport (https://wicg.github.io/web-transport/). +// See also: https://www.iana.org/assignments/uri-schemes/prov/quic-transport +const char kQuicTransportScheme[] = "quic-transport"; const char kTelScheme[] = "tel"; const char kWsScheme[] = "ws"; const char kWssScheme[] = "wss";
diff --git a/url/url_constants.h b/url/url_constants.h index c077b8d..3c04d68 100644 --- a/url/url_constants.h +++ b/url/url_constants.h
@@ -26,11 +26,11 @@ COMPONENT_EXPORT(URL) extern const char kFileScheme[]; COMPONENT_EXPORT(URL) extern const char kFileSystemScheme[]; COMPONENT_EXPORT(URL) extern const char kFtpScheme[]; -COMPONENT_EXPORT(URL) extern const char kGopherScheme[]; COMPONENT_EXPORT(URL) extern const char kHttpScheme[]; COMPONENT_EXPORT(URL) extern const char kHttpsScheme[]; COMPONENT_EXPORT(URL) extern const char kJavaScriptScheme[]; COMPONENT_EXPORT(URL) extern const char kMailToScheme[]; +COMPONENT_EXPORT(URL) extern const char kQuicTransportScheme[]; COMPONENT_EXPORT(URL) extern const char kTelScheme[]; COMPONENT_EXPORT(URL) extern const char kWsScheme[]; COMPONENT_EXPORT(URL) extern const char kWssScheme[];
diff --git a/url/url_util.cc b/url/url_util.cc index 47fc499..7c72bfc 100644 --- a/url/url_util.cc +++ b/url/url_util.cc
@@ -6,8 +6,9 @@ #include <stddef.h> #include <string.h> +#include <atomic> -#include "base/debug/leak_annotations.h" +#include "base/compiler_specific.h" #include "polyfills/base/logging.h" #include "base/no_destructor.h" #include "base/stl_util.h" @@ -21,6 +22,12 @@ namespace { +// A pair for representing a standard scheme name and the SchemeType for it. +struct SchemeWithType { + std::string scheme; + SchemeType type; +}; + // List of currently registered schemes and associated properties. struct SchemeRegistry { // Standard format schemes (see header for details). @@ -34,11 +41,11 @@ // canonicalization. {kFileScheme, SCHEME_WITH_HOST}, {kFtpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, - {kGopherScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, {kWssScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket secure. {kWsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket. {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY}, + {kQuicTransportScheme, SCHEME_WITH_HOST_AND_PORT}, }; // Schemes that are allowed for referrers. @@ -49,10 +56,7 @@ // Schemes that do not trigger mixed content warning. std::vector<std::string> secure_schemes = { - kHttpsScheme, - kAboutScheme, - kDataScheme, - kWssScheme, + kHttpsScheme, kAboutScheme, kDataScheme, kQuicTransportScheme, kWssScheme, }; // Schemes that normal pages cannot link to or access (i.e., with the same @@ -93,11 +97,26 @@ bool allow_non_standard_schemes = false; }; -SchemeRegistry* GetSchemeRegistry() { +// See the LockSchemeRegistries declaration in the header. +bool scheme_registries_locked = false; + +// Ensure that the schemes aren't modified after first use. +static std::atomic<bool> g_scheme_registries_used{false}; + +// Gets the scheme registry without locking the schemes. This should *only* be +// used for adding schemes to the registry. 
+SchemeRegistry* GetSchemeRegistryWithoutLocking() { static gurl_base::NoDestructor<SchemeRegistry> registry; return registry.get(); } +const SchemeRegistry& GetSchemeRegistry() { +#if GURL_DCHECK_IS_ON() + g_scheme_registries_used.store(true); +#endif + return *GetSchemeRegistryWithoutLocking(); +} + // Pass this enum through for methods which would like to know if whitespace // removal is necessary. enum WhitespaceRemovalPolicy { @@ -105,9 +124,6 @@ DO_NOT_REMOVE_WHITESPACE, }; -// See the LockSchemeRegistries declaration in the header. -bool scheme_registries_locked = false; - // This template converts a given character type to the corresponding // StringPiece type. template<typename CHAR> struct CharToStringPiece { @@ -157,7 +173,7 @@ template<typename CHAR> bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) { return DoIsInSchemes(spec, scheme, type, - GetSchemeRegistry()->standard_schemes); + GetSchemeRegistry().standard_schemes); } @@ -168,7 +184,7 @@ Component* found_scheme) { // Before extracting scheme, canonicalize the URL to remove any whitespace. // This matches the canonicalization done in DoCanonicalize function. - RawCanonOutputT<CHAR> whitespace_buffer; + STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer; int spec_len; const CHAR* spec = RemoveURLWhitespace(str, str_len, &whitespace_buffer, &spec_len, nullptr); @@ -197,7 +213,7 @@ // Remove any whitespace from the middle of the relative URL if necessary. // Possibly this will result in copying to the new buffer. - RawCanonOutputT<CHAR> whitespace_buffer; + STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer; if (whitespace_policy == REMOVE_WHITESPACE) { spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len, &output_parsed->potentially_dangling_markup); @@ -276,7 +292,7 @@ Parsed* output_parsed) { // Remove any whitespace from the middle of the relative URL, possibly // copying to the new buffer. 
- RawCanonOutputT<CHAR> whitespace_buffer; + STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer; int relative_length; const CHAR* relative = RemoveURLWhitespace( in_relative, in_relative_length, &whitespace_buffer, &relative_length, @@ -317,7 +333,7 @@ Parsed base_parsed_authority; ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority); if (base_parsed_authority.host.is_nonempty()) { - RawCanonOutputT<char> temporary_output; + STACK_UNINITIALIZED RawCanonOutputT<char> temporary_output; bool did_resolve_succeed = ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, relative_component, charset_converter, @@ -369,7 +385,7 @@ if (replacements.IsSchemeOverridden()) { // Canonicalize the new scheme so it is 8-bit and can be concatenated with // the existing spec. - RawCanonOutput<128> scheme_replaced; + STACK_UNINITIALIZED RawCanonOutput<128> scheme_replaced; Component scheme_replaced_parsed; CanonicalizeScheme(replacements.sources().scheme, replacements.components().scheme, @@ -386,7 +402,7 @@ // We now need to completely re-parse the resulting string since its meaning // may have changed with the different scheme. - RawCanonOutput<128> recanonicalized; + STACK_UNINITIALIZED RawCanonOutput<128> recanonicalized; Parsed recanonicalized_parsed; DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, REMOVE_WHITESPACE, charset_converter, &recanonicalized, @@ -441,8 +457,16 @@ return ReplacePathURL(spec, parsed, replacements, output, out_parsed); } -void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) { - GURL_DCHECK(schemes); +void DoSchemeModificationPreamble() { + // If this assert triggers, it means you've called Add*Scheme after + // the SchemeRegistry has been used. + // + // This normally means you're trying to set up a new scheme too late or using + // the SchemeRegistry too early in your application's init process. Make sure + // that you haven't added any static GURL initializers in tests. 
+ GURL_DCHECK(!g_scheme_registries_used.load()) + << "Trying to add a scheme after the lists have been used."; + // If this assert triggers, it means you've called Add*Scheme after // LockSchemeRegistries has been called (see the header file for // LockSchemeRegistries for more). @@ -452,122 +476,145 @@ // and calls LockSchemeRegistries, and add your new scheme there. GURL_DCHECK(!scheme_registries_locked) << "Trying to add a scheme after the lists have been locked."; +} - size_t scheme_len = strlen(new_scheme); - if (scheme_len == 0) - return; - +void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) { + DoSchemeModificationPreamble(); + GURL_DCHECK(schemes); + GURL_DCHECK(strlen(new_scheme) > 0); GURL_DCHECK_EQ(gurl_base::ToLowerASCII(new_scheme), new_scheme); - schemes->push_back(std::string(new_scheme)); + GURL_DCHECK(std::find(schemes->begin(), schemes->end(), new_scheme) == + schemes->end()); + schemes->push_back(new_scheme); } void DoAddSchemeWithType(const char* new_scheme, SchemeType type, std::vector<SchemeWithType>* schemes) { + DoSchemeModificationPreamble(); GURL_DCHECK(schemes); - // If this assert triggers, it means you've called Add*Scheme after - // LockSchemeRegistries has been called (see the header file for - // LockSchemeRegistries for more). - // - // This normally means you're trying to set up a new scheme too late in your - // application's init process. Locate where your app does this initialization - // and calls LockSchemeRegistries, and add your new scheme there. - GURL_DCHECK(!scheme_registries_locked) - << "Trying to add a scheme after the lists have been locked."; - - size_t scheme_len = strlen(new_scheme); - if (scheme_len == 0) - return; - + GURL_DCHECK(strlen(new_scheme) > 0); GURL_DCHECK_EQ(gurl_base::ToLowerASCII(new_scheme), new_scheme); - // Duplicate the scheme into a new buffer and add it to the list of standard - // schemes. This pointer will be leaked on shutdown. 
- char* dup_scheme = new char[scheme_len + 1]; - ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme); - memcpy(dup_scheme, new_scheme, scheme_len + 1); - - SchemeWithType scheme_with_type; - scheme_with_type.scheme = dup_scheme; - scheme_with_type.type = type; - schemes->push_back(scheme_with_type); + GURL_DCHECK(std::find_if(schemes->begin(), schemes->end(), + [&new_scheme](const SchemeWithType& scheme) { + return scheme.scheme == new_scheme; + }) == schemes->end()); + schemes->push_back({new_scheme, type}); } } // namespace -void ResetForTests() { - *GetSchemeRegistry() = SchemeRegistry(); +void ClearSchemesForTests() { + GURL_DCHECK(!g_scheme_registries_used.load()) + << "Schemes already used " + << "(use ScopedSchemeRegistryForTests to relax for tests)."; + GURL_DCHECK(!scheme_registries_locked) + << "Schemes already locked " + << "(use ScopedSchemeRegistryForTests to relax for tests)."; + *GetSchemeRegistryWithoutLocking() = SchemeRegistry(); } +class ScopedSchemeRegistryInternal { + public: + ScopedSchemeRegistryInternal() + : registry_(std::make_unique<SchemeRegistry>( + *GetSchemeRegistryWithoutLocking())) { + g_scheme_registries_used.store(false); + scheme_registries_locked = false; + } + ~ScopedSchemeRegistryInternal() { + *GetSchemeRegistryWithoutLocking() = *registry_; + g_scheme_registries_used.store(true); + scheme_registries_locked = true; + } + + private: + std::unique_ptr<SchemeRegistry> registry_; +}; + +ScopedSchemeRegistryForTests::ScopedSchemeRegistryForTests() + : internal_(std::make_unique<ScopedSchemeRegistryInternal>()) {} + +ScopedSchemeRegistryForTests::~ScopedSchemeRegistryForTests() = default; + void EnableNonStandardSchemesForAndroidWebView() { - GetSchemeRegistry()->allow_non_standard_schemes = true; + DoSchemeModificationPreamble(); + GetSchemeRegistryWithoutLocking()->allow_non_standard_schemes = true; } bool AllowNonStandardSchemesForAndroidWebView() { - return GetSchemeRegistry()->allow_non_standard_schemes; + return 
GetSchemeRegistry().allow_non_standard_schemes; } void AddStandardScheme(const char* new_scheme, SchemeType type) { - DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistry()->standard_schemes); + DoAddSchemeWithType(new_scheme, type, + &GetSchemeRegistryWithoutLocking()->standard_schemes); } void AddReferrerScheme(const char* new_scheme, SchemeType type) { - DoAddSchemeWithType(new_scheme, type, &GetSchemeRegistry()->referrer_schemes); + DoAddSchemeWithType(new_scheme, type, + &GetSchemeRegistryWithoutLocking()->referrer_schemes); } void AddSecureScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->secure_schemes); + DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->secure_schemes); } const std::vector<std::string>& GetSecureSchemes() { - return GetSchemeRegistry()->secure_schemes; + return GetSchemeRegistry().secure_schemes; } void AddLocalScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->local_schemes); + DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->local_schemes); } const std::vector<std::string>& GetLocalSchemes() { - return GetSchemeRegistry()->local_schemes; + return GetSchemeRegistry().local_schemes; } void AddNoAccessScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->no_access_schemes); + DoAddScheme(new_scheme, + &GetSchemeRegistryWithoutLocking()->no_access_schemes); } const std::vector<std::string>& GetNoAccessSchemes() { - return GetSchemeRegistry()->no_access_schemes; + return GetSchemeRegistry().no_access_schemes; } void AddCorsEnabledScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->cors_enabled_schemes); + DoAddScheme(new_scheme, + &GetSchemeRegistryWithoutLocking()->cors_enabled_schemes); } const std::vector<std::string>& GetCorsEnabledSchemes() { - return GetSchemeRegistry()->cors_enabled_schemes; + return GetSchemeRegistry().cors_enabled_schemes; } void AddWebStorageScheme(const char* new_scheme) { - 
DoAddScheme(new_scheme, &GetSchemeRegistry()->web_storage_schemes); + DoAddScheme(new_scheme, + &GetSchemeRegistryWithoutLocking()->web_storage_schemes); } const std::vector<std::string>& GetWebStorageSchemes() { - return GetSchemeRegistry()->web_storage_schemes; + return GetSchemeRegistry().web_storage_schemes; } void AddCSPBypassingScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->csp_bypassing_schemes); + DoAddScheme(new_scheme, + &GetSchemeRegistryWithoutLocking()->csp_bypassing_schemes); } const std::vector<std::string>& GetCSPBypassingSchemes() { - return GetSchemeRegistry()->csp_bypassing_schemes; + return GetSchemeRegistry().csp_bypassing_schemes; } void AddEmptyDocumentScheme(const char* new_scheme) { - DoAddScheme(new_scheme, &GetSchemeRegistry()->empty_document_schemes); + DoAddScheme(new_scheme, + &GetSchemeRegistryWithoutLocking()->empty_document_schemes); } const std::vector<std::string>& GetEmptyDocumentSchemes() { - return GetSchemeRegistry()->empty_document_schemes; + return GetSchemeRegistry().empty_document_schemes; } void LockSchemeRegistries() { @@ -599,7 +646,7 @@ bool IsReferrerScheme(const char* spec, const Component& scheme) { SchemeType unused_scheme_type; return DoIsInSchemes(spec, scheme, &unused_scheme_type, - GetSchemeRegistry()->referrer_schemes); + GetSchemeRegistry().referrer_schemes); } bool FindAndCompareScheme(const char* str, @@ -653,7 +700,7 @@ } bool HostIsIPAddress(gurl_base::StringPiece host) { - url::RawCanonOutputT<char, 128> ignored_output; + STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> ignored_output; url::CanonHostInfo host_info; url::CanonicalizeIPAddress(host.data(), Component(0, host.length()), &ignored_output, &host_info); @@ -732,7 +779,7 @@ int length, DecodeURLMode mode, CanonOutputW* output) { - RawCanonOutputT<char> unescaped_chars; + STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars; for (int i = 0; i < length; i++) { if (input[i] == '%') { unsigned char ch;
diff --git a/url/url_util.h b/url/url_util.h index 473ae5f..1816637 100644 --- a/url/url_util.h +++ b/url/url_util.h
@@ -5,6 +5,7 @@ #ifndef URL_URL_UTIL_H_ #define URL_URL_UTIL_H_ +#include <memory> #include <string> #include <vector> @@ -19,8 +20,22 @@ // Init ------------------------------------------------------------------------ -// Resets all custom schemes to the default values. Not thread-safe. -COMPONENT_EXPORT(URL) void ResetForTests(); +// Used for tests that need to reset schemes. Note that this can only be used +// in conjunction with ScopedSchemeRegistryForTests. +COMPONENT_EXPORT(URL) void ClearSchemesForTests(); + +class ScopedSchemeRegistryInternal; + +// Stores the SchemeRegistry upon creation, allowing tests to modify a copy of +// it, and restores the original SchemeRegistry when deleted. +class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests { + public: + ScopedSchemeRegistryForTests(); + ~ScopedSchemeRegistryForTests(); + + private: + std::unique_ptr<ScopedSchemeRegistryInternal> internal_; +}; // Schemes --------------------------------------------------------------------- @@ -37,15 +52,9 @@ // Whether or not SchemeHostPort and Origin allow non-standard schemes. COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView(); -// A pair for representing a standard scheme name and the SchemeType for it. -struct COMPONENT_EXPORT(URL) SchemeWithType { - const char* scheme; - SchemeType type; -}; - // The following Add*Scheme method are not threadsafe and can not be called // concurrently with any other url_util function. They will assert if the lists -// of schemes have been locked (see LockSchemeRegistries). +// of schemes have been locked (see LockSchemeRegistries), or used. // Adds an application-defined scheme to the internal list of "standard-format" // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc index 741c1dc..a63294f 100644 --- a/url/url_util_unittest.cc +++ b/url/url_util_unittest.cc
@@ -17,12 +17,11 @@ class URLUtilTest : public testing::Test { public: URLUtilTest() = default; - ~URLUtilTest() override { - // Reset any added schemes. - ResetForTests(); - } + ~URLUtilTest() override = default; private: + ScopedSchemeRegistryForTests scoped_registry_; + DISALLOW_COPY_AND_ASSIGN(URLUtilTest); }; @@ -92,21 +91,24 @@ } TEST_F(URLUtilTest, AddReferrerScheme) { - const char kFooScheme[] = "foo"; + static const char kFooScheme[] = "foo"; EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); + url::ScopedSchemeRegistryForTests scoped_registry; AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); } TEST_F(URLUtilTest, ShutdownCleansUpSchemes) { - const char kFooScheme[] = "foo"; + static const char kFooScheme[] = "foo"; EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); - AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); - EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); + { + url::ScopedSchemeRegistryForTests scoped_registry; + AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); + EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); + } - ResetForTests(); EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); }