Update googleurl from Chromium upstream

Uses revision c0807c09e6ff496fd42d13a6189214ca83051cec from Tue Aug 31 18:40:50 2021
diff --git a/AUTHORS b/AUTHORS index 4bb6b20..1aa2922 100644 --- a/AUTHORS +++ b/AUTHORS
@@ -43,6 +43,7 @@ Aku Kotkavuo <a.kotkavuo@partner.samsung.com> Aldo Culquicondor <alculquicondor@gmail.com> Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com> +Aleksei Gurianov <gurianov@gmail.com> Alex Chronopoulos <achronop@gmail.com> Alex Gabriel <minilogo@gmail.com> Alex Gartrell <agartrell@cmu.edu> @@ -126,6 +127,7 @@ Asami Doi <d0iasm.pub@gmail.com> Ashish Kumar Gupta <guptaag@amazon.com> Ashlin Joseph <ashlin.j@samsung.com> +Ashutosh <coder.commando@gmail.com> Asish Singh <asish.singh@samsung.com> Attila Dusnoki <dati91@gmail.com> Avinaash Doreswamy <avi.nitk@samsung.com> @@ -210,6 +212,7 @@ Chris Tserng <tserng@amazon.com> Chris Vasselli <clindsay@gmail.com> Chris Ye <hawkoyates@gmail.com> +Christoph Durschang <christoph142@gmx.com> Christophe Dumez <ch.dumez@samsung.com> Christopher Dale <chrelad@gmail.com> Chunbo Hua <chunbo.hua@intel.com> @@ -286,6 +289,7 @@ Donghee Na <corona10@gmail.com> Dong-hee Na <donghee.na92@gmail.com> Dongie Agnir <dongie.agnir@gmail.com> +Dongjun Kim <deejay.kim@navercorp.com> Dongjun Kim <djmix.kim@samsung.com> Dongseong Hwang <dongseong.hwang@intel.com> Dongwoo Joshua Im <dw.im@samsung.com> @@ -308,11 +312,14 @@ Elan Ruusamäe <elan.ruusamae@gmail.com> Ergun Erdogmus <erdogmusergun@gmail.com> Eric Ahn <byungwook.ahn@gmail.com> +Eric Huang <ele828@gmail.com> Eric Rescorla <ekr@rtfm.com> Erik Hill <erikghill@gmail.com> +Erik Kurzinger <ekurzinger@gmail.com> Erik Sjölund <erik.sjolund@gmail.com> Eriq Augustine <eriq.augustine@gmail.com> Ernesto Mudu <ernesto.mudu@gmail.com> +Ethan Wong <bunnnywong@gmail.com> Etienne Laurin <etienne@atnnn.com> Eugene Kim <eugene70kim@gmail.com> Eugene Sudin <eugene@sudin.pro> @@ -415,6 +422,7 @@ HyunJi Kim <hjkim3323@gmail.com> Hyunjun Shin <hyunjun.shin2@navercorp.com> Hyunjun Shin <shjj1504@gmail.com> +Hyunjune Kim <hyunjune.kim@navercorp.com> Hyunjune Kim <hyunjune.kim@samsung.com> Hyunki Baik <hyunki.baik@samsung.com> Ian Cullinan <cullinan@amazon.com> @@ -479,6 +487,7 @@ Jerry Lin 
<wahahab11@gmail.com> Jerry Zhang <zhj8407@gmail.com> Jesper Storm Bache <jsbache@gmail.com> +Jesper van den Ende <jespertheend@gmail.com> Jesse Miller <jesse@jmiller.biz> Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com> Jiadong Chen <chenjiadong@huawei.com> @@ -547,6 +556,7 @@ Josué Ratelle <jorat1346@gmail.com> Josyula Venkat Narasimham <venkat.nj@samsung.com> Joyer Huang <collger@gmail.com> +Juan Cruz Viotti <jv@jviotti.com> Juan Jose Lopez Jaimez <jj.lopezjaimez@gmail.com> Juhui Lee <juhui24.lee@samsung.com> Julian Geppert <spctstr@gmail.com> @@ -562,6 +572,7 @@ Jungkee Song <jungkee.song@samsung.com> Junmin Zhu <junmin.zhu@intel.com> Junsong Li <ljs.darkfish@gmail.com> +Jun Zeng <hjunzeng6@gmail.com> Justin Okamoto <justmoto@amazon.com> Justin Ribeiro <justin@justinribeiro.com> Jüri Valdmann <juri.valdmann@qt.io> @@ -657,6 +668,7 @@ Lucie Brozkova <lucinka.brozkova@gmail.com> Luiz Von Dentz <luiz.von.dentz@intel.com> Luka Dojcilovic <l.dojcilovic@gmail.com> +Lukas Lihotzki <lukas@lihotzki.de> Lukasz Krakowiak <lukasz.krakowiak@mobica.com> Luke Inman-Semerau <luke.semerau@gmail.com> Luke Seunghoe Gu <gulukesh@gmail.com> @@ -714,7 +726,9 @@ Matthias Reitinger <reimarvin@gmail.com> Matthieu Rigolot <matthieu.rigolot@gmail.com> Matthieu Vlad Hauglustaine <matt.hauglustaine@gmail.com> +Max Karolinskiy <max@brave.com> Max Perepelitsyn <pph34r@gmail.com> +Max Schmitt <max@schmitt.mx> Max Vujovic <mvujovic@adobe.com> Mayank Gupta <mayank.g1@samsung.com> Mayur Kankanwadi <mayurk.vk@samsung.com> @@ -732,6 +746,7 @@ Michael Müller <michael@fds-team.de> Michael Schechter <mike.schechter@gmail.com> Michael Smith <sideshowbarker@gmail.com> +Michael Weiss <dev.primeos@gmail.com> Michaël Zasso <mic.besace@gmail.com> Michael Zugelder <michael@zugelder.org> Michel Promonet <michel.promonet.1@gmail.com> @@ -764,6 +779,7 @@ Momoko Hattori <momohatt10@gmail.com> Mostafa Sedaghat joo <mostafa.sedaghat@gmail.com> Mrunal Kapade <mrunal.kapade@intel.com> +Munira 
Tursunova <moonira@google.com> Myeongjin Cho <myeongjin.cho@navercorp.com> Myles C. Maxfield <mymax@amazon.com> Myung-jong Kim <mjkim610@gmail.com> @@ -889,6 +905,7 @@ Reda Tawfik <redatawfik@noogler.google.com> Réda Housni Alaoui <alaoui.rda@gmail.com> Refael Ackermann <refack@gmail.com> +Rémi Arnaud <jsremi@gmail.com> Renata Hodovan <rhodovan.u-szeged@partner.samsung.com> Rene Bolldorf <rb@radix.io> Rene Ladan <r.c.ladan@gmail.com> @@ -922,6 +939,7 @@ Rulong Chen <rulong.crl@alibaba-inc.com> Russell Davis <russell.davis@gmail.com> Ryan Ackley <ryanackley@gmail.com> +Ryan Gonzalez <rymg19@gmail.com> Ryan Norton <rnorton10@gmail.com> Ryan Sleevi <ryan-chromium-dev@sleevi.com> Ryan Yoakum <ryoakum@skobalt.com> @@ -1038,6 +1056,7 @@ Sumaid Syed <sumaidsyed@gmail.com> Sunchang Li <johnstonli@tencent.com> Sundoo Kim <nerdooit@gmail.com> +Sundoo Kim <0xd00d00b@gmail.com> Suneel Kota <suneel.kota@samsung.com> Sungguk Lim <limasdf@gmail.com> Sungmann Cho <sungmann.cho@gmail.com> @@ -1086,6 +1105,8 @@ Timo Reimann <ttr314@googlemail.com> Timo Witte <timo.witte@gmail.com> Ting Shao <ting.shao@intel.com> +Tobias Soppa <tobias@soppa.me> +Tobias Soppa <tobias.soppa@code.berlin> Tom Callaway <tcallawa@redhat.com> Tom Harwood <tfh@skip.org> Tomas Popela <tomas.popela@gmail.com> @@ -1114,6 +1135,7 @@ Vernon Tang <vt@foilhead.net> Viatcheslav Ostapenko <sl.ostapenko@samsung.com> Victor Costan <costan@gmail.com> +Victor Solonsky <victor.solonsky@gmail.com> Viet-Trung Luu <viettrungluu@gmail.com> Vinay Anantharaman <vinaya@adobe.com> Vinoth Chandar <vinoth@uber.com> @@ -1141,6 +1163,7 @@ Xiang Long <xiang.long@intel.com> XiangYang <yangxiang12@huawei.com> Xiangze Zhang <xiangze.zhang@intel.com> +Xiaobing Yang <yangxiaobing@qianxin.com> Xiaofeng Zhang <xiaofeng.zhang@intel.com> Xiaolei Yu <dreifachstein@gmail.com> Xiaoshu Zhang <xiaoshu@amazon.com> @@ -1222,6 +1245,7 @@ # END individuals section. # BEGIN organizations section. +Accenture <*@accenture.com> ACCESS CO., LTD. 
<*@access-company.com> Akamai Inc. <*@akamai.com> ARM Holdings <*@arm.com>
diff --git a/base/BUILD b/base/BUILD index c933421..e2da292 100644 --- a/base/BUILD +++ b/base/BUILD
@@ -14,6 +14,7 @@ "containers/span.h", "containers/util.h", "cxx17_backports.h", + "cxx20_to_address.h", "debug/leak_annotations.h", "functional/identity.h", "functional/invoke.h",
diff --git a/base/compiler_specific.h b/base/compiler_specific.h index 3faca72..6651220 100644 --- a/base/compiler_specific.h +++ b/base/compiler_specific.h
@@ -308,6 +308,31 @@ #define STACK_UNINITIALIZED #endif +// Attribute "no_stack_protector" disables -fstack-protector for the specified +// function. +// +// "stack_protector" is enabled on most POSIX builds. The flag adds a canary +// to each stack frame, which on function return is checked against a reference +// canary. If the canaries do not match, it's likely that a stack buffer +// overflow has occurred, so immediately crashing will prevent exploitation in +// many cases. +// +// In some cases it's desirable to remove this, e.g. on hot functions, or if +// we have purposely changed the reference canary. +#if defined(COMPILER_GCC) || defined(__clang__) +#if defined(__has_attribute) +#if __has_attribute(__no_stack_protector__) +#define NO_STACK_PROTECTOR __attribute__((__no_stack_protector__)) +#else // __has_attribute(__no_stack_protector__) +#define NO_STACK_PROTECTOR __attribute__((__optimize__("-fno-stack-protector"))) +#endif +#else // defined(__has_attribute) +#define NO_STACK_PROTECTOR __attribute__((__optimize__("-fno-stack-protector"))) +#endif +#else +#define NO_STACK_PROTECTOR +#endif + // The ANALYZER_ASSUME_TRUE(bool arg) macro adds compiler-specific hints // to Clang which control what code paths are statically analyzed, // and is meant to be used in conjunction with assert & assert-like functions.
diff --git a/base/containers/span.h b/base/containers/span.h index 1a54de1..d43814e 100644 --- a/base/containers/span.h +++ b/base/containers/span.h
@@ -17,8 +17,9 @@ #include "polyfills/base/check_op.h" #include "base/containers/checked_iterators.h" #include "base/containers/contiguous_iterator.h" +#include "base/cxx17_backports.h" +#include "base/cxx20_to_address.h" #include "base/macros.h" -#include "base/stl_util.h" #include "base/template_util.h" namespace gurl_base {
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h index 6378a78..77d689a 100644 --- a/base/cxx17_backports.h +++ b/base/cxx17_backports.h
@@ -6,10 +6,13 @@ #define BASE_CXX17_BACKPORTS_H_ #include <array> +#include <functional> #include <initializer_list> #include <memory> #include <string> +#include "polyfills/base/check.h" + namespace gurl_base { // C++14 implementation of C++17's std::size(): @@ -89,6 +92,22 @@ return !array.empty() ? &array[0] : nullptr; } +// C++14 implementation of C++17's std::clamp(): +// https://en.cppreference.com/w/cpp/algorithm/clamp +// Please note that the C++ spec makes it undefined behavior to call std::clamp +// with a value of `lo` that compares greater than the value of `hi`. This +// implementation uses a GURL_CHECK to enforce this as a hard restriction. +template <typename T, typename Compare> +constexpr const T& clamp(const T& v, const T& lo, const T& hi, Compare comp) { + GURL_CHECK(!comp(hi, lo)); + return comp(v, lo) ? lo : comp(hi, v) ? hi : v; +} + +template <typename T> +constexpr const T& clamp(const T& v, const T& lo, const T& hi) { + return clamp(v, lo, hi, std::less<T>{}); +} + } // namespace base #endif // BASE_CXX17_BACKPORTS_H_
diff --git a/base/no_destructor.h b/base/no_destructor.h index 3d7a85c..2f3c549 100644 --- a/base/no_destructor.h +++ b/base/no_destructor.h
@@ -6,9 +6,15 @@ #define BASE_NO_DESTRUCTOR_H_ #include <new> +#include <type_traits> #include <utility> namespace gurl_base { +// A tag type used for NoDestructor to allow it to be created for a type that +// has a trivial destructor. Use for cases where the same class might have +// different implementations that vary on destructor triviality or when the +// LSan hiding properties of NoDestructor are needed. +struct AllowForTriviallyDestructibleType; // A wrapper that makes it easy to create an object of type T with static // storage duration that: @@ -44,9 +50,20 @@ // Note that since the destructor is never run, this *will* leak memory if used // as a stack or member variable. Furthermore, a NoDestructor<T> should never // have global scope as that may require a static initializer. -template <typename T> +template <typename T, typename O = std::nullptr_t> class NoDestructor { public: + static_assert( + !std::is_trivially_destructible<T>::value || + std::is_same<O, AllowForTriviallyDestructibleType>::value, + "gurl_base::NoDestructor is not needed because the templated class has a " + "trivial destructor"); + + static_assert(std::is_same<O, AllowForTriviallyDestructibleType>::value || + std::is_same<O, std::nullptr_t>::value, + "AllowForTriviallyDestructibleType is the only valid option " + "for the second template parameter of NoDestructor"); + // Not constexpr; just write static constexpr T x = ...; if the value should // be a constexpr. template <typename... Args>
diff --git a/base/stl_util.h b/base/stl_util.h index 609b71a..46e91b9 100644 --- a/base/stl_util.h +++ b/base/stl_util.h
@@ -8,22 +8,13 @@ #define BASE_STL_UTIL_H_ #include <algorithm> -#include <deque> #include <forward_list> #include <iterator> -#include <list> -#include <map> -#include <set> -#include <string> #include <tuple> #include <type_traits> -#include <unordered_map> -#include <unordered_set> #include <utility> -#include <vector> #include "polyfills/base/check.h" -#include "base/cxx17_backports.h" #include "base/ranges/algorithm.h" #include "absl/types/optional.h" @@ -31,20 +22,6 @@ namespace internal { -// Calls erase on iterators of matching elements and returns the number of -// removed elements. -template <typename Container, typename Predicate> -size_t IterateAndEraseIf(Container& container, Predicate pred) { - size_t old_size = container.size(); - for (auto it = container.begin(), last = container.end(); it != last;) { - if (pred(*it)) - it = container.erase(it); - else - ++it; - } - return old_size - container.size(); -} - template <typename Iter> constexpr bool IsRandomAccessIter = std::is_same<typename std::iterator_traits<Iter>::iterator_category, @@ -52,23 +29,6 @@ } // namespace internal -// Simplified C++14 implementation of C++20's std::to_address. -// Note: This does not consider specializations of pointer_traits<>::to_address, -// since that member function may only be present in C++20 and later. -// -// Reference: https://wg21.link/pointer.conversion#lib:to_address -template <typename T> -constexpr T* to_address(T* p) noexcept { - static_assert(!std::is_function<T>::value, - "Error: T must not be a function type."); - return p; -} - -template <typename Ptr> -constexpr auto to_address(const Ptr& p) noexcept { - return to_address(p.operator->()); -} - // Implementation of C++23's std::to_underlying. 
// // Note: This has an additional `std::is_enum<EnumT>` requirement to be SFINAE @@ -348,167 +308,6 @@ return result; } -// Erase/EraseIf are based on C++20's uniform container erasure API: -// - https://eel.is/c++draft/libraryindex#:erase -// - https://eel.is/c++draft/libraryindex#:erase_if -// They provide a generic way to erase elements from a container. -// The functions here implement these for the standard containers until those -// functions are available in the C++ standard. -// For Chromium containers overloads should be defined in their own headers -// (like standard containers). -// Note: there is no std::erase for standard associative containers so we don't -// have it either. - -template <typename CharT, typename Traits, typename Allocator, typename Value> -size_t Erase(std::basic_string<CharT, Traits, Allocator>& container, - const Value& value) { - auto it = std::remove(container.begin(), container.end(), value); - size_t removed = std::distance(it, container.end()); - container.erase(it, container.end()); - return removed; -} - -template <typename CharT, typename Traits, typename Allocator, class Predicate> -size_t EraseIf(std::basic_string<CharT, Traits, Allocator>& container, - Predicate pred) { - auto it = std::remove_if(container.begin(), container.end(), pred); - size_t removed = std::distance(it, container.end()); - container.erase(it, container.end()); - return removed; -} - -template <class T, class Allocator, class Value> -size_t Erase(std::deque<T, Allocator>& container, const Value& value) { - auto it = std::remove(container.begin(), container.end(), value); - size_t removed = std::distance(it, container.end()); - container.erase(it, container.end()); - return removed; -} - -template <class T, class Allocator, class Predicate> -size_t EraseIf(std::deque<T, Allocator>& container, Predicate pred) { - auto it = std::remove_if(container.begin(), container.end(), pred); - size_t removed = std::distance(it, container.end()); - 
container.erase(it, container.end()); - return removed; -} - -template <class T, class Allocator, class Value> -size_t Erase(std::vector<T, Allocator>& container, const Value& value) { - auto it = std::remove(container.begin(), container.end(), value); - size_t removed = std::distance(it, container.end()); - container.erase(it, container.end()); - return removed; -} - -template <class T, class Allocator, class Predicate> -size_t EraseIf(std::vector<T, Allocator>& container, Predicate pred) { - auto it = std::remove_if(container.begin(), container.end(), pred); - size_t removed = std::distance(it, container.end()); - container.erase(it, container.end()); - return removed; -} - -template <class T, class Allocator, class Predicate> -size_t EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) { - // Note: std::forward_list does not have a size() API, thus we need to use the - // O(n) std::distance work-around. However, given that EraseIf is O(n) - // already, this should not make a big difference. 
- size_t old_size = std::distance(container.begin(), container.end()); - container.remove_if(pred); - return old_size - std::distance(container.begin(), container.end()); -} - -template <class T, class Allocator, class Predicate> -size_t EraseIf(std::list<T, Allocator>& container, Predicate pred) { - size_t old_size = container.size(); - container.remove_if(pred); - return old_size - container.size(); -} - -template <class Key, class T, class Compare, class Allocator, class Predicate> -size_t EraseIf(std::map<Key, T, Compare, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, class T, class Compare, class Allocator, class Predicate> -size_t EraseIf(std::multimap<Key, T, Compare, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, class Compare, class Allocator, class Predicate> -size_t EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, class Compare, class Allocator, class Predicate> -size_t EraseIf(std::multiset<Key, Compare, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, - class T, - class Hash, - class KeyEqual, - class Allocator, - class Predicate> -size_t EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, - class T, - class Hash, - class KeyEqual, - class Allocator, - class Predicate> -size_t EraseIf( - std::unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, - class Hash, - class KeyEqual, - class Allocator, - class Predicate> -size_t EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& 
container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class Key, - class Hash, - class KeyEqual, - class Allocator, - class Predicate> -size_t EraseIf( - std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container, - Predicate pred) { - return internal::IterateAndEraseIf(container, pred); -} - -template <class T, class Allocator, class Value> -size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) { - // Unlike std::forward_list::remove, this function template accepts - // heterogeneous types and does not force a conversion to the container's - // value type before invoking the == operator. - return EraseIf(container, [&](const T& cur) { return cur == value; }); -} - -template <class T, class Allocator, class Value> -size_t Erase(std::list<T, Allocator>& container, const Value& value) { - // Unlike std::list::remove, this function template accepts heterogeneous - // types and does not force a conversion to the container's value type before - // invoking the == operator. - return EraseIf(container, [&](const T& cur) { return cur == value; }); -} - // A helper class to be used as the predicate with |EraseIf| to implement // in-place set intersection. Helps implement the algorithm of going through // each container an element at a time, erasing elements from the first
diff --git a/base/strings/escape_unittest.cc b/base/strings/escape_unittest.cc index b8a5fd6..923eb5a 100644 --- a/base/strings/escape_unittest.cc +++ b/base/strings/escape_unittest.cc
@@ -421,8 +421,10 @@ EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'})); // Should be looking for byte values, not UTF-8 character values. - EXPECT_TRUE(ContainsEncodedBytes("caf%C3%A9", {'\xc3'})); - EXPECT_FALSE(ContainsEncodedBytes("caf%C3%A9", {'\xe9'})); + EXPECT_TRUE( + ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xc3')})); + EXPECT_FALSE( + ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xe9')})); } } // namespace base
diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc index e8bb070..0569da1 100644 --- a/base/strings/safe_sprintf.cc +++ b/base/strings/safe_sprintf.cc
@@ -490,7 +490,6 @@ goto format_character_found; } } - break; case 'c': { // Output an ASCII character. // Check that there are arguments left to be inserted. if (cur_arg >= max_args) {
diff --git a/base/strings/safe_sprintf.h b/base/strings/safe_sprintf.h index 92f8c59..40cddc5 100644 --- a/base/strings/safe_sprintf.h +++ b/base/strings/safe_sprintf.h
@@ -5,12 +5,12 @@ #ifndef BASE_STRINGS_SAFE_SPRINTF_H_ #define BASE_STRINGS_SAFE_SPRINTF_H_ -#include "build/build_config.h" - #include <stddef.h> #include <stdint.h> #include <stdlib.h> +#include "build/build_config.h" + #if defined(OS_POSIX) || defined(OS_FUCHSIA) // For ssize_t #include <unistd.h>
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h index 8223b59..8c45d1b 100644 --- a/base/strings/string_number_conversions_internal.h +++ b/base/strings/string_number_conversions_internal.h
@@ -14,7 +14,6 @@ #include "polyfills/base/check_op.h" #include "polyfills/base/logging.h" -#include "base/no_destructor.h" #include "base/numerics/safe_math.h" #include "base/strings/string_util.h" #include "base/third_party/double_conversion/double-conversion/double-conversion.h" @@ -229,10 +228,10 @@ static const double_conversion::DoubleToStringConverter* GetDoubleToStringConverter() { - static NoDestructor<double_conversion::DoubleToStringConverter> converter( + static double_conversion::DoubleToStringConverter converter( double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, nullptr, nullptr, 'e', -6, 12, 0, 0); - return converter.get(); + return &converter; } // Converts a given (data, size) pair to a desired string type. For @@ -258,14 +257,14 @@ template <typename STRING, typename CHAR> bool StringToDoubleImpl(STRING input, const CHAR* data, double& output) { - static NoDestructor<double_conversion::StringToDoubleConverter> converter( + static double_conversion::StringToDoubleConverter converter( double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, 0.0, 0, nullptr, nullptr); int processed_characters_count; - output = converter->StringToDouble(data, input.size(), - &processed_characters_count); + output = + converter.StringToDouble(data, input.size(), &processed_characters_count); // Cases to return false: // - If the input string is empty, there was nothing to parse.
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc index f836316..b5a23a1 100644 --- a/base/strings/string_number_conversions_unittest.cc +++ b/base/strings/string_number_conversions_unittest.cc
@@ -14,8 +14,8 @@ #include <limits> #include "base/bit_cast.h" +#include "base/cxx17_backports.h" #include "base/format_macros.h" -#include "base/stl_util.h" #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h index 03f5103..8a22e8d 100644 --- a/base/strings/string_piece.h +++ b/base/strings/string_piece.h
@@ -25,7 +25,6 @@ #include <iosfwd> #include <limits> -#include <ostream> #include <string> #include <type_traits>
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc index f76d2f7..2adfa94 100644 --- a/base/strings/string_util.cc +++ b/base/strings/string_util.cc
@@ -22,8 +22,8 @@ #include <vector> #include "polyfills/base/check_op.h" +#include "base/cxx17_backports.h" #include "base/no_destructor.h" -#include "base/stl_util.h" #include "base/strings/string_util_internal.h" #include "base/strings/utf_string_conversion_utils.h" #include "base/strings/utf_string_conversions.h"
diff --git a/base/strings/string_util.h b/base/strings/string_util.h index ccbf745..5995c2d 100644 --- a/base/strings/string_util.h +++ b/base/strings/string_util.h
@@ -20,6 +20,7 @@ #include "polyfills/base/base_export.h" #include "base/compiler_specific.h" #include "base/containers/span.h" +#include "base/cxx20_to_address.h" #include "base/strings/string_piece.h" // For implicit conversions. #include "build/build_config.h"
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc index f8326cc..231c3c8 100644 --- a/base/strings/string_util_unittest.cc +++ b/base/strings/string_util_unittest.cc
@@ -14,7 +14,7 @@ #include <type_traits> #include "base/bits.h" -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "base/strings/string_piece.h" #include "base/strings/utf_string_conversions.h" #include "build/build_config.h"
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc index e1a18c9..3f74e07 100644 --- a/base/strings/stringprintf.cc +++ b/base/strings/stringprintf.cc
@@ -9,9 +9,9 @@ #include <vector> +#include "base/cxx17_backports.h" #include "polyfills/base/logging.h" #include "base/scoped_clear_last_error.h" -#include "base/stl_util.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "build/build_config.h"
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc index 0775dc4..f50fa3f 100644 --- a/base/strings/utf_offset_string_conversions_unittest.cc +++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -6,7 +6,7 @@ #include <algorithm> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "base/strings/string_piece.h" #include "base/strings/utf_offset_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/utf_string_conversions.cc b/base/strings/utf_string_conversions.cc index 9595e7b..8cf90f1 100644 --- a/base/strings/utf_string_conversions.cc +++ b/base/strings/utf_string_conversions.cc
@@ -7,6 +7,7 @@ #include <limits.h> #include <stdint.h> +#include <ostream> #include <type_traits> #include "base/strings/string_piece.h"
diff --git a/base/strings/utf_string_conversions_unittest.cc b/base/strings/utf_string_conversions_unittest.cc index 752bf95..3b26fa8 100644 --- a/base/strings/utf_string_conversions_unittest.cc +++ b/base/strings/utf_string_conversions_unittest.cc
@@ -4,7 +4,7 @@ #include <stddef.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h"
diff --git a/base/template_util.h b/base/template_util.h index 78b52ee..d0803f8 100644 --- a/base/template_util.h +++ b/base/template_util.h
@@ -10,11 +10,14 @@ #include <iterator> #include <type_traits> #include <utility> -#include <vector> #include "base/compiler_specific.h" #include "build/build_config.h" +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 7 +#include <vector> +#endif + // Some versions of libstdc++ have partial support for type_traits, but misses // a smaller subset while removing some of the older non-standard stuff. Assume // that all versions below 5.0 fall in this category, along with one 5.0
diff --git a/copy.bara.sky b/copy.bara.sky index 1384bee..33c0f00 100644 --- a/copy.bara.sky +++ b/copy.bara.sky
@@ -19,6 +19,7 @@ "base/containers/span.h", "base/containers/util.h", "base/cxx17_backports.h", + "base/cxx20_to_address.h", "base/debug/leak_annotations.h", "base/functional/*.h", "base/i18n/uchar.h", @@ -63,7 +64,6 @@ "base/debug/alias.h", "base/export_template.h", "base/logging.h", - "base/metrics/histogram_macros.h", "base/notreached.h", "base/trace_event/memory_usage_estimator.h", "third_party/perfetto/include/perfetto/tracing/traced_value.h",
diff --git a/url/BUILD b/url/BUILD index 6ed3fc5..f2ec8da 100644 --- a/url/BUILD +++ b/url/BUILD
@@ -43,9 +43,7 @@ "url_file.h", "url_util.h", ], - copts = build_config.default_copts + [ - "-Wno-c++11-narrowing", - ], + copts = build_config.default_copts, linkopts = build_config.url_linkopts, visibility = ["//visibility:public"], deps = [
diff --git a/url/gurl.cc b/url/gurl.cc index 2d68889..18a46f1 100644 --- a/url/gurl.cc +++ b/url/gurl.cc
@@ -237,11 +237,8 @@ NULL, &output, &result.parsed_); output.Complete(); - if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_ = - std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } + + ProcessFileOrFileSystemURLAfterReplaceComponents(result); return result; } @@ -260,14 +257,34 @@ NULL, &output, &result.parsed_); output.Complete(); - if (result.is_valid_ && result.SchemeIsFileSystem()) { - result.inner_url_ = - std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), - *result.parsed_.inner_parsed(), true); - } + + ProcessFileOrFileSystemURLAfterReplaceComponents(result); + return result; } +void GURL::ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const { + if (!url.is_valid_) + return; + if (url.SchemeIsFileSystem()) { + url.inner_url_ = + std::make_unique<GURL>(url.spec_.data(), url.parsed_.Length(), + *url.parsed_.inner_parsed(), true); + } +#ifdef WIN32 + if (url.SchemeIsFile()) { + // On Win32, some file URLs created through ReplaceComponents used to lose + // its hostname after getting reparsed (e.g. when it's sent through IPC) due + // to special handling of file URLs with Windows-drive paths in the URL + // parser. To make the behavior for URLs modified through ReplaceComponents + // (instead of getting fully reparsed) the same, immediately reparse the + // URL here to trigger the special handling. + // See https://crbug.com/1214098. + url = GURL(url.spec()); + } +#endif +} + GURL GURL::GetOrigin() const { // This doesn't make sense for invalid or nonstandard URLs, so return // the empty URL.
diff --git a/url/gurl.h b/url/gurl.h index 21e6611..c70c5a4 100644 --- a/url/gurl.h +++ b/url/gurl.h
@@ -468,6 +468,8 @@ return gurl_base::StringPiece(&spec_[comp.begin], comp.len); } + void ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const; + // The actual text of the URL, in canonical ASCII form. std::string spec_;
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc index 0b81da4..f3b9f3c 100644 --- a/url/gurl_unittest.cc +++ b/url/gurl_unittest.cc
@@ -4,7 +4,7 @@ #include <stddef.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "base/strings/string_number_conversions.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" @@ -561,12 +561,12 @@ GURL url(" data: one ? two # three "); // By default the trailing whitespace will have been stripped. - EXPECT_EQ("data: one ? two #%20three", url.spec()); + EXPECT_EQ("data: one ?%20two%20#%20three", url.spec()); GURL::Replacements repl; repl.ClearRef(); GURL url_no_ref = url.ReplaceComponents(repl); - EXPECT_EQ("data: one ? two ", url_no_ref.spec()); + EXPECT_EQ("data: one ?%20two%20", url_no_ref.spec()); // Importing a parsed URL via this constructor overload will retain trailing // whitespace. @@ -574,7 +574,7 @@ url_no_ref.parsed_for_possibly_invalid_spec(), url_no_ref.is_valid()); EXPECT_EQ(url_no_ref, import_url); - EXPECT_EQ(import_url.query(), " two "); + EXPECT_EQ(import_url.query(), "%20two%20"); } TEST(GURLTest, PathForRequest) { @@ -862,7 +862,7 @@ {"http://www.example.com/GUID#ref", "www.example.com/GUID"}, {"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"}, {"data:text/html,Question?<div style=\"color: #bad\">idea</div>", - "text/html,Question?<div style=\"color: "}, + "text/html,Question?%3Cdiv%20style=%22color:%20"}, // TODO(mkwst): This seems like a bug. https://crbug.com/513600 {"filesystem:http://example.com/path", "/"},
diff --git a/url/origin.cc b/url/origin.cc index 33e26f9..6c7915f 100644 --- a/url/origin.cc +++ b/url/origin.cc
@@ -7,6 +7,7 @@ #include <stdint.h> #include <algorithm> +#include <ostream> #include <vector> #include "base/base64.h" @@ -423,11 +424,11 @@ } // Moving a nonce does NOT trigger lazy-generation of the token. -Origin::Nonce::Nonce(Origin::Nonce&& other) : token_(other.token_) { +Origin::Nonce::Nonce(Origin::Nonce&& other) noexcept : token_(other.token_) { other.token_ = gurl_base::UnguessableToken(); // Reset |other|. } -Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) { +Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) noexcept { token_ = other.token_; other.token_ = gurl_base::UnguessableToken(); // Reset |other|. return *this;
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h index 0c53f82..82d1f55 100644 --- a/url/origin_abstract_tests.h +++ b/url/origin_abstract_tests.h
@@ -11,7 +11,6 @@ #include "base/containers/contains.h" #include "base/strings/string_piece.h" #include "testing/gtest/include/gtest/gtest.h" -#include "absl/types/optional.h" #include "url/gurl.h" #include "url/origin.h" #include "url/scheme_host_port.h"
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc index a9d3a4f..cb78bb6 100644 --- a/url/origin_unittest.cc +++ b/url/origin_unittest.cc
@@ -704,17 +704,19 @@ EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString()); } - // Same basic test as above, but without a GURL to create tuple_. - Origin opaque; - absl::optional<std::string> serialized = SerializeWithNonce(opaque); - ASSERT_TRUE(serialized); + { + // Same basic test as above, but without a GURL to create tuple_. + Origin opaque; + absl::optional<std::string> serialized = SerializeWithNonce(opaque); + ASSERT_TRUE(serialized); - absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); - ASSERT_TRUE(deserialized.has_value()); + absl::optional<Origin> deserialized = Deserialize(std::move(*serialized)); + ASSERT_TRUE(deserialized.has_value()); - // Can't use DoEqualityComparisons here since empty nonces are never == unless - // they are the same object. - EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString()); + // Can't use DoEqualityComparisons here since empty nonces are never == + // unless they are the same object. + EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString()); + } // Now force initialization of the nonce prior to serialization. for (const GURL& url : invalid_urls) {
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc index a1f415d..a8bde47 100644 --- a/url/scheme_host_port_unittest.cc +++ b/url/scheme_host_port_unittest.cc
@@ -5,7 +5,7 @@ #include <stddef.h> #include <stdint.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" #include "url/scheme_host_port.h"
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc index e8a1edb..8edb7f3 100644 --- a/url/third_party/mozilla/url_parse.cc +++ b/url/third_party/mozilla/url_parse.cc
@@ -38,6 +38,8 @@ #include <stdlib.h> +#include <ostream> + #include "polyfills/base/check_op.h" #include "url/url_parse_internal.h" #include "url/url_util.h"
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc index b278b15..abcf615 100644 --- a/url/url_canon_host.cc +++ b/url/url_canon_host.cc
@@ -3,10 +3,8 @@ // found in the LICENSE file. #include "polyfills/base/check.h" -#include "polyfills/base/metrics/histogram_macros.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" -#include "url/url_canon_ip.h" namespace url { @@ -379,16 +377,6 @@ if (host_info->IsIPAddress()) { output->set_length(output_begin); output->Append(canon_ip.data(), canon_ip.length()); - } else if (host_info->family == CanonHostInfo::NEUTRAL) { - // Only need to call CheckHostnameSafety() for valid hosts that aren't IP - // addresses and aren't broken. - HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host); - // Don't record kOK. Ratio of OK to not-OK statuses is not meaningful at - // this layer, and hostnames are canonicalized a lot. - if (host_safety_status != HostSafetyStatus::kOk) { - UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus", - host_safety_status); - } } } else { // Canonicalization failed. Set BROKEN to notify the caller.
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc index 7cd5cae..ca13427 100644 --- a/url/url_canon_icu_unittest.cc +++ b/url/url_canon_icu_unittest.cc
@@ -4,8 +4,8 @@ #include <stddef.h> +#include "base/cxx17_backports.h" #include "polyfills/base/logging.h" -#include "base/stl_util.h" #include "testing/gtest/include/gtest/gtest.h" #include <unicode/ucnv.h> #include "url/url_canon.h"
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc index ab56e7b..99541bd 100644 --- a/url/url_canon_internal.cc +++ b/url/url_canon_internal.cc
@@ -427,7 +427,7 @@ } for (int i = 0; i < written; ++i) { - buffer[i] = char16_t{temp[i]}; + buffer[i] = static_cast<char16_t>(temp[i]); } buffer[written] = '\0'; return 0;
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc index 8234b4e..f0552b5 100644 --- a/url/url_canon_ip.cc +++ b/url/url_canon_ip.cc
@@ -6,11 +6,10 @@ #include <stdint.h> #include <stdlib.h> + #include <limits> #include "polyfills/base/check.h" -#include "base/strings/string_piece.h" -#include "base/strings/string_util.h" #include "url/url_canon_internal.h" namespace url { @@ -32,56 +31,6 @@ } } -template<typename CHAR, typename UCHAR> -bool DoFindIPv4Components(const CHAR* spec, - const Component& host, - Component components[4]) { - if (!host.is_nonempty()) - return false; - - int cur_component = 0; // Index of the component we're working on. - int cur_component_begin = host.begin; // Start of the current component. - int end = host.end(); - for (int i = host.begin; /* nothing */; i++) { - if (i >= end || spec[i] == '.') { - // Found the end of the current component. - int component_len = i - cur_component_begin; - components[cur_component] = Component(cur_component_begin, component_len); - - // The next component starts after the dot. - cur_component_begin = i + 1; - cur_component++; - - // Don't allow empty components (two dots in a row), except we may - // allow an empty component at the end (this would indicate that the - // input ends in a dot). We also want to error if the component is - // empty and it's the only component (cur_component == 1). - if (component_len == 0 && (i < end || cur_component == 1)) - return false; - - if (i >= end) - break; // End of the input. - - if (cur_component == 4) { - // Anything else after the 4th component is an error unless it is a - // dot that would otherwise be treated as the end of input. - if (spec[i] == '.' && i + 1 == end) - break; - return false; - } - } else if (static_cast<UCHAR>(spec[i]) >= 0x80 || - !IsIPv4Char(static_cast<unsigned char>(spec[i]))) { - // Invalid character for an IPv4 address. - return false; - } - } - - // Fill in any unused components. - while (cur_component < 4) - components[cur_component++] = Component(); - return true; -} - // Converts an IPv4 component to a 32-bit number, while checking for overflow. 
// // Possible return values: @@ -89,13 +38,15 @@ // - BROKEN - The input was numeric, but too large for a 32-bit field. // - NEUTRAL - Input was not numeric. // -// The input is assumed to be ASCII. FindIPv4Components should have stripped -// out any input that is greater than 7 bits. The components are assumed -// to be non-empty. +// The input is assumed to be ASCII. The components are assumed to be non-empty. template<typename CHAR> CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec, const Component& component, uint32_t* number) { + // Empty components are considered non-numeric. + if (!component.is_nonempty()) + return CanonHostInfo::NEUTRAL; + // Figure out the base SharedCharTypes base; int base_prefix_len = 0; // Size of the prefix for this base. @@ -127,14 +78,25 @@ const int kMaxComponentLen = 16; char buf[kMaxComponentLen + 1]; // digits + '\0' int dest_i = 0; + bool may_be_broken_octal = false; for (int i = component.begin + base_prefix_len; i < component.end(); i++) { + if (spec[i] >= 0x80) + return CanonHostInfo::NEUTRAL; + // We know the input is 7-bit, so convert to narrow (if this is the wide // version of the template) by casting. char input = static_cast<char>(spec[i]); // Validate that this character is OK for the given base. - if (!IsCharOfType(input, base)) - return CanonHostInfo::NEUTRAL; + if (!IsCharOfType(input, base)) { + if (IsCharOfType(input, CHAR_DEC)) { + // Entirely numeric components with leading 0s that aren't octal are + // considered broken. + may_be_broken_octal = true; + } else { + return CanonHostInfo::NEUTRAL; + } + } // Fill the buffer, if there's space remaining. This check allows us to // verify that all characters are numeric, even those that don't fit. 
@@ -142,6 +104,9 @@ buf[dest_i++] = input; } + if (may_be_broken_octal) + return CanonHostInfo::BROKEN; + buf[dest_i] = '\0'; // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal @@ -158,64 +123,76 @@ } // See declaration of IPv4AddressToNumber for documentation. -template<typename CHAR> +template <typename CHAR, typename UCHAR> CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec, - const Component& host, + Component host, unsigned char address[4], int* num_ipv4_components) { - // The identified components. Not all may exist. - Component components[4]; - if (!FindIPv4Components(spec, host, components)) + // Ignore terminal dot, if present. + if (host.is_nonempty() && spec[host.end() - 1] == '.') + --host.len; + + // Do nothing if empty. + if (!host.is_nonempty()) return CanonHostInfo::NEUTRAL; - // Convert existing components to digits. Values up to - // |existing_components| will be valid. + // Read component values. The first `existing_components` of them are + // populated front to back, with the first one corresponding to the last + // component, which allows for early exit if the last component isn't a + // number. uint32_t component_values[4]; int existing_components = 0; - // Set to true if one or more components are BROKEN. BROKEN is only - // returned if all components are IPV4 or BROKEN, so, for example, - // 12345678912345.de returns NEUTRAL rather than broken. - bool broken = false; - for (int i = 0; i < 4; i++) { - if (components[i].len <= 0) + int current_component_end = host.end(); + int current_position = current_component_end; + while (true) { + // If this is not the first character of a component, go to the next + // component. 
+ if (current_position != host.begin && spec[current_position - 1] != '.') { + --current_position; continue; - CanonHostInfo::Family family = IPv4ComponentToNumber( - spec, components[i], &component_values[existing_components]); - - if (family == CanonHostInfo::BROKEN) { - broken = true; - } else if (family != CanonHostInfo::IPV4) { - // Stop if we hit a non-BROKEN invalid non-empty component. - return family; } - existing_components++; + CanonHostInfo::Family family = IPv4ComponentToNumber( + spec, + Component(current_position, current_component_end - current_position), + &component_values[existing_components]); + + // If `family` is NEUTRAL and this is the last component, return NEUTRAL. If + // `family` is NEUTRAL but not the last component, this is considered a + // BROKEN IPv4 address, as opposed to a non-IPv4 hostname. + if (family == CanonHostInfo::NEUTRAL && existing_components == 0) + return CanonHostInfo::NEUTRAL; + + if (family != CanonHostInfo::IPV4) + return CanonHostInfo::BROKEN; + + ++existing_components; + + // If this is the final component, nothing else to do. + if (current_position == host.begin) + break; + + // If there are more than 4 components, fail. + if (existing_components == 4) + return CanonHostInfo::BROKEN; + + current_component_end = current_position - 1; + --current_position; } - if (broken) - return CanonHostInfo::BROKEN; - - // Use that sequence of numbers to fill out the 4-component IP address. + // Use `component_values` to fill out the 4-component IP address. // First, process all components but the last, while making sure each fits // within an 8-bit field. 
- for (int i = 0; i < existing_components - 1; i++) { + for (int i = existing_components - 1; i > 0; i--) { if (component_values[i] > std::numeric_limits<uint8_t>::max()) return CanonHostInfo::BROKEN; - address[i] = static_cast<unsigned char>(component_values[i]); + address[existing_components - i - 1] = + static_cast<unsigned char>(component_values[i]); } - // Next, consume the last component to fill in the remaining bytes. - // Work around a gcc 4.9 bug. crbug.com/392872 -#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - uint32_t last_value = component_values[existing_components - 1]; -#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) -#pragma GCC diagnostic pop -#endif + uint32_t last_value = component_values[0]; for (int i = 3; i >= existing_components - 1; i--) { address[i] = static_cast<unsigned char>(last_value); last_value >>= 8; @@ -595,105 +572,6 @@ return true; } -// Method to check if something looks like a number. Used instead of -// IPv4ComponentToNumber() so that it counts things that look like bad base-8 -// (e.g. 09). -// -// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. -template <typename CHAR> -bool LooksLikeANumber(const CHAR* spec, const Component& component) { - // Empty components don't look like numbers. - if (!component.is_nonempty()) - return false; - - SharedCharTypes base = CHAR_DEC; - size_t start = component.begin; - if (component.len >= 2 && spec[start] == '0' && - (spec[start + 1] == 'x' || spec[start + 1] == 'X')) { - base = CHAR_HEX; - start += 2; - } - for (int i = start; i < component.end(); i++) { - if (!IsCharOfType(spec[i], base)) - return false; - } - return true; -} - -// Calculates the "HostSafetyStatus" of the provided hostname. -// -// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. 
-template <typename CHAR> -HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec, - const Component& host) { - if (!host.is_nonempty()) - return HostSafetyStatus::kOk; - - // Find the last two components. - - // Number of identified components. Stops after second component. Does not - // include the empty terminal component, if the host ends with a dot. - int existing_components = 0; - // Parsed component values. Populated last component first. - Component components[2]; - - // Index of the character after the end of the current component. - int cur_component_end = host.end(); - - // Ignore terminal dot, if there is one. - if (spec[cur_component_end - 1] == '.') { - cur_component_end--; - // Nothing else to do if the host is just a dot. - if (host.begin == cur_component_end) - return HostSafetyStatus::kOk; - } - - for (int i = cur_component_end; /* nothing */; i--) { - GURL_DCHECK_GE(i, host.begin); - - // If `i` is not the first character of the component, continue. - if (i != host.begin && spec[i - 1] != '.') - continue; - - // Otherwise, i is the index of the the start of a component. - components[existing_components] = Component(i, cur_component_end - i); - existing_components++; - - // Finished parsing last component. - if (i == host.begin) - break; - - // If there's anything left to parse after the 2th component, nothing more - // to do. - if (existing_components == 2) - break; - - // The next component ends before the dot at spec[i]. `i` will be - // decremented when restarting the loop, so no need to modify it. - cur_component_end = i - 1; - } - - // If the last value doesn't look like a number, no need to do more work, as - // IPv6 and hostnames with non-numeric final components are all considered OK. 
- if (!LooksLikeANumber(spec, components[0])) - return HostSafetyStatus::kOk; - - url::RawCanonOutputT<char> ignored_output; - CanonHostInfo host_info; - CanonicalizeIPAddress(spec, host, &ignored_output, &host_info); - // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4 - // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean - // things like "].6" will correctly be considered already invalid, so will - // return kOk. - if (host_info.family != CanonHostInfo::NEUTRAL) - return HostSafetyStatus::kOk; - - if (LooksLikeANumber(spec, components[1])) - return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric; - - return HostSafetyStatus::kTopLevelDomainIsNumeric; -} - } // namespace void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) { @@ -745,18 +623,6 @@ } } -bool FindIPv4Components(const char* spec, - const Component& host, - Component components[4]) { - return DoFindIPv4Components<char, unsigned char>(spec, host, components); -} - -bool FindIPv4Components(const char16_t* spec, - const Component& host, - Component components[4]) { - return DoFindIPv4Components<char16_t, char16_t>(spec, host, components); -} - void CanonicalizeIPAddress(const char* spec, const Component& host, CanonOutput* output, @@ -785,15 +651,16 @@ const Component& host, unsigned char address[4], int* num_ipv4_components) { - return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components); + return DoIPv4AddressToNumber<char, unsigned char>(spec, host, address, + num_ipv4_components); } CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec, const Component& host, unsigned char address[4], int* num_ipv4_components) { - return DoIPv4AddressToNumber<char16_t>(spec, host, address, - num_ipv4_components); + return DoIPv4AddressToNumber<char16_t, char16_t>(spec, host, address, + num_ipv4_components); } bool IPv6AddressToNumber(const char* spec, @@ -808,13 +675,4 @@ return DoIPv6AddressToNumber<char16_t, 
char16_t>(spec, host, address); } -HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) { - return DoCheckHostnameSafety(spec, host); -} - -HostSafetyStatus CheckHostnameSafety(const char16_t* spec, - const Component& host) { - return DoCheckHostnameSafety(spec, host); -} - } // namespace url
diff --git a/url/url_canon_ip.h b/url/url_canon_ip.h index 8980dbb..4e85466 100644 --- a/url/url_canon_ip.h +++ b/url/url_canon_ip.h
@@ -6,7 +6,6 @@ #define URL_URL_CANON_IP_H_ #include "polyfills/base/component_export.h" -#include "base/strings/string_piece_forward.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" @@ -20,33 +19,6 @@ COMPONENT_EXPORT(URL) void AppendIPv6Address(const unsigned char address[16], CanonOutput* output); -// Searches the host name for the portions of the IPv4 address. On success, -// each component will be placed into |components| and it will return true. -// It will return false if the host can not be separated as an IPv4 address -// or if there are any non-7-bit characters or other characters that can not -// be in an IP address. (This is important so we fail as early as possible for -// common non-IP hostnames.) -// -// Not all components may exist. If there are only 3 components, for example, -// the last one will have a length of -1 or 0 to indicate it does not exist. -// -// Note that many platforms' inet_addr will ignore everything after a space -// in certain circumstances if the stuff before the space looks like an IP -// address. IE6 is included in this. We do NOT handle this case. In many cases, -// the browser's canonicalization will get run before this which converts -// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla), -// so this code path never gets hit. Our host canonicalization will notice -// these spaces and escape them, which will make IP address finding fail. This -// seems like better behavior than stripping after a space. -COMPONENT_EXPORT(URL) -bool FindIPv4Components(const char* spec, - const Component& host, - Component components[4]); -COMPONENT_EXPORT(URL) -bool FindIPv4Components(const char16_t* spec, - const Component& host, - Component components[4]); - // Converts an IPv4 address to a 32-bit number (network byte order). 
// // Possible return values: @@ -83,48 +55,6 @@ const Component& host, unsigned char address[16]); -// Temporary enum for collecting histograms at the DNS and URL level about -// hostname validity, for potentially updating the URL spec. -// -// This is used in histograms, so old values should not be reused, and new -// values should be added at the bottom. -// -// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. -enum class HostSafetyStatus { - // Any canonical hostname that doesn't fit into any other class. IPv4 - // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are - // broken are also considered OK. - kOk = 0, - - // The top level domain looks numeric. This is basically means it either - // parses as a number per the URL spec, or is entirely numeric ("09" doesn't - // currently parse as a number, since the leading "0" indicates an octal - // value). - kTopLevelDomainIsNumeric = 1, - - // Both the top level domain and the next level domain look like a number, - // using the above definition. This is the case that is actually concerning - - // for these domains, the eTLD+1 is purely numeric, which means putting it as - // the hostname of a URL will potentially result in an IPv4 hostname. This is - // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this - // label will be returned instead. - kTwoHighestLevelDomainsAreNumeric = 2, - - kMaxValue = kTwoHighestLevelDomainsAreNumeric, -}; - -// Calculates the HostSafetyStatus of a hostname. Hostname should have been -// canonicalized. This function is only intended to be temporary, to inform -// decisions around tightening up what the URL parser considers valid hostnames. -// -// TODO(https://crbug.com/1149194): Remove this once the bug is fixed. 
-COMPONENT_EXPORT(URL) -HostSafetyStatus CheckHostnameSafety(const char* hostname, - const Component& host); -COMPONENT_EXPORT(URL) -HostSafetyStatus CheckHostnameSafety(const char16_t* hostname, - const Component& host); - } // namespace url #endif // URL_URL_CANON_IP_H_
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc index 134e132..e726cfb 100644 --- a/url/url_canon_pathurl.cc +++ b/url/url_canon_pathurl.cc
@@ -63,14 +63,17 @@ new_parsed->host.reset(); new_parsed->port.reset(); - // Canonicalize path and query via the weaker path URL rules. + // Canonicalize path via the weaker path URL rules. // // Note: parsing the path part should never cause a failure, see // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0', output, &new_parsed->path); - DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?', - output, &new_parsed->query); + + // Similar to mailto:, always use the default UTF-8 charset converter for + // query. + CanonicalizeQuery(source.query, parsed.query, nullptr, output, + &new_parsed->query); CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc index a59c745..aa2a8ce 100644 --- a/url/url_canon_unittest.cc +++ b/url/url_canon_unittest.cc
@@ -5,14 +5,13 @@ #include <errno.h> #include <stddef.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "base/strings/utf_string_conversions.h" #include "base/test/gtest_util.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h" #include "url/url_canon_internal.h" -#include "url/url_canon_ip.h" #include "url/url_canon_stdstring.h" #include "url/url_test_utils.h" @@ -609,21 +608,36 @@ } TEST(URLCanonTest, IPv4) { + // clang-format off IPAddressCase cases[] = { - // Empty is not an IP address. + // Empty is not an IP address. {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, {".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Regular IP addresses in different bases. + // Regular IP addresses in different bases. {"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - // Non-IP addresses due to invalid characters. + // Non-IP addresses due to invalid characters. {"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid characters for the base should be rejected. - {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // If there are not enough components, the last one should fill them out. + // Hostnames with a numeric final component but other components that don't + // parse as numbers should be considered broken. 
+ {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"19a.168.0.1.", L"19a.168.0.1.", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"0308.0250.00.01.", L"0308.0250.00.01.", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"0xCG.0xA8.0x0.0x1.", L"0xCG.0xA8.0x0.0x1.", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + // Hostnames with a non-numeric terminal component should be considered non-IPv4 hostnames, but valid. + {"19.168.0.1a", L"19.168.0.1a", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"0xC.0xA8.0x0.0x1G", L"0xC.0xA8.0x0.0x1G", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + // Hostnames that would be considered broken IPv4 hostnames should be considered valid non-IPv4 hostnames if they end with two dots instead of 0 or 1. + {"19a.168.0.1..", L"19a.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"0308.0250.00.01..", L"0308.0250.00.01..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"0xCG.0xA8.0x0.0x1..", L"0xCG.0xA8.0x0.0x1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + // Hosts with components that aren't considered valid IPv4 numbers but are entirely numeric should be considered invalid. + {"1.2.3.08", L"1.2.3.08", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"1.2.3.08.", L"1.2.3.08.", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + // If there are not enough components, the last one should fill them out. 
{"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"}, {"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, {"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, @@ -632,15 +646,16 @@ {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, {"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Too many components means not an IP address. - {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // We allow a single trailing dot. + // Hostnames with too many components but a numeric final component are invalid. + {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + // We allow a single trailing dot. {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, {"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, {"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Two dots in a row means not an IP address. - {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Any numerical overflow should be marked as BROKEN. + // Hosts with two dots in a row with a final numeric component are considered invalid. + {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"192.168..1.", L"192.168..1.", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + // Any numerical overflow should be marked as BROKEN. 
{"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, @@ -650,7 +665,7 @@ {"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Repeat the previous tests, minus 1, to verify boundaries. + // Repeat the previous tests, minus 1, to verify boundaries. {"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"}, {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"}, {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"}, @@ -660,52 +675,69 @@ {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"}, {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"}, {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"}, - // Old trunctations tests. They're all "BROKEN" now. + // Old truncation tests. They're all "BROKEN" now. {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, {"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Spaces should be rejected. + // Too many components should be rejected, in valid ranges or not. 
+ {"255.255.255.255.255", L"255.255.255.255.255", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"256.256.256.256.256", L"256.256.256.256.256", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + // Spaces should be rejected. {"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Very large numbers. + // Very large numbers. {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"}, {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, - // A number has no length limit, but long numbers can still overflow. + // A number has no length limit, but long numbers can still overflow. {"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"}, {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // If a long component is non-numeric, it's a hostname, *not* a broken IP. + // If a long component is non-numeric, it's a hostname, *not* a broken IP. {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Truncation of all zeros should still result in 0. + // Truncation of all zeros should still result in 0. {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"}, + // Non-ASCII characters in final component should return NEUTRAL. 
+ {"1.2.3.\xF0\x9F\x92\xA9", L"1.2.3.\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"1.2.3.4\xF0\x9F\x92\xA9", L"1.2.3.4\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"1.2.3.0x\xF0\x9F\x92\xA9", L"1.2.3.0x\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + {"1.2.3.0\xF0\x9F\x92\xA9", L"1.2.3.0\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, + // Non-ASCII characters in other components should result in broken IPs when final component is numeric. + {"1.2.\xF0\x9F\x92\xA9.4", L"1.2.\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"1.2.3\xF0\x9F\x92\xA9.4", L"1.2.3\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"1.2.0x\xF0\x9F\x92\xA9.4", L"1.2.0x\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"1.2.0\xF0\x9F\x92\xA9.4", L"1.2.0\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""}, + {"\xF0\x9F\x92\xA9.2.3.4", L"\xD83D\xDCA9.2.3.4", "", Component(), CanonHostInfo::BROKEN, -1, ""}, }; + // clang-format on - for (size_t i = 0; i < gurl_base::size(cases); i++) { + for (const auto& test_case : cases) { + SCOPED_TRACE(test_case.input8); + // 8-bit version. 
- Component component(0, static_cast<int>(strlen(cases[i].input8))); + Component component(0, static_cast<int>(strlen(test_case.input8))); std::string out_str1; StdStringCanonOutput output1(&out_str1); CanonHostInfo host_info; - CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info); + CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info); output1.Complete(); - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), + EXPECT_EQ(test_case.expected_family, host_info.family); + EXPECT_EQ(std::string(test_case.expected_address_hex), BytesToHexString(host_info.address, host_info.AddressLength())); if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str1.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, + EXPECT_STREQ(test_case.expected, out_str1.c_str()); + EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin); + EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len); + EXPECT_EQ(test_case.expected_num_ipv4_components, host_info.num_ipv4_components); } // 16-bit version. 
std::u16string input16( - test_utils::TruncateWStringToUTF16(cases[i].input16)); + test_utils::TruncateWStringToUTF16(test_case.input16)); component = Component(0, static_cast<int>(input16.length())); std::string out_str2; @@ -713,14 +745,14 @@ CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info); output2.Complete(); - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), + EXPECT_EQ(test_case.expected_family, host_info.family); + EXPECT_EQ(std::string(test_case.expected_address_hex), BytesToHexString(host_info.address, host_info.AddressLength())); if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str2.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, + EXPECT_STREQ(test_case.expected, out_str2.c_str()); + EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin); + EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len); + EXPECT_EQ(test_case.expected_num_ipv4_components, host_info.num_ipv4_components); } } @@ -2529,115 +2561,4 @@ output.set_length(0); } -TEST(URLCanonTest, URLSafetyStatus) { - const struct { - const char* host; - HostSafetyStatus expected_safety_status; - } kTestCases[] = { - // Empty components are ok. - {"", HostSafetyStatus::kOk}, - {".", HostSafetyStatus::kOk}, - {"..", HostSafetyStatus::kOk}, - - // Hostnames with purely non-numeric components are ok. - {"com", HostSafetyStatus::kOk}, - {"a.com", HostSafetyStatus::kOk}, - {"a.b.com", HostSafetyStatus::kOk}, - - // Hostnames with components with letters and numbers are ok. 
- {"1com", HostSafetyStatus::kOk}, - {"0a.0com", HostSafetyStatus::kOk}, - {"0xa.0xb.0xcom", HostSafetyStatus::kOk}, - {"com1", HostSafetyStatus::kOk}, - {"a1.com1", HostSafetyStatus::kOk}, - {"a1.b1.com1", HostSafetyStatus::kOk}, - - // Hostnames components that are numbers that are before a final - // non-numeric component are ok. - {"1.com", HostSafetyStatus::kOk}, - {"0.1.2com", HostSafetyStatus::kOk}, - - // Invalid hostnames are ok. - {"[", HostSafetyStatus::kOk}, - - // IPv6 hostnames are ok. - {"[::]", HostSafetyStatus::kOk}, - {"[2001:db8::1]", HostSafetyStatus::kOk}, - - // IPv4 hostnames are ok. - {"1.2.3.4", HostSafetyStatus::kOk}, - // IPv4 hostnames with creative representations are ok. - {"01.02.03.04", HostSafetyStatus::kOk}, - {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk}, - {"1.2", HostSafetyStatus::kOk}, - {"1.2.3", HostSafetyStatus::kOk}, - {"0", HostSafetyStatus::kOk}, - {"0x0", HostSafetyStatus::kOk}, - {"07", HostSafetyStatus::kOk}, - - // Hostnames with a final problematic top level domain. - {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {".0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric}, - - // Hostnames with problematic two highest level domains. 
- {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, - {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, - {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric}, - }; - - for (const auto& test_case : kTestCases) { - // Test with ASCII. - SCOPED_TRACE(test_case.host); - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(test_case.host, - Component(0, strlen(test_case.host)))); - - // Test with ASCII and terminal dot, which shouldn't affect results for - // anything that doesn't already end in a dot (or anything that only has - // dots). - std::string host_with_dot = test_case.host; - host_with_dot += "."; - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(host_with_dot.c_str(), - Component(0, host_with_dot.size()))); - - // Test with ASCII and characters that are not part of the component. - std::string host_with_bonus_characters = test_case.host; - host_with_bonus_characters = "00" + host_with_bonus_characters + "00"; - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(host_with_bonus_characters.c_str(), - Component(2, strlen(test_case.host)))); - - // Test with UTF-16. - std::u16string utf16 = gurl_base::UTF8ToUTF16(test_case.host); - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size()))); - - // Test with UTF-16 and terminal dot. - std::u16string utf16_with_dot = gurl_base::UTF8ToUTF16(host_with_dot); - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(utf16_with_dot.c_str(), - Component(0, utf16_with_dot.size()))); - - // Test with UTF-16 and characters that are not part of the component. - std::u16string utf16_with_bonus_characters = - gurl_base::UTF8ToUTF16(host_with_bonus_characters); - EXPECT_EQ(test_case.expected_safety_status, - CheckHostnameSafety(utf16_with_bonus_characters.c_str(), - Component(2, utf16.size()))); - } -} - } // namespace url
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc index 4029d61..381d74e 100644 --- a/url/url_idna_icu.cc +++ b/url/url_idna_icu.cc
@@ -11,7 +11,6 @@ #include <ostream> #include "polyfills/base/check_op.h" -#include "base/no_destructor.h" #include <unicode/uidna.h> #include <unicode/utypes.h> #include "url/url_canon_icu.h" @@ -19,10 +18,8 @@ namespace url { -namespace { - -// A wrapper to use gurl_base::NoDestructor with ICU's UIDNA, a C pointer to -// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). +// Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with +// uidna_openUTS46(). // // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned // code points allowed) to IDNA 2008 with @@ -42,12 +39,12 @@ // http://goo.gl/3XBhqw ). // See http://http://unicode.org/reports/tr46/ and references therein // for more details. -struct UIDNAWrapper { - UIDNAWrapper() { +UIDNA* GetUIDNA() { + static UIDNA* uidna = [] { UErrorCode err = U_ZERO_ERROR; // TODO(jungshik): Change options as different parties (browsers, // registrars, search engines) converge toward a consensus. - value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); + UIDNA* value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); if (U_FAILURE(err)) { GURL_CHECK(false) << "failed to open UTS46 data with error: " << u_errorName(err) @@ -56,16 +53,9 @@ << "tables for libicu. See https://crbug.com/778929."; value = nullptr; } - } - - UIDNA* value; -}; - -} // namespace - -UIDNA* GetUIDNA() { - static gurl_base::NoDestructor<UIDNAWrapper> uidna_wrapper; - return uidna_wrapper->value; + return value; + }(); + return uidna; } // Converts the Unicode input representing a hostname to ASCII using IDN rules.
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc index b67b550..b23dcf8 100644 --- a/url/url_parse_unittest.cc +++ b/url/url_parse_unittest.cc
@@ -6,7 +6,7 @@ #include <stddef.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/third_party/mozilla/url_parse.h" @@ -491,7 +491,7 @@ struct FileCase { const char* input; const char* expected; - } file_cases[] = { + } extract_cases[] = { {"http://www.google.com", nullptr}, {"http://www.google.com/", ""}, {"http://www.google.com/search", "search"}, @@ -509,8 +509,8 @@ {"http://www.google.com/foo;bar;html", "foo"}, }; - for (size_t i = 0; i < gurl_base::size(file_cases); i++) { - const char* url = file_cases[i].input; + for (size_t i = 0; i < gurl_base::size(extract_cases); i++) { + const char* url = extract_cases[i].input; int len = static_cast<int>(strlen(url)); Parsed parsed; @@ -519,7 +519,7 @@ Component file_name; ExtractFileName(url, parsed.path, &file_name); - EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name)); + EXPECT_TRUE(ComponentMatches(url, extract_cases[i].expected, file_name)); } }
diff --git a/url/url_util.cc b/url/url_util.cc index 0c35913..470da30 100644 --- a/url/url_util.cc +++ b/url/url_util.cc
@@ -6,7 +6,9 @@ #include <stddef.h> #include <string.h> + #include <atomic> +#include <ostream> #include "polyfills/base/check_op.h" #include "base/compiler_specific.h"
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc index 3dcfa76..158e3d6 100644 --- a/url/url_util_unittest.cc +++ b/url/url_util_unittest.cc
@@ -4,7 +4,7 @@ #include <stddef.h> -#include "base/stl_util.h" +#include "base/cxx17_backports.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/third_party/mozilla/url_parse.h" #include "url/url_canon.h"