Update googleurl from Chromium upstream
Uses revision c0807c09e6ff496fd42d13a6189214ca83051cec
from Tue Aug 31 18:40:50 2021
diff --git a/AUTHORS b/AUTHORS
index 4bb6b20..1aa2922 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -43,6 +43,7 @@
Aku Kotkavuo <a.kotkavuo@partner.samsung.com>
Aldo Culquicondor <alculquicondor@gmail.com>
Aleksandar Stojiljkovic <aleksandar.stojiljkovic@intel.com>
+Aleksei Gurianov <gurianov@gmail.com>
Alex Chronopoulos <achronop@gmail.com>
Alex Gabriel <minilogo@gmail.com>
Alex Gartrell <agartrell@cmu.edu>
@@ -126,6 +127,7 @@
Asami Doi <d0iasm.pub@gmail.com>
Ashish Kumar Gupta <guptaag@amazon.com>
Ashlin Joseph <ashlin.j@samsung.com>
+Ashutosh <coder.commando@gmail.com>
Asish Singh <asish.singh@samsung.com>
Attila Dusnoki <dati91@gmail.com>
Avinaash Doreswamy <avi.nitk@samsung.com>
@@ -210,6 +212,7 @@
Chris Tserng <tserng@amazon.com>
Chris Vasselli <clindsay@gmail.com>
Chris Ye <hawkoyates@gmail.com>
+Christoph Durschang <christoph142@gmx.com>
Christophe Dumez <ch.dumez@samsung.com>
Christopher Dale <chrelad@gmail.com>
Chunbo Hua <chunbo.hua@intel.com>
@@ -286,6 +289,7 @@
Donghee Na <corona10@gmail.com>
Dong-hee Na <donghee.na92@gmail.com>
Dongie Agnir <dongie.agnir@gmail.com>
+Dongjun Kim <deejay.kim@navercorp.com>
Dongjun Kim <djmix.kim@samsung.com>
Dongseong Hwang <dongseong.hwang@intel.com>
Dongwoo Joshua Im <dw.im@samsung.com>
@@ -308,11 +312,14 @@
Elan Ruusamäe <elan.ruusamae@gmail.com>
Ergun Erdogmus <erdogmusergun@gmail.com>
Eric Ahn <byungwook.ahn@gmail.com>
+Eric Huang <ele828@gmail.com>
Eric Rescorla <ekr@rtfm.com>
Erik Hill <erikghill@gmail.com>
+Erik Kurzinger <ekurzinger@gmail.com>
Erik Sjölund <erik.sjolund@gmail.com>
Eriq Augustine <eriq.augustine@gmail.com>
Ernesto Mudu <ernesto.mudu@gmail.com>
+Ethan Wong <bunnnywong@gmail.com>
Etienne Laurin <etienne@atnnn.com>
Eugene Kim <eugene70kim@gmail.com>
Eugene Sudin <eugene@sudin.pro>
@@ -415,6 +422,7 @@
HyunJi Kim <hjkim3323@gmail.com>
Hyunjun Shin <hyunjun.shin2@navercorp.com>
Hyunjun Shin <shjj1504@gmail.com>
+Hyunjune Kim <hyunjune.kim@navercorp.com>
Hyunjune Kim <hyunjune.kim@samsung.com>
Hyunki Baik <hyunki.baik@samsung.com>
Ian Cullinan <cullinan@amazon.com>
@@ -479,6 +487,7 @@
Jerry Lin <wahahab11@gmail.com>
Jerry Zhang <zhj8407@gmail.com>
Jesper Storm Bache <jsbache@gmail.com>
+Jesper van den Ende <jespertheend@gmail.com>
Jesse Miller <jesse@jmiller.biz>
Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com>
Jiadong Chen <chenjiadong@huawei.com>
@@ -547,6 +556,7 @@
Josué Ratelle <jorat1346@gmail.com>
Josyula Venkat Narasimham <venkat.nj@samsung.com>
Joyer Huang <collger@gmail.com>
+Juan Cruz Viotti <jv@jviotti.com>
Juan Jose Lopez Jaimez <jj.lopezjaimez@gmail.com>
Juhui Lee <juhui24.lee@samsung.com>
Julian Geppert <spctstr@gmail.com>
@@ -562,6 +572,7 @@
Jungkee Song <jungkee.song@samsung.com>
Junmin Zhu <junmin.zhu@intel.com>
Junsong Li <ljs.darkfish@gmail.com>
+Jun Zeng <hjunzeng6@gmail.com>
Justin Okamoto <justmoto@amazon.com>
Justin Ribeiro <justin@justinribeiro.com>
Jüri Valdmann <juri.valdmann@qt.io>
@@ -657,6 +668,7 @@
Lucie Brozkova <lucinka.brozkova@gmail.com>
Luiz Von Dentz <luiz.von.dentz@intel.com>
Luka Dojcilovic <l.dojcilovic@gmail.com>
+Lukas Lihotzki <lukas@lihotzki.de>
Lukasz Krakowiak <lukasz.krakowiak@mobica.com>
Luke Inman-Semerau <luke.semerau@gmail.com>
Luke Seunghoe Gu <gulukesh@gmail.com>
@@ -714,7 +726,9 @@
Matthias Reitinger <reimarvin@gmail.com>
Matthieu Rigolot <matthieu.rigolot@gmail.com>
Matthieu Vlad Hauglustaine <matt.hauglustaine@gmail.com>
+Max Karolinskiy <max@brave.com>
Max Perepelitsyn <pph34r@gmail.com>
+Max Schmitt <max@schmitt.mx>
Max Vujovic <mvujovic@adobe.com>
Mayank Gupta <mayank.g1@samsung.com>
Mayur Kankanwadi <mayurk.vk@samsung.com>
@@ -732,6 +746,7 @@
Michael Müller <michael@fds-team.de>
Michael Schechter <mike.schechter@gmail.com>
Michael Smith <sideshowbarker@gmail.com>
+Michael Weiss <dev.primeos@gmail.com>
Michaël Zasso <mic.besace@gmail.com>
Michael Zugelder <michael@zugelder.org>
Michel Promonet <michel.promonet.1@gmail.com>
@@ -764,6 +779,7 @@
Momoko Hattori <momohatt10@gmail.com>
Mostafa Sedaghat joo <mostafa.sedaghat@gmail.com>
Mrunal Kapade <mrunal.kapade@intel.com>
+Munira Tursunova <moonira@google.com>
Myeongjin Cho <myeongjin.cho@navercorp.com>
Myles C. Maxfield <mymax@amazon.com>
Myung-jong Kim <mjkim610@gmail.com>
@@ -889,6 +905,7 @@
Reda Tawfik <redatawfik@noogler.google.com>
Réda Housni Alaoui <alaoui.rda@gmail.com>
Refael Ackermann <refack@gmail.com>
+Rémi Arnaud <jsremi@gmail.com>
Renata Hodovan <rhodovan.u-szeged@partner.samsung.com>
Rene Bolldorf <rb@radix.io>
Rene Ladan <r.c.ladan@gmail.com>
@@ -922,6 +939,7 @@
Rulong Chen <rulong.crl@alibaba-inc.com>
Russell Davis <russell.davis@gmail.com>
Ryan Ackley <ryanackley@gmail.com>
+Ryan Gonzalez <rymg19@gmail.com>
Ryan Norton <rnorton10@gmail.com>
Ryan Sleevi <ryan-chromium-dev@sleevi.com>
Ryan Yoakum <ryoakum@skobalt.com>
@@ -1038,6 +1056,7 @@
Sumaid Syed <sumaidsyed@gmail.com>
Sunchang Li <johnstonli@tencent.com>
Sundoo Kim <nerdooit@gmail.com>
+Sundoo Kim <0xd00d00b@gmail.com>
Suneel Kota <suneel.kota@samsung.com>
Sungguk Lim <limasdf@gmail.com>
Sungmann Cho <sungmann.cho@gmail.com>
@@ -1086,6 +1105,8 @@
Timo Reimann <ttr314@googlemail.com>
Timo Witte <timo.witte@gmail.com>
Ting Shao <ting.shao@intel.com>
+Tobias Soppa <tobias@soppa.me>
+Tobias Soppa <tobias.soppa@code.berlin>
Tom Callaway <tcallawa@redhat.com>
Tom Harwood <tfh@skip.org>
Tomas Popela <tomas.popela@gmail.com>
@@ -1114,6 +1135,7 @@
Vernon Tang <vt@foilhead.net>
Viatcheslav Ostapenko <sl.ostapenko@samsung.com>
Victor Costan <costan@gmail.com>
+Victor Solonsky <victor.solonsky@gmail.com>
Viet-Trung Luu <viettrungluu@gmail.com>
Vinay Anantharaman <vinaya@adobe.com>
Vinoth Chandar <vinoth@uber.com>
@@ -1141,6 +1163,7 @@
Xiang Long <xiang.long@intel.com>
XiangYang <yangxiang12@huawei.com>
Xiangze Zhang <xiangze.zhang@intel.com>
+Xiaobing Yang <yangxiaobing@qianxin.com>
Xiaofeng Zhang <xiaofeng.zhang@intel.com>
Xiaolei Yu <dreifachstein@gmail.com>
Xiaoshu Zhang <xiaoshu@amazon.com>
@@ -1222,6 +1245,7 @@
# END individuals section.
# BEGIN organizations section.
+Accenture <*@accenture.com>
ACCESS CO., LTD. <*@access-company.com>
Akamai Inc. <*@akamai.com>
ARM Holdings <*@arm.com>
diff --git a/base/BUILD b/base/BUILD
index c933421..e2da292 100644
--- a/base/BUILD
+++ b/base/BUILD
@@ -14,6 +14,7 @@
"containers/span.h",
"containers/util.h",
"cxx17_backports.h",
+ "cxx20_to_address.h",
"debug/leak_annotations.h",
"functional/identity.h",
"functional/invoke.h",
diff --git a/base/compiler_specific.h b/base/compiler_specific.h
index 3faca72..6651220 100644
--- a/base/compiler_specific.h
+++ b/base/compiler_specific.h
@@ -308,6 +308,31 @@
#define STACK_UNINITIALIZED
#endif
+// Attribute "no_stack_protector" disables -fstack-protector for the specified
+// function.
+//
+// "stack_protector" is enabled on most POSIX builds. The flag adds a canary
+// to each stack frame, which on function return is checked against a reference
+// canary. If the canaries do not match, it's likely that a stack buffer
+// overflow has occurred, so immediately crashing will prevent exploitation in
+// many cases.
+//
+// In some cases it's desirable to remove this, e.g. on hot functions, or if
+// we have purposely changed the reference canary.
+#if defined(COMPILER_GCC) || defined(__clang__)
+#if defined(__has_attribute)
+#if __has_attribute(__no_stack_protector__)
+#define NO_STACK_PROTECTOR __attribute__((__no_stack_protector__))
+#else // __has_attribute(__no_stack_protector__)
+#define NO_STACK_PROTECTOR __attribute__((__optimize__("-fno-stack-protector")))
+#endif
+#else // defined(__has_attribute)
+#define NO_STACK_PROTECTOR __attribute__((__optimize__("-fno-stack-protector")))
+#endif
+#else
+#define NO_STACK_PROTECTOR
+#endif
+
// The ANALYZER_ASSUME_TRUE(bool arg) macro adds compiler-specific hints
// to Clang which control what code paths are statically analyzed,
// and is meant to be used in conjunction with assert & assert-like functions.
diff --git a/base/containers/span.h b/base/containers/span.h
index 1a54de1..d43814e 100644
--- a/base/containers/span.h
+++ b/base/containers/span.h
@@ -17,8 +17,9 @@
#include "polyfills/base/check_op.h"
#include "base/containers/checked_iterators.h"
#include "base/containers/contiguous_iterator.h"
+#include "base/cxx17_backports.h"
+#include "base/cxx20_to_address.h"
#include "base/macros.h"
-#include "base/stl_util.h"
#include "base/template_util.h"
namespace gurl_base {
diff --git a/base/cxx17_backports.h b/base/cxx17_backports.h
index 6378a78..77d689a 100644
--- a/base/cxx17_backports.h
+++ b/base/cxx17_backports.h
@@ -6,10 +6,13 @@
#define BASE_CXX17_BACKPORTS_H_
#include <array>
+#include <functional>
#include <initializer_list>
#include <memory>
#include <string>
+#include "polyfills/base/check.h"
+
namespace gurl_base {
// C++14 implementation of C++17's std::size():
@@ -89,6 +92,22 @@
return !array.empty() ? &array[0] : nullptr;
}
+// C++14 implementation of C++17's std::clamp():
+// https://en.cppreference.com/w/cpp/algorithm/clamp
+// Please note that the C++ spec makes it undefined behavior to call std::clamp
+// with a value of `lo` that compares greater than the value of `hi`. This
+// implementation uses a GURL_CHECK to enforce this as a hard restriction.
+template <typename T, typename Compare>
+constexpr const T& clamp(const T& v, const T& lo, const T& hi, Compare comp) {
+ GURL_CHECK(!comp(hi, lo));
+ return comp(v, lo) ? lo : comp(hi, v) ? hi : v;
+}
+
+template <typename T>
+constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
+ return clamp(v, lo, hi, std::less<T>{});
+}
+
} // namespace base
#endif // BASE_CXX17_BACKPORTS_H_
diff --git a/base/no_destructor.h b/base/no_destructor.h
index 3d7a85c..2f3c549 100644
--- a/base/no_destructor.h
+++ b/base/no_destructor.h
@@ -6,9 +6,15 @@
#define BASE_NO_DESTRUCTOR_H_
#include <new>
+#include <type_traits>
#include <utility>
namespace gurl_base {
+// A tag type used for NoDestructor to allow it to be created for a type that
+// has a trivial destructor. Use for cases where the same class might have
+// different implementations that vary on destructor triviality or when the
+// LSan hiding properties of NoDestructor are needed.
+struct AllowForTriviallyDestructibleType;
// A wrapper that makes it easy to create an object of type T with static
// storage duration that:
@@ -44,9 +50,20 @@
// Note that since the destructor is never run, this *will* leak memory if used
// as a stack or member variable. Furthermore, a NoDestructor<T> should never
// have global scope as that may require a static initializer.
-template <typename T>
+template <typename T, typename O = std::nullptr_t>
class NoDestructor {
public:
+ static_assert(
+ !std::is_trivially_destructible<T>::value ||
+ std::is_same<O, AllowForTriviallyDestructibleType>::value,
+ "gurl_base::NoDestructor is not needed because the templated class has a "
+ "trivial destructor");
+
+ static_assert(std::is_same<O, AllowForTriviallyDestructibleType>::value ||
+ std::is_same<O, std::nullptr_t>::value,
+ "AllowForTriviallyDestructibleType is the only valid option "
+ "for the second template parameter of NoDestructor");
+
// Not constexpr; just write static constexpr T x = ...; if the value should
// be a constexpr.
template <typename... Args>
diff --git a/base/stl_util.h b/base/stl_util.h
index 609b71a..46e91b9 100644
--- a/base/stl_util.h
+++ b/base/stl_util.h
@@ -8,22 +8,13 @@
#define BASE_STL_UTIL_H_
#include <algorithm>
-#include <deque>
#include <forward_list>
#include <iterator>
-#include <list>
-#include <map>
-#include <set>
-#include <string>
#include <tuple>
#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
-#include <vector>
#include "polyfills/base/check.h"
-#include "base/cxx17_backports.h"
#include "base/ranges/algorithm.h"
#include "absl/types/optional.h"
@@ -31,20 +22,6 @@
namespace internal {
-// Calls erase on iterators of matching elements and returns the number of
-// removed elements.
-template <typename Container, typename Predicate>
-size_t IterateAndEraseIf(Container& container, Predicate pred) {
- size_t old_size = container.size();
- for (auto it = container.begin(), last = container.end(); it != last;) {
- if (pred(*it))
- it = container.erase(it);
- else
- ++it;
- }
- return old_size - container.size();
-}
-
template <typename Iter>
constexpr bool IsRandomAccessIter =
std::is_same<typename std::iterator_traits<Iter>::iterator_category,
@@ -52,23 +29,6 @@
} // namespace internal
-// Simplified C++14 implementation of C++20's std::to_address.
-// Note: This does not consider specializations of pointer_traits<>::to_address,
-// since that member function may only be present in C++20 and later.
-//
-// Reference: https://wg21.link/pointer.conversion#lib:to_address
-template <typename T>
-constexpr T* to_address(T* p) noexcept {
- static_assert(!std::is_function<T>::value,
- "Error: T must not be a function type.");
- return p;
-}
-
-template <typename Ptr>
-constexpr auto to_address(const Ptr& p) noexcept {
- return to_address(p.operator->());
-}
-
// Implementation of C++23's std::to_underlying.
//
// Note: This has an additional `std::is_enum<EnumT>` requirement to be SFINAE
@@ -348,167 +308,6 @@
return result;
}
-// Erase/EraseIf are based on C++20's uniform container erasure API:
-// - https://eel.is/c++draft/libraryindex#:erase
-// - https://eel.is/c++draft/libraryindex#:erase_if
-// They provide a generic way to erase elements from a container.
-// The functions here implement these for the standard containers until those
-// functions are available in the C++ standard.
-// For Chromium containers overloads should be defined in their own headers
-// (like standard containers).
-// Note: there is no std::erase for standard associative containers so we don't
-// have it either.
-
-template <typename CharT, typename Traits, typename Allocator, typename Value>
-size_t Erase(std::basic_string<CharT, Traits, Allocator>& container,
- const Value& value) {
- auto it = std::remove(container.begin(), container.end(), value);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <typename CharT, typename Traits, typename Allocator, class Predicate>
-size_t EraseIf(std::basic_string<CharT, Traits, Allocator>& container,
- Predicate pred) {
- auto it = std::remove_if(container.begin(), container.end(), pred);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <class T, class Allocator, class Value>
-size_t Erase(std::deque<T, Allocator>& container, const Value& value) {
- auto it = std::remove(container.begin(), container.end(), value);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <class T, class Allocator, class Predicate>
-size_t EraseIf(std::deque<T, Allocator>& container, Predicate pred) {
- auto it = std::remove_if(container.begin(), container.end(), pred);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <class T, class Allocator, class Value>
-size_t Erase(std::vector<T, Allocator>& container, const Value& value) {
- auto it = std::remove(container.begin(), container.end(), value);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <class T, class Allocator, class Predicate>
-size_t EraseIf(std::vector<T, Allocator>& container, Predicate pred) {
- auto it = std::remove_if(container.begin(), container.end(), pred);
- size_t removed = std::distance(it, container.end());
- container.erase(it, container.end());
- return removed;
-}
-
-template <class T, class Allocator, class Predicate>
-size_t EraseIf(std::forward_list<T, Allocator>& container, Predicate pred) {
- // Note: std::forward_list does not have a size() API, thus we need to use the
- // O(n) std::distance work-around. However, given that EraseIf is O(n)
- // already, this should not make a big difference.
- size_t old_size = std::distance(container.begin(), container.end());
- container.remove_if(pred);
- return old_size - std::distance(container.begin(), container.end());
-}
-
-template <class T, class Allocator, class Predicate>
-size_t EraseIf(std::list<T, Allocator>& container, Predicate pred) {
- size_t old_size = container.size();
- container.remove_if(pred);
- return old_size - container.size();
-}
-
-template <class Key, class T, class Compare, class Allocator, class Predicate>
-size_t EraseIf(std::map<Key, T, Compare, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key, class T, class Compare, class Allocator, class Predicate>
-size_t EraseIf(std::multimap<Key, T, Compare, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key, class Compare, class Allocator, class Predicate>
-size_t EraseIf(std::set<Key, Compare, Allocator>& container, Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key, class Compare, class Allocator, class Predicate>
-size_t EraseIf(std::multiset<Key, Compare, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key,
- class T,
- class Hash,
- class KeyEqual,
- class Allocator,
- class Predicate>
-size_t EraseIf(std::unordered_map<Key, T, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key,
- class T,
- class Hash,
- class KeyEqual,
- class Allocator,
- class Predicate>
-size_t EraseIf(
- std::unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key,
- class Hash,
- class KeyEqual,
- class Allocator,
- class Predicate>
-size_t EraseIf(std::unordered_set<Key, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class Key,
- class Hash,
- class KeyEqual,
- class Allocator,
- class Predicate>
-size_t EraseIf(
- std::unordered_multiset<Key, Hash, KeyEqual, Allocator>& container,
- Predicate pred) {
- return internal::IterateAndEraseIf(container, pred);
-}
-
-template <class T, class Allocator, class Value>
-size_t Erase(std::forward_list<T, Allocator>& container, const Value& value) {
- // Unlike std::forward_list::remove, this function template accepts
- // heterogeneous types and does not force a conversion to the container's
- // value type before invoking the == operator.
- return EraseIf(container, [&](const T& cur) { return cur == value; });
-}
-
-template <class T, class Allocator, class Value>
-size_t Erase(std::list<T, Allocator>& container, const Value& value) {
- // Unlike std::list::remove, this function template accepts heterogeneous
- // types and does not force a conversion to the container's value type before
- // invoking the == operator.
- return EraseIf(container, [&](const T& cur) { return cur == value; });
-}
-
// A helper class to be used as the predicate with |EraseIf| to implement
// in-place set intersection. Helps implement the algorithm of going through
// each container an element at a time, erasing elements from the first
diff --git a/base/strings/escape_unittest.cc b/base/strings/escape_unittest.cc
index b8a5fd6..923eb5a 100644
--- a/base/strings/escape_unittest.cc
+++ b/base/strings/escape_unittest.cc
@@ -421,8 +421,10 @@
EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'}));
// Should be looking for byte values, not UTF-8 character values.
- EXPECT_TRUE(ContainsEncodedBytes("caf%C3%A9", {'\xc3'}));
- EXPECT_FALSE(ContainsEncodedBytes("caf%C3%A9", {'\xe9'}));
+ EXPECT_TRUE(
+ ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xc3')}));
+ EXPECT_FALSE(
+ ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xe9')}));
}
} // namespace base
diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc
index e8bb070..0569da1 100644
--- a/base/strings/safe_sprintf.cc
+++ b/base/strings/safe_sprintf.cc
@@ -490,7 +490,6 @@
goto format_character_found;
}
}
- break;
case 'c': { // Output an ASCII character.
// Check that there are arguments left to be inserted.
if (cur_arg >= max_args) {
diff --git a/base/strings/safe_sprintf.h b/base/strings/safe_sprintf.h
index 92f8c59..40cddc5 100644
--- a/base/strings/safe_sprintf.h
+++ b/base/strings/safe_sprintf.h
@@ -5,12 +5,12 @@
#ifndef BASE_STRINGS_SAFE_SPRINTF_H_
#define BASE_STRINGS_SAFE_SPRINTF_H_
-#include "build/build_config.h"
-
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include "build/build_config.h"
+
#if defined(OS_POSIX) || defined(OS_FUCHSIA)
// For ssize_t
#include <unistd.h>
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h
index 8223b59..8c45d1b 100644
--- a/base/strings/string_number_conversions_internal.h
+++ b/base/strings/string_number_conversions_internal.h
@@ -14,7 +14,6 @@
#include "polyfills/base/check_op.h"
#include "polyfills/base/logging.h"
-#include "base/no_destructor.h"
#include "base/numerics/safe_math.h"
#include "base/strings/string_util.h"
#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
@@ -229,10 +228,10 @@
static const double_conversion::DoubleToStringConverter*
GetDoubleToStringConverter() {
- static NoDestructor<double_conversion::DoubleToStringConverter> converter(
+ static double_conversion::DoubleToStringConverter converter(
double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN,
nullptr, nullptr, 'e', -6, 12, 0, 0);
- return converter.get();
+ return &converter;
}
// Converts a given (data, size) pair to a desired string type. For
@@ -258,14 +257,14 @@
template <typename STRING, typename CHAR>
bool StringToDoubleImpl(STRING input, const CHAR* data, double& output) {
- static NoDestructor<double_conversion::StringToDoubleConverter> converter(
+ static double_conversion::StringToDoubleConverter converter(
double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
0.0, 0, nullptr, nullptr);
int processed_characters_count;
- output = converter->StringToDouble(data, input.size(),
- &processed_characters_count);
+ output =
+ converter.StringToDouble(data, input.size(), &processed_characters_count);
// Cases to return false:
// - If the input string is empty, there was nothing to parse.
diff --git a/base/strings/string_number_conversions_unittest.cc b/base/strings/string_number_conversions_unittest.cc
index f836316..b5a23a1 100644
--- a/base/strings/string_number_conversions_unittest.cc
+++ b/base/strings/string_number_conversions_unittest.cc
@@ -14,8 +14,8 @@
#include <limits>
#include "base/bit_cast.h"
+#include "base/cxx17_backports.h"
#include "base/format_macros.h"
-#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index 03f5103..8a22e8d 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h
@@ -25,7 +25,6 @@
#include <iosfwd>
#include <limits>
-#include <ostream>
#include <string>
#include <type_traits>
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index f76d2f7..2adfa94 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -22,8 +22,8 @@
#include <vector>
#include "polyfills/base/check_op.h"
+#include "base/cxx17_backports.h"
#include "base/no_destructor.h"
-#include "base/stl_util.h"
#include "base/strings/string_util_internal.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index ccbf745..5995c2d 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -20,6 +20,7 @@
#include "polyfills/base/base_export.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
+#include "base/cxx20_to_address.h"
#include "base/strings/string_piece.h" // For implicit conversions.
#include "build/build_config.h"
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index f8326cc..231c3c8 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc
@@ -14,7 +14,7 @@
#include <type_traits>
#include "base/bits.h"
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
index e1a18c9..3f74e07 100644
--- a/base/strings/stringprintf.cc
+++ b/base/strings/stringprintf.cc
@@ -9,9 +9,9 @@
#include <vector>
+#include "base/cxx17_backports.h"
#include "polyfills/base/logging.h"
#include "base/scoped_clear_last_error.h"
-#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc
index 0775dc4..f50fa3f 100644
--- a/base/strings/utf_offset_string_conversions_unittest.cc
+++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -6,7 +6,7 @@
#include <algorithm>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
diff --git a/base/strings/utf_string_conversions.cc b/base/strings/utf_string_conversions.cc
index 9595e7b..8cf90f1 100644
--- a/base/strings/utf_string_conversions.cc
+++ b/base/strings/utf_string_conversions.cc
@@ -7,6 +7,7 @@
#include <limits.h>
#include <stdint.h>
+#include <ostream>
#include <type_traits>
#include "base/strings/string_piece.h"
diff --git a/base/strings/utf_string_conversions_unittest.cc b/base/strings/utf_string_conversions_unittest.cc
index 752bf95..3b26fa8 100644
--- a/base/strings/utf_string_conversions_unittest.cc
+++ b/base/strings/utf_string_conversions_unittest.cc
@@ -4,7 +4,7 @@
#include <stddef.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
diff --git a/base/template_util.h b/base/template_util.h
index 78b52ee..d0803f8 100644
--- a/base/template_util.h
+++ b/base/template_util.h
@@ -10,11 +10,14 @@
#include <iterator>
#include <type_traits>
#include <utility>
-#include <vector>
#include "base/compiler_specific.h"
#include "build/build_config.h"
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 7
+#include <vector>
+#endif
+
// Some versions of libstdc++ have partial support for type_traits, but misses
// a smaller subset while removing some of the older non-standard stuff. Assume
// that all versions below 5.0 fall in this category, along with one 5.0
diff --git a/copy.bara.sky b/copy.bara.sky
index 1384bee..33c0f00 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -19,6 +19,7 @@
"base/containers/span.h",
"base/containers/util.h",
"base/cxx17_backports.h",
+ "base/cxx20_to_address.h",
"base/debug/leak_annotations.h",
"base/functional/*.h",
"base/i18n/uchar.h",
@@ -63,7 +64,6 @@
"base/debug/alias.h",
"base/export_template.h",
"base/logging.h",
- "base/metrics/histogram_macros.h",
"base/notreached.h",
"base/trace_event/memory_usage_estimator.h",
"third_party/perfetto/include/perfetto/tracing/traced_value.h",
diff --git a/url/BUILD b/url/BUILD
index 6ed3fc5..f2ec8da 100644
--- a/url/BUILD
+++ b/url/BUILD
@@ -43,9 +43,7 @@
"url_file.h",
"url_util.h",
],
- copts = build_config.default_copts + [
- "-Wno-c++11-narrowing",
- ],
+ copts = build_config.default_copts,
linkopts = build_config.url_linkopts,
visibility = ["//visibility:public"],
deps = [
diff --git a/url/gurl.cc b/url/gurl.cc
index 2d68889..18a46f1 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -237,11 +237,8 @@
NULL, &output, &result.parsed_);
output.Complete();
- if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_ =
- std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true);
- }
+
+ ProcessFileOrFileSystemURLAfterReplaceComponents(result);
return result;
}
@@ -260,14 +257,34 @@
NULL, &output, &result.parsed_);
output.Complete();
- if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_ =
- std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true);
- }
+
+ ProcessFileOrFileSystemURLAfterReplaceComponents(result);
+
return result;
}
+void GURL::ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const {
+ if (!url.is_valid_)
+ return;
+ if (url.SchemeIsFileSystem()) {
+ url.inner_url_ =
+ std::make_unique<GURL>(url.spec_.data(), url.parsed_.Length(),
+ *url.parsed_.inner_parsed(), true);
+ }
+#ifdef WIN32
+ if (url.SchemeIsFile()) {
+ // On Win32, some file URLs created through ReplaceComponents used to lose
+ // their hostname after being reparsed (e.g. when sent through IPC) due to
+ // special handling of file URLs with Windows-drive paths in the URL
+ // parser. To make the behavior for URLs modified through ReplaceComponents
+ // (instead of getting fully reparsed) the same, immediately reparse the
+ // URL here to trigger the special handling.
+ // See https://crbug.com/1214098.
+ url = GURL(url.spec());
+ }
+#endif
+}
+
GURL GURL::GetOrigin() const {
// This doesn't make sense for invalid or nonstandard URLs, so return
// the empty URL.
diff --git a/url/gurl.h b/url/gurl.h
index 21e6611..c70c5a4 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -468,6 +468,8 @@
return gurl_base::StringPiece(&spec_[comp.begin], comp.len);
}
+ void ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const;
+
// The actual text of the URL, in canonical ASCII form.
std::string spec_;
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index 0b81da4..f3b9f3c 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -4,7 +4,7 @@
#include <stddef.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -561,12 +561,12 @@
GURL url(" data: one ? two # three ");
// By default the trailing whitespace will have been stripped.
- EXPECT_EQ("data: one ? two #%20three", url.spec());
+ EXPECT_EQ("data: one ?%20two%20#%20three", url.spec());
GURL::Replacements repl;
repl.ClearRef();
GURL url_no_ref = url.ReplaceComponents(repl);
- EXPECT_EQ("data: one ? two ", url_no_ref.spec());
+ EXPECT_EQ("data: one ?%20two%20", url_no_ref.spec());
// Importing a parsed URL via this constructor overload will retain trailing
// whitespace.
@@ -574,7 +574,7 @@
url_no_ref.parsed_for_possibly_invalid_spec(),
url_no_ref.is_valid());
EXPECT_EQ(url_no_ref, import_url);
- EXPECT_EQ(import_url.query(), " two ");
+ EXPECT_EQ(import_url.query(), "%20two%20");
}
TEST(GURLTest, PathForRequest) {
@@ -862,7 +862,7 @@
{"http://www.example.com/GUID#ref", "www.example.com/GUID"},
{"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"},
{"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
- "text/html,Question?<div style=\"color: "},
+ "text/html,Question?%3Cdiv%20style=%22color:%20"},
// TODO(mkwst): This seems like a bug. https://crbug.com/513600
{"filesystem:http://example.com/path", "/"},
diff --git a/url/origin.cc b/url/origin.cc
index 33e26f9..6c7915f 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -7,6 +7,7 @@
#include <stdint.h>
#include <algorithm>
+#include <ostream>
#include <vector>
#include "base/base64.h"
@@ -423,11 +424,11 @@
}
// Moving a nonce does NOT trigger lazy-generation of the token.
-Origin::Nonce::Nonce(Origin::Nonce&& other) : token_(other.token_) {
+Origin::Nonce::Nonce(Origin::Nonce&& other) noexcept : token_(other.token_) {
other.token_ = gurl_base::UnguessableToken(); // Reset |other|.
}
-Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) {
+Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) noexcept {
token_ = other.token_;
other.token_ = gurl_base::UnguessableToken(); // Reset |other|.
return *this;
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
index 0c53f82..82d1f55 100644
--- a/url/origin_abstract_tests.h
+++ b/url/origin_abstract_tests.h
@@ -11,7 +11,6 @@
#include "base/containers/contains.h"
#include "base/strings/string_piece.h"
#include "testing/gtest/include/gtest/gtest.h"
-#include "absl/types/optional.h"
#include "url/gurl.h"
#include "url/origin.h"
#include "url/scheme_host_port.h"
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index a9d3a4f..cb78bb6 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -704,17 +704,19 @@
EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
}
- // Same basic test as above, but without a GURL to create tuple_.
- Origin opaque;
- absl::optional<std::string> serialized = SerializeWithNonce(opaque);
- ASSERT_TRUE(serialized);
+ {
+ // Same basic test as above, but without a GURL to create tuple_.
+ Origin opaque;
+ absl::optional<std::string> serialized = SerializeWithNonce(opaque);
+ ASSERT_TRUE(serialized);
- absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
- ASSERT_TRUE(deserialized.has_value());
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
- // Can't use DoEqualityComparisons here since empty nonces are never == unless
- // they are the same object.
- EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+ // Can't use DoEqualityComparisons here since empty nonces are never ==
+ // unless they are the same object.
+ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+ }
// Now force initialization of the nonce prior to serialization.
for (const GURL& url : invalid_urls) {
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
index a1f415d..a8bde47 100644
--- a/url/scheme_host_port_unittest.cc
+++ b/url/scheme_host_port_unittest.cc
@@ -5,7 +5,7 @@
#include <stddef.h>
#include <stdint.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/scheme_host_port.h"
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index e8a1edb..8edb7f3 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc
@@ -38,6 +38,8 @@
#include <stdlib.h>
+#include <ostream>
+
#include "polyfills/base/check_op.h"
#include "url/url_parse_internal.h"
#include "url/url_util.h"
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index b278b15..abcf615 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -3,10 +3,8 @@
// found in the LICENSE file.
#include "polyfills/base/check.h"
-#include "polyfills/base/metrics/histogram_macros.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
-#include "url/url_canon_ip.h"
namespace url {
@@ -379,16 +377,6 @@
if (host_info->IsIPAddress()) {
output->set_length(output_begin);
output->Append(canon_ip.data(), canon_ip.length());
- } else if (host_info->family == CanonHostInfo::NEUTRAL) {
- // Only need to call CheckHostnameSafety() for valid hosts that aren't IP
- // addresses and aren't broken.
- HostSafetyStatus host_safety_status = CheckHostnameSafety(spec, host);
- // Don't record kOK. Ratio of OK to not-OK statuses is not meaningful at
- // this layer, and hostnames are canonicalized a lot.
- if (host_safety_status != HostSafetyStatus::kOk) {
- UMA_HISTOGRAM_ENUMERATION("Net.Url.HostSafetyStatus",
- host_safety_status);
- }
}
} else {
// Canonicalization failed. Set BROKEN to notify the caller.
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc
index 7cd5cae..ca13427 100644
--- a/url/url_canon_icu_unittest.cc
+++ b/url/url_canon_icu_unittest.cc
@@ -4,8 +4,8 @@
#include <stddef.h>
+#include "base/cxx17_backports.h"
#include "polyfills/base/logging.h"
-#include "base/stl_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include <unicode/ucnv.h>
#include "url/url_canon.h"
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index ab56e7b..99541bd 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -427,7 +427,7 @@
}
for (int i = 0; i < written; ++i) {
- buffer[i] = char16_t{temp[i]};
+ buffer[i] = static_cast<char16_t>(temp[i]);
}
buffer[written] = '\0';
return 0;
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc
index 8234b4e..f0552b5 100644
--- a/url/url_canon_ip.cc
+++ b/url/url_canon_ip.cc
@@ -6,11 +6,10 @@
#include <stdint.h>
#include <stdlib.h>
+
#include <limits>
#include "polyfills/base/check.h"
-#include "base/strings/string_piece.h"
-#include "base/strings/string_util.h"
#include "url/url_canon_internal.h"
namespace url {
@@ -32,56 +31,6 @@
}
}
-template<typename CHAR, typename UCHAR>
-bool DoFindIPv4Components(const CHAR* spec,
- const Component& host,
- Component components[4]) {
- if (!host.is_nonempty())
- return false;
-
- int cur_component = 0; // Index of the component we're working on.
- int cur_component_begin = host.begin; // Start of the current component.
- int end = host.end();
- for (int i = host.begin; /* nothing */; i++) {
- if (i >= end || spec[i] == '.') {
- // Found the end of the current component.
- int component_len = i - cur_component_begin;
- components[cur_component] = Component(cur_component_begin, component_len);
-
- // The next component starts after the dot.
- cur_component_begin = i + 1;
- cur_component++;
-
- // Don't allow empty components (two dots in a row), except we may
- // allow an empty component at the end (this would indicate that the
- // input ends in a dot). We also want to error if the component is
- // empty and it's the only component (cur_component == 1).
- if (component_len == 0 && (i < end || cur_component == 1))
- return false;
-
- if (i >= end)
- break; // End of the input.
-
- if (cur_component == 4) {
- // Anything else after the 4th component is an error unless it is a
- // dot that would otherwise be treated as the end of input.
- if (spec[i] == '.' && i + 1 == end)
- break;
- return false;
- }
- } else if (static_cast<UCHAR>(spec[i]) >= 0x80 ||
- !IsIPv4Char(static_cast<unsigned char>(spec[i]))) {
- // Invalid character for an IPv4 address.
- return false;
- }
- }
-
- // Fill in any unused components.
- while (cur_component < 4)
- components[cur_component++] = Component();
- return true;
-}
-
// Converts an IPv4 component to a 32-bit number, while checking for overflow.
//
// Possible return values:
@@ -89,13 +38,15 @@
// - BROKEN - The input was numeric, but too large for a 32-bit field.
// - NEUTRAL - Input was not numeric.
//
-// The input is assumed to be ASCII. FindIPv4Components should have stripped
-// out any input that is greater than 7 bits. The components are assumed
-// to be non-empty.
+// Empty components and components containing non-ASCII characters are treated as non-numeric (NEUTRAL).
template<typename CHAR>
CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
const Component& component,
uint32_t* number) {
+ // Empty components are considered non-numeric.
+ if (!component.is_nonempty())
+ return CanonHostInfo::NEUTRAL;
+
// Figure out the base
SharedCharTypes base;
int base_prefix_len = 0; // Size of the prefix for this base.
@@ -127,14 +78,25 @@
const int kMaxComponentLen = 16;
char buf[kMaxComponentLen + 1]; // digits + '\0'
int dest_i = 0;
+ bool may_be_broken_octal = false;
for (int i = component.begin + base_prefix_len; i < component.end(); i++) {
+ if (spec[i] >= 0x80)
+ return CanonHostInfo::NEUTRAL;
+
// We know the input is 7-bit, so convert to narrow (if this is the wide
// version of the template) by casting.
char input = static_cast<char>(spec[i]);
// Validate that this character is OK for the given base.
- if (!IsCharOfType(input, base))
- return CanonHostInfo::NEUTRAL;
+ if (!IsCharOfType(input, base)) {
+ if (IsCharOfType(input, CHAR_DEC)) {
+ // Entirely numeric components with leading 0s that aren't octal are
+ // considered broken.
+ may_be_broken_octal = true;
+ } else {
+ return CanonHostInfo::NEUTRAL;
+ }
+ }
// Fill the buffer, if there's space remaining. This check allows us to
// verify that all characters are numeric, even those that don't fit.
@@ -142,6 +104,9 @@
buf[dest_i++] = input;
}
+ if (may_be_broken_octal)
+ return CanonHostInfo::BROKEN;
+
buf[dest_i] = '\0';
// Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
@@ -158,64 +123,76 @@
}
// See declaration of IPv4AddressToNumber for documentation.
-template<typename CHAR>
+template <typename CHAR, typename UCHAR>
CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
- const Component& host,
+ Component host,
unsigned char address[4],
int* num_ipv4_components) {
- // The identified components. Not all may exist.
- Component components[4];
- if (!FindIPv4Components(spec, host, components))
+ // Ignore terminal dot, if present.
+ if (host.is_nonempty() && spec[host.end() - 1] == '.')
+ --host.len;
+
+ // Do nothing if empty.
+ if (!host.is_nonempty())
return CanonHostInfo::NEUTRAL;
- // Convert existing components to digits. Values up to
- // |existing_components| will be valid.
+ // Read component values. The first `existing_components` of them are
+ // populated front to back, with the first one corresponding to the last
+ // component, which allows for early exit if the last component isn't a
+ // number.
uint32_t component_values[4];
int existing_components = 0;
- // Set to true if one or more components are BROKEN. BROKEN is only
- // returned if all components are IPV4 or BROKEN, so, for example,
- // 12345678912345.de returns NEUTRAL rather than broken.
- bool broken = false;
- for (int i = 0; i < 4; i++) {
- if (components[i].len <= 0)
+ int current_component_end = host.end();
+ int current_position = current_component_end;
+ while (true) {
+ // If this is not the first character of a component, go to the next
+ // component.
+ if (current_position != host.begin && spec[current_position - 1] != '.') {
+ --current_position;
continue;
- CanonHostInfo::Family family = IPv4ComponentToNumber(
- spec, components[i], &component_values[existing_components]);
-
- if (family == CanonHostInfo::BROKEN) {
- broken = true;
- } else if (family != CanonHostInfo::IPV4) {
- // Stop if we hit a non-BROKEN invalid non-empty component.
- return family;
}
- existing_components++;
+ CanonHostInfo::Family family = IPv4ComponentToNumber(
+ spec,
+ Component(current_position, current_component_end - current_position),
+ &component_values[existing_components]);
+
+ // If `family` is NEUTRAL and this is the last component, return NEUTRAL. If
+ // `family` is NEUTRAL but not the last component, this is considered a
+ // BROKEN IPv4 address, as opposed to a non-IPv4 hostname.
+ if (family == CanonHostInfo::NEUTRAL && existing_components == 0)
+ return CanonHostInfo::NEUTRAL;
+
+ if (family != CanonHostInfo::IPV4)
+ return CanonHostInfo::BROKEN;
+
+ ++existing_components;
+
+ // If this is the final component, nothing else to do.
+ if (current_position == host.begin)
+ break;
+
+ // If there are more than 4 components, fail.
+ if (existing_components == 4)
+ return CanonHostInfo::BROKEN;
+
+ current_component_end = current_position - 1;
+ --current_position;
}
- if (broken)
- return CanonHostInfo::BROKEN;
-
- // Use that sequence of numbers to fill out the 4-component IP address.
+ // Use `component_values` to fill out the 4-component IP address.
// First, process all components but the last, while making sure each fits
// within an 8-bit field.
- for (int i = 0; i < existing_components - 1; i++) {
+ for (int i = existing_components - 1; i > 0; i--) {
if (component_values[i] > std::numeric_limits<uint8_t>::max())
return CanonHostInfo::BROKEN;
- address[i] = static_cast<unsigned char>(component_values[i]);
+ address[existing_components - i - 1] =
+ static_cast<unsigned char>(component_values[i]);
}
- // Next, consume the last component to fill in the remaining bytes.
- // Work around a gcc 4.9 bug. crbug.com/392872
-#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Warray-bounds"
-#endif
- uint32_t last_value = component_values[existing_components - 1];
-#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
-#pragma GCC diagnostic pop
-#endif
+ uint32_t last_value = component_values[0];
for (int i = 3; i >= existing_components - 1; i--) {
address[i] = static_cast<unsigned char>(last_value);
last_value >>= 8;
@@ -595,105 +572,6 @@
return true;
}
-// Method to check if something looks like a number. Used instead of
-// IPv4ComponentToNumber() so that it counts things that look like bad base-8
-// (e.g. 09).
-//
-// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
-template <typename CHAR>
-bool LooksLikeANumber(const CHAR* spec, const Component& component) {
- // Empty components don't look like numbers.
- if (!component.is_nonempty())
- return false;
-
- SharedCharTypes base = CHAR_DEC;
- size_t start = component.begin;
- if (component.len >= 2 && spec[start] == '0' &&
- (spec[start + 1] == 'x' || spec[start + 1] == 'X')) {
- base = CHAR_HEX;
- start += 2;
- }
- for (int i = start; i < component.end(); i++) {
- if (!IsCharOfType(spec[i], base))
- return false;
- }
- return true;
-}
-
-// Calculates the "HostSafetyStatus" of the provided hostname.
-//
-// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
-template <typename CHAR>
-HostSafetyStatus DoCheckHostnameSafety(const CHAR* spec,
- const Component& host) {
- if (!host.is_nonempty())
- return HostSafetyStatus::kOk;
-
- // Find the last two components.
-
- // Number of identified components. Stops after second component. Does not
- // include the empty terminal component, if the host ends with a dot.
- int existing_components = 0;
- // Parsed component values. Populated last component first.
- Component components[2];
-
- // Index of the character after the end of the current component.
- int cur_component_end = host.end();
-
- // Ignore terminal dot, if there is one.
- if (spec[cur_component_end - 1] == '.') {
- cur_component_end--;
- // Nothing else to do if the host is just a dot.
- if (host.begin == cur_component_end)
- return HostSafetyStatus::kOk;
- }
-
- for (int i = cur_component_end; /* nothing */; i--) {
- GURL_DCHECK_GE(i, host.begin);
-
- // If `i` is not the first character of the component, continue.
- if (i != host.begin && spec[i - 1] != '.')
- continue;
-
- // Otherwise, i is the index of the the start of a component.
- components[existing_components] = Component(i, cur_component_end - i);
- existing_components++;
-
- // Finished parsing last component.
- if (i == host.begin)
- break;
-
- // If there's anything left to parse after the 2th component, nothing more
- // to do.
- if (existing_components == 2)
- break;
-
- // The next component ends before the dot at spec[i]. `i` will be
- // decremented when restarting the loop, so no need to modify it.
- cur_component_end = i - 1;
- }
-
- // If the last value doesn't look like a number, no need to do more work, as
- // IPv6 and hostnames with non-numeric final components are all considered OK.
- if (!LooksLikeANumber(spec, components[0]))
- return HostSafetyStatus::kOk;
-
- url::RawCanonOutputT<char> ignored_output;
- CanonHostInfo host_info;
- CanonicalizeIPAddress(spec, host, &ignored_output, &host_info);
- // Ignore valid IPv4 addresses, and hostnames considered invalid by the IPv4
- // and IPv6 parsers. The IPv6 check doesn't provide a whole lot, but does mean
- // things like "].6" will correctly be considered already invalid, so will
- // return kOk.
- if (host_info.family != CanonHostInfo::NEUTRAL)
- return HostSafetyStatus::kOk;
-
- if (LooksLikeANumber(spec, components[1]))
- return HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric;
-
- return HostSafetyStatus::kTopLevelDomainIsNumeric;
-}
-
} // namespace
void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
@@ -745,18 +623,6 @@
}
}
-bool FindIPv4Components(const char* spec,
- const Component& host,
- Component components[4]) {
- return DoFindIPv4Components<char, unsigned char>(spec, host, components);
-}
-
-bool FindIPv4Components(const char16_t* spec,
- const Component& host,
- Component components[4]) {
- return DoFindIPv4Components<char16_t, char16_t>(spec, host, components);
-}
-
void CanonicalizeIPAddress(const char* spec,
const Component& host,
CanonOutput* output,
@@ -785,15 +651,16 @@
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components);
+ return DoIPv4AddressToNumber<char, unsigned char>(spec, host, address,
+ num_ipv4_components);
}
CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<char16_t>(spec, host, address,
- num_ipv4_components);
+ return DoIPv4AddressToNumber<char16_t, char16_t>(spec, host, address,
+ num_ipv4_components);
}
bool IPv6AddressToNumber(const char* spec,
@@ -808,13 +675,4 @@
return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
}
-HostSafetyStatus CheckHostnameSafety(const char* spec, const Component& host) {
- return DoCheckHostnameSafety(spec, host);
-}
-
-HostSafetyStatus CheckHostnameSafety(const char16_t* spec,
- const Component& host) {
- return DoCheckHostnameSafety(spec, host);
-}
-
} // namespace url
diff --git a/url/url_canon_ip.h b/url/url_canon_ip.h
index 8980dbb..4e85466 100644
--- a/url/url_canon_ip.h
+++ b/url/url_canon_ip.h
@@ -6,7 +6,6 @@
#define URL_URL_CANON_IP_H_
#include "polyfills/base/component_export.h"
-#include "base/strings/string_piece_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -20,33 +19,6 @@
COMPONENT_EXPORT(URL)
void AppendIPv6Address(const unsigned char address[16], CanonOutput* output);
-// Searches the host name for the portions of the IPv4 address. On success,
-// each component will be placed into |components| and it will return true.
-// It will return false if the host can not be separated as an IPv4 address
-// or if there are any non-7-bit characters or other characters that can not
-// be in an IP address. (This is important so we fail as early as possible for
-// common non-IP hostnames.)
-//
-// Not all components may exist. If there are only 3 components, for example,
-// the last one will have a length of -1 or 0 to indicate it does not exist.
-//
-// Note that many platforms' inet_addr will ignore everything after a space
-// in certain circumstances if the stuff before the space looks like an IP
-// address. IE6 is included in this. We do NOT handle this case. In many cases,
-// the browser's canonicalization will get run before this which converts
-// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
-// so this code path never gets hit. Our host canonicalization will notice
-// these spaces and escape them, which will make IP address finding fail. This
-// seems like better behavior than stripping after a space.
-COMPONENT_EXPORT(URL)
-bool FindIPv4Components(const char* spec,
- const Component& host,
- Component components[4]);
-COMPONENT_EXPORT(URL)
-bool FindIPv4Components(const char16_t* spec,
- const Component& host,
- Component components[4]);
-
// Converts an IPv4 address to a 32-bit number (network byte order).
//
// Possible return values:
@@ -83,48 +55,6 @@
const Component& host,
unsigned char address[16]);
-// Temporary enum for collecting histograms at the DNS and URL level about
-// hostname validity, for potentially updating the URL spec.
-//
-// This is used in histograms, so old values should not be reused, and new
-// values should be added at the bottom.
-//
-// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
-enum class HostSafetyStatus {
- // Any canonical hostname that doesn't fit into any other class. IPv4
- // hostnames, hostnames that don't have numeric eTLDs, etc. Hostnames that are
- // broken are also considered OK.
- kOk = 0,
-
- // The top level domain looks numeric. This is basically means it either
- // parses as a number per the URL spec, or is entirely numeric ("09" doesn't
- // currently parse as a number, since the leading "0" indicates an octal
- // value).
- kTopLevelDomainIsNumeric = 1,
-
- // Both the top level domain and the next level domain look like a number,
- // using the above definition. This is the case that is actually concerning -
- // for these domains, the eTLD+1 is purely numeric, which means putting it as
- // the hostname of a URL will potentially result in an IPv4 hostname. This is
- // logically a subset of kTopLevelDomainIsNumeric, but when both apply, this
- // label will be returned instead.
- kTwoHighestLevelDomainsAreNumeric = 2,
-
- kMaxValue = kTwoHighestLevelDomainsAreNumeric,
-};
-
-// Calculates the HostSafetyStatus of a hostname. Hostname should have been
-// canonicalized. This function is only intended to be temporary, to inform
-// decisions around tightening up what the URL parser considers valid hostnames.
-//
-// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
-COMPONENT_EXPORT(URL)
-HostSafetyStatus CheckHostnameSafety(const char* hostname,
- const Component& host);
-COMPONENT_EXPORT(URL)
-HostSafetyStatus CheckHostnameSafety(const char16_t* hostname,
- const Component& host);
-
} // namespace url
#endif // URL_URL_CANON_IP_H_
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index 134e132..e726cfb 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc
@@ -63,14 +63,17 @@
new_parsed->host.reset();
new_parsed->port.reset();
- // Canonicalize path and query via the weaker path URL rules.
+ // Canonicalize path via the weaker path URL rules.
//
// Note: parsing the path part should never cause a failure, see
// https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
output, &new_parsed->path);
- DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?',
- output, &new_parsed->query);
+
+ // Similar to mailto:, always use the default UTF-8 charset converter for
+ // query.
+ CanonicalizeQuery(source.query, parsed.query, nullptr, output,
+ &new_parsed->query);
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index a59c745..aa2a8ce 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -5,14 +5,13 @@
#include <errno.h>
#include <stddef.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
-#include "url/url_canon_ip.h"
#include "url/url_canon_stdstring.h"
#include "url/url_test_utils.h"
@@ -609,21 +608,36 @@
}
TEST(URLCanonTest, IPv4) {
+ // clang-format off
IPAddressCase cases[] = {
- // Empty is not an IP address.
+ // Empty is not an IP address.
{"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Regular IP addresses in different bases.
+ // Regular IP addresses in different bases.
{"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
- // Non-IP addresses due to invalid characters.
+ // Non-IP addresses due to invalid characters.
{"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Invalid characters for the base should be rejected.
- {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // If there are not enough components, the last one should fill them out.
+ // Hostnames with a numeric final component but other components that don't
+ // parse as numbers should be considered broken.
+ {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"19a.168.0.1.", L"19a.168.0.1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0308.0250.00.01.", L"0308.0250.00.01.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0xCG.0xA8.0x0.0x1.", L"0xCG.0xA8.0x0.0x1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Hostnames with a non-numeric terminal component should be considered valid non-IPv4 hostnames.
+ {"19.168.0.1a", L"19.168.0.1a", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0xC.0xA8.0x0.0x1G", L"0xC.0xA8.0x0.0x1G", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Hostnames that would be considered broken IPv4 hostnames should be considered valid non-IPv4 hostnames if they end with two dots instead of 0 or 1.
+ {"19a.168.0.1..", L"19a.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0308.0250.00.01..", L"0308.0250.00.01..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0xCG.0xA8.0x0.0x1..", L"0xCG.0xA8.0x0.0x1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Hosts with components that aren't considered valid IPv4 numbers but are entirely numeric should be considered invalid.
+ {"1.2.3.08", L"1.2.3.08", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.3.08.", L"1.2.3.08.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // If there are not enough components, the last one should fill them out.
{"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"},
{"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
{"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
@@ -632,15 +646,16 @@
{"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
- // Too many components means not an IP address.
- {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // We allow a single trailing dot.
+ // Hostnames with too many components, but a numeric final component, are invalid.
+ {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // We allow a single trailing dot.
{"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Two dots in a row means not an IP address.
- {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Any numerical overflow should be marked as BROKEN.
+ // Hosts with two dots in a row with a final numeric component are considered invalid.
+ {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"192.168..1.", L"192.168..1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Any numerical overflow should be marked as BROKEN.
{"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -650,7 +665,7 @@
{"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Repeat the previous tests, minus 1, to verify boundaries.
+ // Repeat the previous tests, minus 1, to verify boundaries.
{"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"},
{"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"},
{"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"},
@@ -660,52 +675,69 @@
{"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
{"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
{"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
- // Old trunctations tests. They're all "BROKEN" now.
+ // Old truncation tests. They're all "BROKEN" now.
{"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Spaces should be rejected.
+ // Too many components should be rejected, in valid ranges or not.
+ {"255.255.255.255.255", L"255.255.255.255.255", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"256.256.256.256.256", L"256.256.256.256.256", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Spaces should be rejected.
{"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Very large numbers.
+ // Very large numbers.
{"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"},
{"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
- // A number has no length limit, but long numbers can still overflow.
+ // A number has no length limit, but long numbers can still overflow.
{"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"},
{"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // If a long component is non-numeric, it's a hostname, *not* a broken IP.
+ // If a long component is non-numeric, it's a hostname, *not* a broken IP.
{"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Truncation of all zeros should still result in 0.
+ // Truncation of all zeros should still result in 0.
{"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"},
+ // Non-ASCII characters in final component should return NEUTRAL.
+ {"1.2.3.\xF0\x9F\x92\xA9", L"1.2.3.\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.4\xF0\x9F\x92\xA9", L"1.2.3.4\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.0x\xF0\x9F\x92\xA9", L"1.2.3.0x\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.0\xF0\x9F\x92\xA9", L"1.2.3.0\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Non-ASCII characters in other components should result in broken IPs when final component is numeric.
+ {"1.2.\xF0\x9F\x92\xA9.4", L"1.2.\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.3\xF0\x9F\x92\xA9.4", L"1.2.3\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.0x\xF0\x9F\x92\xA9.4", L"1.2.0x\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.0\xF0\x9F\x92\xA9.4", L"1.2.0\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"\xF0\x9F\x92\xA9.2.3.4", L"\xD83D\xDCA9.2.3.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
};
+ // clang-format on
- for (size_t i = 0; i < gurl_base::size(cases); i++) {
+ for (const auto& test_case : cases) {
+ SCOPED_TRACE(test_case.input8);
+
// 8-bit version.
- Component component(0, static_cast<int>(strlen(cases[i].input8)));
+ Component component(0, static_cast<int>(strlen(test_case.input8)));
std::string out_str1;
StdStringCanonOutput output1(&out_str1);
CanonHostInfo host_info;
- CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info);
+ CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info);
output1.Complete();
- EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
+ EXPECT_EQ(test_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
- EXPECT_STREQ(cases[i].expected, out_str1.c_str());
- EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
- EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(cases[i].expected_num_ipv4_components,
+ EXPECT_STREQ(test_case.expected, out_str1.c_str());
+ EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
// 16-bit version.
std::u16string input16(
- test_utils::TruncateWStringToUTF16(cases[i].input16));
+ test_utils::TruncateWStringToUTF16(test_case.input16));
component = Component(0, static_cast<int>(input16.length()));
std::string out_str2;
@@ -713,14 +745,14 @@
CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
output2.Complete();
- EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
+ EXPECT_EQ(test_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
- EXPECT_STREQ(cases[i].expected, out_str2.c_str());
- EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
- EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(cases[i].expected_num_ipv4_components,
+ EXPECT_STREQ(test_case.expected, out_str2.c_str());
+ EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
@@ -2529,115 +2561,4 @@
output.set_length(0);
}
-TEST(URLCanonTest, URLSafetyStatus) {
- const struct {
- const char* host;
- HostSafetyStatus expected_safety_status;
- } kTestCases[] = {
- // Empty components are ok.
- {"", HostSafetyStatus::kOk},
- {".", HostSafetyStatus::kOk},
- {"..", HostSafetyStatus::kOk},
-
- // Hostnames with purely non-numeric components are ok.
- {"com", HostSafetyStatus::kOk},
- {"a.com", HostSafetyStatus::kOk},
- {"a.b.com", HostSafetyStatus::kOk},
-
- // Hostnames with components with letters and numbers are ok.
- {"1com", HostSafetyStatus::kOk},
- {"0a.0com", HostSafetyStatus::kOk},
- {"0xa.0xb.0xcom", HostSafetyStatus::kOk},
- {"com1", HostSafetyStatus::kOk},
- {"a1.com1", HostSafetyStatus::kOk},
- {"a1.b1.com1", HostSafetyStatus::kOk},
-
- // Hostnames components that are numbers that are before a final
- // non-numeric component are ok.
- {"1.com", HostSafetyStatus::kOk},
- {"0.1.2com", HostSafetyStatus::kOk},
-
- // Invalid hostnames are ok.
- {"[", HostSafetyStatus::kOk},
-
- // IPv6 hostnames are ok.
- {"[::]", HostSafetyStatus::kOk},
- {"[2001:db8::1]", HostSafetyStatus::kOk},
-
- // IPv4 hostnames are ok.
- {"1.2.3.4", HostSafetyStatus::kOk},
- // IPv4 hostnames with creative representations are ok.
- {"01.02.03.04", HostSafetyStatus::kOk},
- {"0x1.0x2.0x3.0x4", HostSafetyStatus::kOk},
- {"1.2", HostSafetyStatus::kOk},
- {"1.2.3", HostSafetyStatus::kOk},
- {"0", HostSafetyStatus::kOk},
- {"0x0", HostSafetyStatus::kOk},
- {"07", HostSafetyStatus::kOk},
-
- // Hostnames with a final problematic top level domain.
- {"a.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.123", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.123456", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.999999999999999999", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.0x1", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.0xabcdef", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.0XABCDEF", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.07", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a.09", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {".0", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"foo.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"1.bar.0", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"a..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
- {"1..0", HostSafetyStatus::kTopLevelDomainIsNumeric},
-
- // Hostnames with problematic two highest level domains.
- {"a.1.2", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
- {"a.0x1.0x2f", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
- {"a.06.09", HostSafetyStatus::kTwoHighestLevelDomainsAreNumeric},
- };
-
- for (const auto& test_case : kTestCases) {
- // Test with ASCII.
- SCOPED_TRACE(test_case.host);
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(test_case.host,
- Component(0, strlen(test_case.host))));
-
- // Test with ASCII and terminal dot, which shouldn't affect results for
- // anything that doesn't already end in a dot (or anything that only has
- // dots).
- std::string host_with_dot = test_case.host;
- host_with_dot += ".";
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(host_with_dot.c_str(),
- Component(0, host_with_dot.size())));
-
- // Test with ASCII and characters that are not part of the component.
- std::string host_with_bonus_characters = test_case.host;
- host_with_bonus_characters = "00" + host_with_bonus_characters + "00";
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(host_with_bonus_characters.c_str(),
- Component(2, strlen(test_case.host))));
-
- // Test with UTF-16.
- std::u16string utf16 = gurl_base::UTF8ToUTF16(test_case.host);
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(utf16.c_str(), Component(0, utf16.size())));
-
- // Test with UTF-16 and terminal dot.
- std::u16string utf16_with_dot = gurl_base::UTF8ToUTF16(host_with_dot);
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(utf16_with_dot.c_str(),
- Component(0, utf16_with_dot.size())));
-
- // Test with UTF-16 and characters that are not part of the component.
- std::u16string utf16_with_bonus_characters =
- gurl_base::UTF8ToUTF16(host_with_bonus_characters);
- EXPECT_EQ(test_case.expected_safety_status,
- CheckHostnameSafety(utf16_with_bonus_characters.c_str(),
- Component(2, utf16.size())));
- }
-}
-
} // namespace url
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index 4029d61..381d74e 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -11,7 +11,6 @@
#include <ostream>
#include "polyfills/base/check_op.h"
-#include "base/no_destructor.h"
#include <unicode/uidna.h>
#include <unicode/utypes.h>
#include "url/url_canon_icu.h"
@@ -19,10 +18,8 @@
namespace url {
-namespace {
-
-// A wrapper to use gurl_base::NoDestructor with ICU's UIDNA, a C pointer to
-// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
+// Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with
+// uidna_openUTS46().
//
// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
// code points allowed) to IDNA 2008 with
@@ -42,12 +39,12 @@
// http://goo.gl/3XBhqw ).
// See http://http://unicode.org/reports/tr46/ and references therein
// for more details.
-struct UIDNAWrapper {
- UIDNAWrapper() {
+UIDNA* GetUIDNA() {
+ static UIDNA* uidna = [] {
UErrorCode err = U_ZERO_ERROR;
// TODO(jungshik): Change options as different parties (browsers,
// registrars, search engines) converge toward a consensus.
- value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
+ UIDNA* value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
if (U_FAILURE(err)) {
GURL_CHECK(false) << "failed to open UTS46 data with error: "
<< u_errorName(err)
@@ -56,16 +53,9 @@
<< "tables for libicu. See https://crbug.com/778929.";
value = nullptr;
}
- }
-
- UIDNA* value;
-};
-
-} // namespace
-
-UIDNA* GetUIDNA() {
- static gurl_base::NoDestructor<UIDNAWrapper> uidna_wrapper;
- return uidna_wrapper->value;
+ return value;
+ }();
+ return uidna;
}
// Converts the Unicode input representing a hostname to ASCII using IDN rules.
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index b67b550..b23dcf8 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc
@@ -6,7 +6,7 @@
#include <stddef.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
@@ -491,7 +491,7 @@
struct FileCase {
const char* input;
const char* expected;
- } file_cases[] = {
+ } extract_cases[] = {
{"http://www.google.com", nullptr},
{"http://www.google.com/", ""},
{"http://www.google.com/search", "search"},
@@ -509,8 +509,8 @@
{"http://www.google.com/foo;bar;html", "foo"},
};
- for (size_t i = 0; i < gurl_base::size(file_cases); i++) {
- const char* url = file_cases[i].input;
+ for (size_t i = 0; i < gurl_base::size(extract_cases); i++) {
+ const char* url = extract_cases[i].input;
int len = static_cast<int>(strlen(url));
Parsed parsed;
@@ -519,7 +519,7 @@
Component file_name;
ExtractFileName(url, parsed.path, &file_name);
- EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name));
+ EXPECT_TRUE(ComponentMatches(url, extract_cases[i].expected, file_name));
}
}
diff --git a/url/url_util.cc b/url/url_util.cc
index 0c35913..470da30 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -6,7 +6,9 @@
#include <stddef.h>
#include <string.h>
+
#include <atomic>
+#include <ostream>
#include "polyfills/base/check_op.h"
#include "base/compiler_specific.h"
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index 3dcfa76..158e3d6 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -4,7 +4,7 @@
#include <stddef.h>
-#include "base/stl_util.h"
+#include "base/cxx17_backports.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"