Update googleurl to the latest Chromium version. This updates googleurl to the Chromium upstream version 8948596515b3eaf06c0e3db256c9513b5dca52d8 from Tue Dec 13 16:51:45 2022 +0000. Change-Id: I16d52bfff8ebf049358821c8c8c0b11c377e6268

diff --git a/AUTHORS b/AUTHORS
index b32867a..0e64013 100644
--- a/AUTHORS
+++ b/AUTHORS

@@ -17,6 +17,7 @@
 Aaron Randolph <aaron.randolph@gmail.com>
 Aaryaman Vasishta <jem456.vasishta@gmail.com>
 Abdu Ameen <abdu.ameen000@gmail.com>
+Abdullah Abu Tasneem <a.tasneem@samsung.com>
 Abhijeet Kandalkar <abhijeet.k@samsung.com>
 Abhinav Vij <abhinav.vij@samsung.com>
 Abhishek Agarwal <abhishek.a21@samsung.com>
@@ -82,6 +83,7 @@
 Alvaro Silva <alvaro.fagner@gmail.com>
 Ambarish Rapte <ambarish.r@samsung.com>
 Amey Jahagirdar <jahagird@amazon.com>
+Amit Paul <a.paul@samsung.com>
 Amit Sarkar <amit.srkr@samsung.com>
 Amogh Bihani <amogh.bihani@samsung.com>
 Amos Lim <amoseui@gmail.com>
@@ -126,6 +128,7 @@
 Anuj Kumar Sharma <anujk.sharma@samsung.com>
 Ao Sun <ntusunao@gmail.com>
 Ao Wang <wangao.james@bytedance.com>
+Aquibuzzaman Md. Sayem <md.sayem@samsung.com>
 Arjun Karthik <arjunkar@amazon.com>
 Arman Ghotb <armanghotb@gmail.com>
 Armin Burgmeier <aburgmeier@bloomberg.net>
@@ -141,6 +144,7 @@
 Arunoday Sarkar <a.sarkar.arun@gmail.com>
 Arunprasad Rajkumar <ararunprasad@gmail.com>
 Arunprasad Rajkumar <arurajku@cisco.com>
+Arup Barua <arup.barua@samsung.com>
 Asami Doi <d0iasm.pub@gmail.com>
 Ashish Kumar Gupta <guptaag@amazon.com>
 Ashlin Joseph <ashlin.j@samsung.com>
@@ -170,12 +174,14 @@
 Bhanukrushana Rout <b.rout@samsung.com>
 Biljith Jayan <billy.jayan@samsung.com>
 Bin Liao <bin.liao@intel.com>
+Bin Miao <bin.miao@intel.com>
 Boaz Sender <boaz@bocoup.com>
 Bobby Powers <bobbypowers@gmail.com>
 Branden Archer <bma4@zips.uakron.edu>
 Brendan Kirby <brendan.kirby@imgtec.com>
 Brendan Long <self@brendanlong.com>
 Brendon Tiszka <btiszka@gmail.com>
+Brett Lewis <brettlewis@brettlewis.us>
 Brian Clifton <clifton@brave.com>
 Brian Dunn <brian@theophil.us>
 Brian G. Merrell <bgmerrell@gmail.com>
@@ -228,6 +234,7 @@
 Cheng Yu <yuzichengcode@gmail.com>
 Cheung Ho <uioptt24@gmail.com>
 Choongwoo Han <cwhan.tunz@gmail.com>
+Choudhury M. Shamsujjoha <choudhury.s@samsung.com>
 Chris Greene <cwgreene@amazon.com>
 Chris Harrelson <chrishtr@gmail.com>
 Chris Nardi <hichris123@gmail.com>
@@ -316,6 +323,7 @@
 Dominic Farolino <domfarolino@gmail.com>
 Dominic Jodoin <dominic.jodoin@gmail.com>
 Dominik Röttsches <dominik.rottsches@intel.com>
+Dominik Schütz <do.sch.dev@gmail.com>
 Don Woodward <woodward@adobe.com>
 Donghee Na <corona10@gmail.com>
 Dong-hee Na <donghee.na92@gmail.com>
@@ -432,6 +440,7 @@
 Hari Singh <hari.singh1@samsung.com>
 Harpreet Singh Khurana <harpreet.sk@samsung.com>
 Harshikesh Kumar <harshikeshnobug@gmail.com>
+Harshit Pal <harshitp12345@gmail.com>
 Hassan Salehe Matar <hassansalehe@gmail.com>
 Hautio Kari <khautio@gmail.com>
 Heejin R. Chung <heejin.r.chung@samsung.com>
@@ -460,6 +469,8 @@
 Hyemi Shin <hyemi.sin@samsung.com>
 HyeockJin Kim <kherootz@gmail.com>
 Hyojeong Kim <42.4.hyojekim@gmail.com>
+Hyomin Kim <ajtwlsalsdl0@gmail.com>
+Hyomin Kim <hyoputer.kim@samsung.com>
 Hyungchan Kim <inlinechan@gmail.com>
 Hyungun Kim <khw3754@gmail.com>
 Hyungwook Lee <hyungwook.lee@navercorp.com>
@@ -497,6 +508,7 @@
 Jaemin Seo <jaemin86.seo@samsung.com>
 Jaeseok Yoon <yjaeseok@gmail.com>
 Jaewon Choi <jaewon.james.choi@gmail.com>
+Jaewon Jung <jw.jung@navercorp.com>
 Jaeyong Bae <jdragon.bae@gmail.com>
 Jagdish Chourasia <jagdish.c@samsung.com>
 Jaime Soriano Pastor <jsorianopastor@gmail.com>
@@ -545,6 +557,7 @@
 Jesus Sanchez-Palencia <jesus.sanchez-palencia.fernandez.fil@intel.com>
 Jiadong Chen <chenjiadong@huawei.com>
 Jiadong Zhu <jiadong.zhu@linaro.org>
+Jiahao Lu <lujjjh@gmail.com>
 Jiahe Zhang <jiahe.zhang@intel.com>
 Jiajia Qin <jiajia.qin@intel.com>
 Jiajie Hu <jiajie.hu@intel.com>
@@ -811,6 +824,7 @@
 Md. Hasanur Rashid <hasanur.r@samsung.com>
 Md Jobed Hossain <jobed.h@samsung.com>
 Md Raiyan bin Sayeed <mrbsayee@uwaterloo.ca>
+Md. Sadiqul Amin <sadiqul.amin@samsung.com>
 Md Sami Uddin <md.sami@samsung.com>
 Micha Hanselmann <micha.hanselmann@gmail.com>
 Michael Cirone <mikecirone@gmail.com>
@@ -1241,6 +1255,7 @@
 U. Artie Eoff <ullysses.a.eoff@intel.com>
 Umar Hansa <umar.hansa@gmail.com>
 Upendra Gowda <upendrag.gowda@gmail.com>
+Utzcoz <utzcoz@gmail.com>
 UwU UwU <uwu7586@gmail.com>
 Uzair Jaleel <uzair.jaleel@samsung.com>
 Vadim Gorbachev <bmsdave@gmail.com>
@@ -1443,6 +1458,7 @@
 Rakuten Kobo Inc. <*@kobo.com>
 Rakuten Kobo Inc. <*@rakuten.com>
 Red Hat Inc. <*@redhat.com>
+Sajeesh Sidharthan <sajeesh.sidharthan@amd.corp-partner.google.com>
 Semihalf <*@semihalf.com>
 Seznam.cz, a.s. <*@firma.seznam.cz>
 Slack Technologies Inc. <*@slack-corp.com>

diff --git a/base/BUILD b/base/BUILD
index a1410fb..5f36123 100644
--- a/base/BUILD
+++ b/base/BUILD

@@ -18,6 +18,7 @@
         "//conditions:default": [],
     }),
     hdrs = [
+        "bits.h",
         "compiler_specific.h",
         "containers/checked_iterators.h",
         "containers/contains.h",
@@ -41,6 +42,7 @@
         "numerics/safe_conversions.h",
         "numerics/safe_conversions_arm_impl.h",
         "numerics/safe_conversions_impl.h",
+        "numerics/safe_math.h",
         "numerics/safe_math_arm_impl.h",
         "numerics/safe_math_clang_gcc_impl.h",
         "numerics/safe_math_shared_impl.h",

diff --git a/base/bits.h b/base/bits.h
new file mode 100644
index 0000000..ea011ad
--- /dev/null
+++ b/base/bits.h

@@ -0,0 +1,143 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file defines some bit utilities.
+
+#ifndef BASE_BITS_H_
+#define BASE_BITS_H_
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+
+#include "polyfills/base/check.h"
+#include "base/compiler_specific.h"
+#include "build/build_config.h"
+
+namespace gurl_base {
+namespace bits {
+
+// Returns true iff |value| is a power of 2.
+//
+// TODO(pkasting): When C++20 is available, replace with std::has_single_bit().
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>>
+constexpr bool IsPowerOfTwo(T value) {
+  // From "Hacker's Delight": Section 2.1 Manipulating Rightmost Bits.
+  //
+  // Only positive integers with a single bit set are powers of two. If only one
+  // bit is set in x (e.g. 0b00000100000000) then |x-1| will have that bit set
+  // to zero and all bits to its right set to 1 (e.g. 0b00000011111111). Hence
+  // |x & (x-1)| is 0 iff x is a power of two.
+  return value > 0 && (value & (value - 1)) == 0;
+}
+
+// Round down |size| to a multiple of alignment, which must be a power of two.
+template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
+constexpr T AlignDown(T size, T alignment) {
+  GURL_DCHECK(IsPowerOfTwo(alignment));
+  return size & ~(alignment - 1);
+}
+
+// Move |ptr| back to the previous multiple of alignment, which must be a power
+// of two. Defined for types where sizeof(T) is one byte.
+template <typename T, typename = typename std::enable_if<sizeof(T) == 1>::type>
+inline T* AlignDown(T* ptr, uintptr_t alignment) {
+  return reinterpret_cast<T*>(
+      AlignDown(reinterpret_cast<uintptr_t>(ptr), alignment));
+}
+
+// Round up |size| to a multiple of alignment, which must be a power of two.
+template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
+constexpr T AlignUp(T size, T alignment) {
+  GURL_DCHECK(IsPowerOfTwo(alignment));
+  return (size + alignment - 1) & ~(alignment - 1);
+}
+
+// Advance |ptr| to the next multiple of alignment, which must be a power of
+// two. Defined for types where sizeof(T) is one byte.
+template <typename T, typename = typename std::enable_if<sizeof(T) == 1>::type>
+inline T* AlignUp(T* ptr, uintptr_t alignment) {
+  return reinterpret_cast<T*>(
+      AlignUp(reinterpret_cast<uintptr_t>(ptr), alignment));
+}
+
+// CountLeadingZeroBits(value) returns the number of zero bits above (i.e. more
+// significant than) the most significant 1 bit in |value| if |value| is
+// non-zero, otherwise it returns {sizeof(T) * 8}.
+// Example: 00100010 -> 2
+//
+// CountTrailingZeroBits(value) returns the number of zero bits below (i.e. less
+// significant than) the least significant 1 bit in |value| if |value| is
+// non-zero, otherwise it returns {sizeof(T) * 8}.
+// Example: 00100010 -> 1
+//
+// C does not have an operator to do this, but fortunately the various
+// compilers have built-ins that map to fast underlying processor instructions.
+//
+// TODO(pkasting): When C++20 is available, replace with std::countl_zero() and
+// similar.
+
+// __builtin_clz has undefined behaviour for an input of 0, even though there's
+// clearly a return value that makes sense, and even though some processor clz
+// instructions have defined behaviour for 0. We could drop to raw __asm__ to
+// do better, but we'll avoid doing that unless we see proof that we need to.
+template <typename T, int bits = sizeof(T) * 8>
+ALWAYS_INLINE constexpr
+    typename std::enable_if<std::is_unsigned<T>::value && sizeof(T) <= 8,
+                            int>::type
+    CountLeadingZeroBits(T value) {
+  static_assert(bits > 0, "invalid instantiation");
+  return LIKELY(value)
+             ? bits == 64
+                   ? __builtin_clzll(static_cast<uint64_t>(value))
+                   : __builtin_clz(static_cast<uint32_t>(value)) - (32 - bits)
+             : bits;
+}
+
+template <typename T, int bits = sizeof(T) * 8>
+ALWAYS_INLINE constexpr
+    typename std::enable_if<std::is_unsigned<T>::value && sizeof(T) <= 8,
+                            int>::type
+    CountTrailingZeroBits(T value) {
+  return LIKELY(value) ? bits == 64
+                             ? __builtin_ctzll(static_cast<uint64_t>(value))
+                             : __builtin_ctz(static_cast<uint32_t>(value))
+                       : bits;
+}
+
+// Returns the integer i such that 2^i <= n < 2^(i+1), or -1 if n == 0.
+//
+// There is a common `BitLength` function, which returns the number of bits
+// required to represent a value. Rather than implement that function,
+// use `Log2Floor` and add 1 to the result.
+//
+// TODO(pkasting): When C++20 is available, replace with std::bit_xxx().
+constexpr int Log2Floor(uint32_t n) {
+  return 31 - CountLeadingZeroBits(n);
+}
+
+// Returns the integer i such that 2^(i-1) < n <= 2^i.
+constexpr int Log2Ceiling(uint32_t n) {
+  // When n == 0, we want the function to return -1.
+  // When n == 0, (n - 1) will underflow to 0xFFFFFFFF, which is
+  // why the statement below starts with (n ? 32 : -1).
+  return (n ? 32 : -1) - CountLeadingZeroBits(n - 1);
+}
+
+// Returns a value of type T with a single bit set in the left-most position.
+// Can be used instead of manually shifting a 1 to the left.
+template <typename T>
+constexpr T LeftmostBit() {
+  static_assert(std::is_integral<T>::value,
+                "This function can only be used with integral types.");
+  T one(1u);
+  return one << ((CHAR_BIT * sizeof(T) - 1));
+}
+
+}  // namespace bits
+}  // namespace gurl_base
+
+#endif  // BASE_BITS_H_

diff --git a/base/numerics/checked_math.h b/base/numerics/checked_math.h
index 4973f09..0e6ad4f 100644
--- a/base/numerics/checked_math.h
+++ b/base/numerics/checked_math.h

@@ -41,11 +41,10 @@
 
   // This is not an explicit constructor because we implicitly upgrade regular
   // numerics to CheckedNumerics to make them easier to use.
-  template <typename Src>
+  template <typename Src,
+            typename = std::enable_if_t<std::is_arithmetic<Src>::value>>
   // NOLINTNEXTLINE(google-explicit-constructor)
-  constexpr CheckedNumeric(Src value) : state_(value) {
-    static_assert(UnderlyingType<Src>::is_numeric, "Argument must be numeric.");
-  }
+  constexpr CheckedNumeric(Src value) : state_(value) {}
 
   // This is not an explicit constructor because we want a seamless conversion
   // from StrictNumeric types.

diff --git a/base/numerics/safe_conversions.h b/base/numerics/safe_conversions.h
index 4a9494e..3e04bf4 100644
--- a/base/numerics/safe_conversions.h
+++ b/base/numerics/safe_conversions.h

@@ -20,10 +20,6 @@
 #define BASE_HAS_OPTIMIZED_SAFE_CONVERSIONS (0)
 #endif
 
-#if !BASE_NUMERICS_DISABLE_OSTREAM_OPERATORS
-#include <ostream>
-#endif
-
 namespace gurl_base {
 namespace internal {
 

diff --git a/base/strings/escape.cc b/base/strings/escape.cc
index d855c1c..867e04b 100644
--- a/base/strings/escape.cc
+++ b/base/strings/escape.cc

@@ -7,7 +7,7 @@
 #include <ostream>
 
 #include "polyfills/base/check_op.h"
-#include "base/feature_list.h"
+#include "polyfills/base/feature_list.h"
 #include "base/features.h"
 #include "base/strings/string_piece.h"
 #include "base/strings/string_util.h"

diff --git a/base/strings/string_piece.h b/base/strings/string_piece.h
index a1db548..fe524df 100644
--- a/base/strings/string_piece.h
+++ b/base/strings/string_piece.h

@@ -34,6 +34,7 @@
 #include "polyfills/base/check_op.h"
 #include "base/compiler_specific.h"
 #include "base/cxx20_is_constant_evaluated.h"
+#include "base/numerics/safe_math.h"
 #include "base/strings/string_piece_forward.h"  // IWYU pragma: export
 #include "build/build_config.h"
 
@@ -117,8 +118,9 @@
   constexpr BasicStringPiece(const BasicStringPiece& other) noexcept = default;
   constexpr BasicStringPiece& operator=(const BasicStringPiece& view) noexcept =
       default;
-  constexpr BasicStringPiece(const CharT* s, size_type count)
-      : ptr_(s), length_(count) {}
+  constexpr BasicStringPiece(const CharT* s, CheckedNumeric<size_t> count)
+      : ptr_(s), length_(count.ValueOrDie()) {}
+  // NOLINTNEXTLINE(google-explicit-constructor)
   constexpr BasicStringPiece(const CharT* s)
       : ptr_(s), length_(s ? traits_type::length(s) : 0) {
     // Intentional STL deviation: Null-check instead of UB.

diff --git a/base/strings/string_piece_rust.h b/base/strings/string_piece_rust.h
index 0d89aa4..ff08c34 100644
--- a/base/strings/string_piece_rust.h
+++ b/base/strings/string_piece_rust.h

@@ -5,6 +5,10 @@
 #ifndef BASE_STRINGS_STRING_PIECE_RUST_H_
 #define BASE_STRINGS_STRING_PIECE_RUST_H_
 
+#include "build/rust/rust_buildflags.h"
+
+#if BUILDFLAG(TOOLCHAIN_HAS_RUST)
+
 #include <stdint.h>
 
 #include "base/strings/string_piece.h"
@@ -35,4 +39,6 @@
 
 }  // namespace base
 
+#endif  // BUILDFLAG(TOOLCHAIN_HAS_RUST)
+
 #endif  // BASE_STRINGS_STRING_PIECE_RUST_H_

diff --git a/base/strings/string_piece_rust_unittest.cc b/base/strings/string_piece_rust_unittest.cc
index 38d50d4..2f8db6c 100644
--- a/base/strings/string_piece_rust_unittest.cc
+++ b/base/strings/string_piece_rust_unittest.cc

@@ -3,9 +3,12 @@
 // found in the LICENSE file.
 
 #include "base/strings/string_piece_rust.h"
+#include "build/rust/rust_buildflags.h"
 
 #include "testing/gtest/include/gtest/gtest.h"
 
+#if BUILDFLAG(TOOLCHAIN_HAS_RUST)
+
 namespace gurl_base {
 namespace {
 
@@ -28,3 +31,5 @@
 
 }  // namespace
 }  // namespace base
+
+#endif  // BUILDFLAG(TOOLCHAIN_HAS_RUST)

diff --git a/base/strings/string_piece_unittest.cc b/base/strings/string_piece_unittest.cc
index fb1be66..9cd9b3f 100644
--- a/base/strings/string_piece_unittest.cc
+++ b/base/strings/string_piece_unittest.cc

@@ -720,6 +720,11 @@
   }
 }
 
+TEST(StringPieceTest, InvalidLengthDeath) {
+  int length = -1;
+  ASSERT_DEATH_IF_SUPPORTED({ StringPiece piece("hello", length); }, "");
+}
+
 TEST(StringPieceTest, ConstexprData) {
   {
     constexpr StringPiece piece;

diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index b7bee67..48d3fac 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h

@@ -135,16 +135,21 @@
   return (c >= 'a' && c <= 'z') ? static_cast<CharT>(c + 'A' - 'a') : c;
 }
 
-// Converts the given string to it's ASCII-lowercase equivalent.
+// Converts the given string to its ASCII-lowercase equivalent. Non-ASCII
+// bytes (or UTF-16 code units in `StringPiece16`) are permitted but will be
+// unmodified.
 BASE_EXPORT std::string ToLowerASCII(StringPiece str);
 BASE_EXPORT std::u16string ToLowerASCII(StringPiece16 str);
 
-// Converts the given string to it's ASCII-uppercase equivalent.
+// Converts the given string to its ASCII-uppercase equivalent. Non-ASCII
+// bytes (or UTF-16 code units in `StringPiece16`) are permitted but will be
+// unmodified.
 BASE_EXPORT std::string ToUpperASCII(StringPiece str);
 BASE_EXPORT std::u16string ToUpperASCII(StringPiece16 str);
 
-// Functor for case-insensitive ASCII comparisons for STL algorithms like
-// std::search.
+// Functor for ASCII case-insensitive comparisons for STL algorithms like
+// std::search. Non-ASCII bytes (or UTF-16 code units in `StringPiece16`) are
+// permitted but will be compared as-is.
 //
 // Note that a full Unicode version of this functor is not possible to write
 // because case mappings might change the number of characters, depend on
@@ -158,13 +163,17 @@
   }
 };
 
-// Like strcasecmp for case-insensitive ASCII characters only. Returns:
+// Like strcasecmp for ASCII case-insensitive comparisons only. Returns:
 //   -1  (a < b)
 //    0  (a == b)
 //    1  (a > b)
-// (unlike strcasecmp which can return values greater or less than 1/-1). For
-// full Unicode support, use gurl_base::i18n::ToLower or gurl_base::i18n::FoldCase
-// and then just call the normal string operators on the result.
+// (unlike strcasecmp which can return values greater or less than 1/-1). To
+// compare all Unicode code points case-insensitively, use gurl_base::i18n::ToLower
+// or gurl_base::i18n::FoldCase and then just call the normal string operators on the
+// result.
+//
+// Non-ASCII bytes (or UTF-16 code units in `StringPiece16`) are permitted but
+// will be compared unmodified.
 BASE_EXPORT constexpr int CompareCaseInsensitiveASCII(StringPiece a,
                                                       StringPiece b) {
   return internal::CompareCaseInsensitiveASCIIT(a, b);
@@ -174,9 +183,11 @@
   return internal::CompareCaseInsensitiveASCIIT(a, b);
 }
 
-// Equality for ASCII case-insensitive comparisons. For full Unicode support,
-// use gurl_base::i18n::ToLower or gurl_base::i18n::FoldCase and then compare with either
-// == or !=.
+// Equality for ASCII case-insensitive comparisons. Non-ASCII bytes (or UTF-16
+// code units in `StringPiece16`) are permitted but will be compared unmodified.
+// To compare all Unicode code points case-insensitively, use
+// gurl_base::i18n::ToLower or gurl_base::i18n::FoldCase and then compare with either ==
+// or !=.
 inline bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
   return internal::EqualsCaseInsensitiveASCIIT(a, b);
 }
@@ -215,6 +226,9 @@
     kWhitespaceNoCrLfUTF16[];  // Unicode w/o CR/LF.
 BASE_EXPORT extern const char kWhitespaceASCII[];
 BASE_EXPORT extern const char16_t kWhitespaceASCIIAs16[];  // No unicode.
+
+// https://infra.spec.whatwg.org/#ascii-whitespace
+BASE_EXPORT extern const char kInfraAsciiWhitespace[];
 
 // Null-terminated string representing the UTF-8 byte order mark.
 BASE_EXPORT extern const char kUtf8ByteOrderMark[];

diff --git a/base/strings/string_util_constants.cc b/base/strings/string_util_constants.cc
index fece0af..12a3c5e 100644
--- a/base/strings/string_util_constants.cc
+++ b/base/strings/string_util_constants.cc

@@ -49,6 +49,8 @@
 const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0};
 const char16_t kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
 
+const char kInfraAsciiWhitespace[] = {0x09, 0x0A, 0x0C, 0x0D, 0x20, 0};
+
 const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
 
 }  // namespace base

diff --git a/base/strings/string_util_impl_helpers.h b/base/strings/string_util_impl_helpers.h
index 970a912..4cd9a3b 100644
--- a/base/strings/string_util_impl_helpers.h
+++ b/base/strings/string_util_impl_helpers.h

@@ -228,10 +228,6 @@
     case CompareCase::INSENSITIVE_ASCII:
       return std::equal(search_for.begin(), search_for.end(), source.begin(),
                         CaseInsensitiveCompareASCII<CharT>());
-
-    default:
-      GURL_NOTREACHED();
-      return false;
   }
 }
 
@@ -250,10 +246,6 @@
     case CompareCase::INSENSITIVE_ASCII:
       return std::equal(source.begin(), source.end(), search_for.begin(),
                         CaseInsensitiveCompareASCII<CharT>());
-
-    default:
-      GURL_NOTREACHED();
-      return false;
   }
 }
 

diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h
index 3a493dd..b05cb7a 100644
--- a/base/strings/string_util_internal.h
+++ b/base/strings/string_util_internal.h

@@ -5,6 +5,8 @@
 #ifndef BASE_STRINGS_STRING_UTIL_INTERNAL_H_
 #define BASE_STRINGS_STRING_UTIL_INTERNAL_H_
 
+#include <type_traits>
+
 #include "base/ranges/algorithm.h"
 #include "base/strings/string_piece.h"
 
@@ -18,15 +20,18 @@
   return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
 }
 
-template <typename T, typename CharT = typename T::value_type>
+template <typename T>
 constexpr int CompareCaseInsensitiveASCIIT(T a, T b) {
   // Find the first characters that aren't equal and compare them.  If the end
   // of one of the strings is found before a nonequal character, the lengths
-  // of the strings are compared.
+  // of the strings are compared. Compare using the unsigned type so the sort
+  // order is independent of the signedness of `char`.
+  static_assert(std::is_integral_v<typename T::value_type>);
+  using UCharT = std::make_unsigned_t<typename T::value_type>;
   size_t i = 0;
   while (i < a.length() && i < b.length()) {
-    CharT lower_a = ToLowerASCII(a[i]);
-    CharT lower_b = ToLowerASCII(b[i]);
+    UCharT lower_a = static_cast<UCharT>(ToLowerASCII(a[i]));
+    UCharT lower_b = static_cast<UCharT>(ToLowerASCII(b[i]));
     if (lower_a < lower_b)
       return -1;
     if (lower_a > lower_b)

diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index 4109f8e..e15358f 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc

@@ -648,6 +648,11 @@
 
   EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
   EXPECT_EQ(u"cc2", ToLowerASCII(u"Cc2"));
+
+  // Non-ASCII characters are unmodified. U+00C4 is LATIN CAPITAL LETTER A WITH
+  // DIAERESIS.
+  EXPECT_EQ('\xc4', ToLowerASCII('\xc4'));
+  EXPECT_EQ(u'\x00c4', ToLowerASCII(u'\x00c4'));
 }
 
 TEST(StringUtilTest, ToUpperASCII) {
@@ -661,6 +666,11 @@
 
   EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
   EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));
+
+  // Non-ASCII characters are unmodified. U+00E4 is LATIN SMALL LETTER A WITH
+  // DIAERESIS.
+  EXPECT_EQ('\xe4', ToUpperASCII('\xe4'));
+  EXPECT_EQ(u'\x00e4', ToUpperASCII(u'\x00e4'));
 }
 
 TEST(StringUtilTest, FormatBytesUnlocalized) {
@@ -1475,6 +1485,15 @@
   EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
   EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
 
+  // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
+  EXPECT_EQ(0, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
+  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4"));
+  EXPECT_EQ(1, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84"));
+
+  // ASCII bytes should sort before non-ASCII ones.
+  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("a", "\xc3\xa4"));
+  EXPECT_EQ(1, CompareCaseInsensitiveASCII("\xc3\xa4", "a"));
+
   // For constexpr.
   static_assert(CompareCaseInsensitiveASCII("", "") == 0);
   static_assert(CompareCaseInsensitiveASCII("Asdf", "aSDf") == 0);
@@ -1482,6 +1501,14 @@
   static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDf") == 1);
   static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDfb") == -1);
   static_assert(CompareCaseInsensitiveASCII("Asdfb", "aSDfA") == 1);
+  static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4") ==
+                0);
+  static_assert(CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4") ==
+                -1);
+  static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84") ==
+                1);
+  static_assert(CompareCaseInsensitiveASCII("a", "\xc3\xa4") == -1);
+  static_assert(CompareCaseInsensitiveASCII("\xc3\xa4", "a") == 1);
 }
 
 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
@@ -1505,6 +1532,10 @@
   EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", u"aSDF"));
   EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", u"aSDFz"));
 
+  // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
+  EXPECT_FALSE(EqualsCaseInsensitiveASCII("aaa \xc3\x84", "AAA \xc3\xa4"));
+
   // The `WStringPiece` overloads are only defined on Windows.
 #if BUILDFLAG(IS_WIN)
   EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", L""));

diff --git a/base/strings/sys_string_conversions_win.cc b/base/strings/sys_string_conversions_win.cc
index da19245..50b7c76 100644
--- a/base/strings/sys_string_conversions_win.cc
+++ b/base/strings/sys_string_conversions_win.cc

@@ -5,6 +5,7 @@
 #include "base/strings/sys_string_conversions.h"
 
 #include <windows.h>
+
 #include <stdint.h>
 
 #include "base/strings/string_piece.h"

diff --git a/copy.bara.sky b/copy.bara.sky
index 89f70ce..a586cea 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky

@@ -12,6 +12,7 @@
     include = [
         "AUTHORS",
         "LICENSE",
+        "base/bits.h",
         "base/compiler_specific.h",
         "base/containers/checked_iterators.h",
         "base/containers/contains.h",
@@ -77,6 +78,7 @@
     #"base/dcheck_is_on.h",
     "base/debug/alias.h",
     "base/export_template.h",
+    "base/feature_list.h",
     "base/logging.h",
     "base/memory/raw_ptr.h",
     "base/notreached.h",

diff --git a/polyfills/BUILD b/polyfills/BUILD
index ea1b73e..7e887e3 100644
--- a/polyfills/BUILD
+++ b/polyfills/BUILD

@@ -15,6 +15,7 @@
         "base/dcheck_is_on.h",
         "base/debug/alias.h",
         "base/export_template.h",
+        "base/feature_list.h",
         "base/logging.h",
         "base/memory/raw_ptr.h",
         "base/metrics/histogram_macros.h",

diff --git a/polyfills/base/feature_list.h b/polyfills/base/feature_list.h
new file mode 100644
index 0000000..b687509
--- /dev/null
+++ b/polyfills/base/feature_list.h

@@ -0,0 +1,37 @@
+// Copyright 2022 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef POLYFILLS_BASE_FEATURE_LIST_H_
+#define POLYFILLS_BASE_FEATURE_LIST_H_
+
+#define BASE_DECLARE_FEATURE(feature) extern const gurl_base::Feature feature
+
+#define BASE_FEATURE(feature, name, default_value) \
+  const gurl_base::Feature feature(name, default_value)
+
+namespace gurl_base {
+
+enum FeatureState {
+  FEATURE_DISABLED_BY_DEFAULT,
+  FEATURE_ENABLED_BY_DEFAULT,
+};
+
+struct Feature {
+  constexpr Feature(const char* name, FeatureState default_state)
+      : name(name), default_state(default_state) {}
+
+  const char* const name;
+  const FeatureState default_state;
+};
+
+class FeatureList {
+ public:
+  static bool IsEnabled(const Feature& feature) {
+    return feature.default_state == FEATURE_ENABLED_BY_DEFAULT;
+  }
+};
+
+}  // namespace gurl_base
+
+#endif  // POLYFILLS_BASE_FEATURE_LIST_H_

diff --git a/url/BUILD b/url/BUILD
index c6b53ab..327c38a 100644
--- a/url/BUILD
+++ b/url/BUILD

@@ -31,6 +31,7 @@
         "url_canon_stdstring.cc",
         "url_canon_stdurl.cc",
         "url_constants.cc",
+        "url_features.cc",
         "url_parse_file.cc",
         "url_parse_internal.h",
         "url_util.cc",
@@ -44,6 +45,7 @@
         "url_canon_ip.h",
         "url_canon_stdstring.h",
         "url_constants.h",
+        "url_features.h",
         "url_file.h",
         "url_util.h",
     ],

diff --git a/url/gurl.cc b/url/gurl.cc
index 6c0429e..c5e3f19 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc

@@ -331,6 +331,15 @@
   return Resolve(".");
 }
 
+GURL GURL::GetWithoutRef() const {
+  if (!has_ref())
+    return GURL(*this);
+
+  Replacements replacements;
+  replacements.ClearRef();
+  return ReplaceComponents(replacements);
+}
+
 bool GURL::IsStandard() const {
   return url::IsStandard(spec_.data(), parsed_.scheme);
 }
@@ -402,13 +411,13 @@
 }
 
 gurl_base::StringPiece GURL::PathForRequestPiece() const {
-  GURL_DCHECK(parsed_.path.len > 0)
+  GURL_DCHECK(parsed_.path.is_nonempty())
       << "Canonical path for requests should be non-empty";
-  if (parsed_.ref.len >= 0) {
+  if (parsed_.ref.is_valid()) {
     // Clip off the reference when it exists. The reference starts after the
     // #-sign, so we have to subtract one to also remove it.
-    return gurl_base::StringPiece(&spec_[parsed_.path.begin],
-                             parsed_.ref.begin - parsed_.path.begin - 1);
+    return gurl_base::StringPiece(spec_).substr(
+        parsed_.path.begin, parsed_.ref.begin - parsed_.path.begin - 1);
   }
   // Compute the actual path length, rather than depending on the spec's
   // terminator. If we're an inner_url, our spec continues on into our outer
@@ -417,7 +426,7 @@
   if (parsed_.query.is_valid())
     path_len = parsed_.query.end() - parsed_.path.begin;
 
-  return gurl_base::StringPiece(&spec_[parsed_.path.begin], path_len);
+  return gurl_base::StringPiece(spec_).substr(parsed_.path.begin, path_len);
 }
 
 std::string GURL::PathForRequest() const {
@@ -446,7 +455,7 @@
   if (!is_valid_)
     return gurl_base::StringPiece();
   url::Component content_component = parsed_.GetContent();
-  if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.len >= 0)
+  if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.is_valid())
     content_component.len -= parsed_.ref.len + 1;
   return ComponentStringPiece(content_component);
 }

diff --git a/url/gurl.h b/url/gurl.h
index 919ae5c..1b29989 100644
--- a/url/gurl.h
+++ b/url/gurl.h

@@ -189,6 +189,14 @@
   // scheme, authority or path, it will return an empty, invalid GURL.
   GURL GetWithoutFilename() const;
 
+  // A helper function to return a GURL without the Ref (also named Fragment
+  // Identifier). For example,
+  // GURL("https://www.foo.com/index.html#test").GetWithoutRef().spec()
+  // will return "https://www.foo.com/index.html".
+  // If this GURL has no ref, a copy of it is returned unchanged; otherwise the
+  // ref is cleared via ReplaceComponents().
+  GURL GetWithoutRef() const;
+
   // A helper function to return a GURL containing just the scheme, host,
   // and port from a URL. Equivalent to clearing any username and password,
   // replacing the path with a slash, and clearing everything after that. If
@@ -285,9 +293,7 @@
   bool HostIsIPAddress() const;
 
   // Not including the colon. If you are comparing schemes, prefer SchemeIs.
-  bool has_scheme() const {
-    return parsed_.scheme.len >= 0;
-  }
+  bool has_scheme() const { return parsed_.scheme.is_valid(); }
   std::string scheme() const {
     return ComponentString(parsed_.scheme);
   }
@@ -295,9 +301,7 @@
     return ComponentStringPiece(parsed_.scheme);
   }
 
-  bool has_username() const {
-    return parsed_.username.len >= 0;
-  }
+  bool has_username() const { return parsed_.username.is_valid(); }
   std::string username() const {
     return ComponentString(parsed_.username);
   }
@@ -305,9 +309,7 @@
     return ComponentStringPiece(parsed_.username);
   }
 
-  bool has_password() const {
-    return parsed_.password.len >= 0;
-  }
+  bool has_password() const { return parsed_.password.is_valid(); }
   std::string password() const {
     return ComponentString(parsed_.password);
   }
@@ -320,7 +322,7 @@
   // HostNoBrackets() below.
   bool has_host() const {
     // Note that hosts are special, absence of host means length 0.
-    return parsed_.host.len > 0;
+    return parsed_.host.is_nonempty();
   }
   std::string host() const {
     return ComponentString(parsed_.host);
@@ -332,9 +334,7 @@
   // The port if one is explicitly specified. Most callers will want IntPort()
   // or EffectiveIntPort() instead of these. The getters will not include the
   // ':'.
-  bool has_port() const {
-    return parsed_.port.len >= 0;
-  }
+  bool has_port() const { return parsed_.port.is_valid(); }
   std::string port() const {
     return ComponentString(parsed_.port);
   }
@@ -344,9 +344,7 @@
 
   // Including first slash following host, up to the query. The URL
   // "http://www.google.com/" has a path of "/".
-  bool has_path() const {
-    return parsed_.path.len >= 0;
-  }
+  bool has_path() const { return parsed_.path.is_valid(); }
   std::string path() const {
     return ComponentString(parsed_.path);
   }
@@ -355,9 +353,7 @@
   }
 
   // Stuff following '?' up to the ref. The getters will not include the '?'.
-  bool has_query() const {
-    return parsed_.query.len >= 0;
-  }
+  bool has_query() const { return parsed_.query.is_valid(); }
   std::string query() const {
     return ComponentString(parsed_.query);
   }
@@ -367,9 +363,7 @@
 
   // Stuff following '#' to the end of the string. This will be %-escaped UTF-8.
   // The getters will not include the '#'.
-  bool has_ref() const {
-    return parsed_.ref.len >= 0;
-  }
+  bool has_ref() const { return parsed_.ref.is_valid(); }
   std::string ref() const {
     return ComponentString(parsed_.ref);
   }
@@ -470,16 +464,13 @@
 
   // Returns the substring of the input identified by the given component.
   std::string ComponentString(const url::Component& comp) const {
-    if (!comp.is_nonempty())
-      return std::string();
-    return std::string(spec_, static_cast<size_t>(comp.begin),
-                       static_cast<size_t>(comp.len));
+    return std::string(ComponentStringPiece(comp));
   }
   gurl_base::StringPiece ComponentStringPiece(const url::Component& comp) const {
-    if (!comp.is_nonempty())
+    if (comp.is_empty())
       return gurl_base::StringPiece();
-    return gurl_base::StringPiece(&spec_[static_cast<size_t>(comp.begin)],
-                             static_cast<size_t>(comp.len));
+    return gurl_base::StringPiece(spec_).substr(static_cast<size_t>(comp.begin),
+                                           static_cast<size_t>(comp.len));
   }
 
   void ProcessFileSystemURLAfterReplaceComponents();

diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index 16e3a8e..c6be656 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc

@@ -478,6 +478,81 @@
   }
 }
 
+TEST(GURLTest, GetWithoutRef) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+      // Common Standard URLs.
+      {"https://www.google.com/index.html",
+       "https://www.google.com/index.html"},
+      {"https://www.google.com/index.html#maps/",
+       "https://www.google.com/index.html"},
+
+      {"https://foo:bar@www.google.com/maps.htm",
+       "https://foo:bar@www.google.com/maps.htm"},
+      {"https://foo:bar@www.google.com/maps.htm#fragment",
+       "https://foo:bar@www.google.com/maps.htm"},
+
+      {"https://www.google.com/maps/au/index.html?q=maps",
+       "https://www.google.com/maps/au/index.html?q=maps"},
+      {"https://www.google.com/maps/au/index.html?q=maps#fragment/",
+       "https://www.google.com/maps/au/index.html?q=maps"},
+
+      {"http://www.google.com:8000/maps/au/index.html?q=maps",
+       "http://www.google.com:8000/maps/au/index.html?q=maps"},
+      {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
+       "http://www.google.com:8000/maps/au/index.html?q=maps"},
+
+      {"https://www.google.com/maps/au/north/?q=maps",
+       "https://www.google.com/maps/au/north/?q=maps"},
+      {"https://www.google.com/maps/au/north?q=maps#fragment",
+       "https://www.google.com/maps/au/north?q=maps"},
+
+      // Less common standard URLs.
+      {"filesystem:http://www.google.com/temporary/bar.html?baz=22",
+       "filesystem:http://www.google.com/temporary/bar.html?baz=22"},
+      {"file:///temporary/bar.html?baz=22#fragment",
+       "file:///temporary/bar.html?baz=22"},
+
+      {"ftp://foo/test/index.html", "ftp://foo/test/index.html"},
+      {"ftp://foo/test/index.html#fragment", "ftp://foo/test/index.html"},
+
+      {"gopher://foo/test/index.html", "gopher://foo/test/index.html"},
+      {"gopher://foo/test/index.html#fragment", "gopher://foo/test/index.html"},
+
+      {"ws://foo/test/index.html", "ws://foo/test/index.html"},
+      {"ws://foo/test/index.html#fragment", "ws://foo/test/index.html"},
+
+      // Non-standard, hierarchical URLs.
+      {"chrome://foo/bar.html", "chrome://foo/bar.html"},
+      {"chrome://foo/bar.html#fragment", "chrome://foo/bar.html"},
+
+      {"httpa://foo/test/index.html", "httpa://foo/test/index.html"},
+      {"httpa://foo/test/index.html#fragment", "httpa://foo/test/index.html"},
+
+      // Non-standard, non-hierarchical URLs.
+      {"blob:https://foo.bar/test/index.html",
+       "blob:https://foo.bar/test/index.html"},
+      {"blob:https://foo.bar/test/index.html#fragment",
+       "blob:https://foo.bar/test/index.html"},
+
+      {"about:blank", "about:blank"},
+      {"about:blank#ref", "about:blank"},
+
+      {"data:foobar", "data:foobar"},
+      {"scheme:opaque_data", "scheme:opaque_data"},
+      // Invalid URLs.
+      {"foobar", ""},
+  };
+
+  for (size_t i = 0; i < std::size(cases); i++) {
+    GURL url(cases[i].input);
+    GURL without_ref = url.GetWithoutRef();
+    EXPECT_EQ(cases[i].expected, without_ref.spec());
+  }
+}
+
 TEST(GURLTest, Replacements) {
   // The URL canonicalizer replacement test will handle most of these case.
   // The most important thing to do here is to check that the proper

diff --git a/url/origin.h b/url/origin.h
index 2b8caa5..5da5d84 100644
--- a/url/origin.h
+++ b/url/origin.h

@@ -18,7 +18,6 @@
 #include "base/unguessable_token.h"
 #include "build/build_config.h"
 #include "build/buildflag.h"
-#include "ipc/ipc_param_traits.h"
 #include "absl/types/optional.h"
 #include "polyfills/third_party/perfetto/include/perfetto/tracing/traced_value.h"
 #include "url/scheme_host_port.h"
@@ -43,6 +42,11 @@
 class SecurityOriginTest;
 }  // namespace blink
 
+namespace IPC {
+template <class P>
+struct ParamTraits;
+}  // namespace IPC
+
 namespace ipc_fuzzer {
 template <class T>
 struct FuzzTraits;

diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index 2500fc6..a7b72a5 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc

@@ -57,7 +57,7 @@
 // Returns the offset of the next authority terminator in the input starting
 // from start_offset. If no terminator is found, the return value will be equal
 // to spec_len.
-template<typename CHAR>
+template <typename CHAR>
 int FindNextAuthorityTerminator(const CHAR* spec,
                                 int start_offset,
                                 int spec_len) {
@@ -68,7 +68,7 @@
   return spec_len;  // Not found.
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void ParseUserInfo(const CHAR* spec,
                    const Component& user,
                    Component* username,
@@ -82,8 +82,7 @@
   if (colon_offset < user.len) {
     // Found separator: <username>:<password>
     *username = Component(user.begin, colon_offset);
-    *password = MakeRange(user.begin + colon_offset + 1,
-                          user.begin + user.len);
+    *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
   } else {
     // No separator, treat everything as the username
     *username = user;
@@ -91,7 +90,7 @@
   }
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void ParseServerInfo(const CHAR* spec,
                      const Component& serverinfo,
                      Component* hostname,
@@ -141,7 +140,7 @@
 // parts. The port number will be parsed and the resulting integer will be
 // filled into the given *port variable, or -1 if there is no port number or it
 // is invalid.
-template<typename CHAR>
+template <typename CHAR>
 void DoParseAuthority(const CHAR* spec,
                       const Component& auth,
                       Component* username,
@@ -165,10 +164,10 @@
 
   if (spec[i] == '@') {
     // Found user info: <user-info>@<server-info>
-    ParseUserInfo(spec, Component(auth.begin, i - auth.begin),
-                  username, password);
-    ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),
-                    hostname, port_num);
+    ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
+                  password);
+    ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
+                    port_num);
   } else {
     // No user info, everything is server info.
     username->reset();
@@ -179,30 +178,47 @@
 
 template <typename CHAR>
 inline void FindQueryAndRefParts(const CHAR* spec,
-                          const Component& path,
-                          int* query_separator,
-                          int* ref_separator) {
-  int path_end = path.begin + path.len;
-  for (int i = path.begin; i < path_end; i++) {
-    switch (spec[i]) {
-      case '?':
-        // Only match the query string if it precedes the reference fragment
-        // and when we haven't found one already.
-        if (*query_separator < 0)
-          *query_separator = i;
-        break;
-      case '#':
-        // Record the first # sign only.
-        if (*ref_separator < 0) {
-          *ref_separator = i;
-          return;
-        }
-        break;
+                                 const Component& path,
+                                 int* query_separator,
+                                 int* ref_separator) {
+  if constexpr (sizeof(*spec) == 1) {
+    // memchr is much faster than any scalar code we can write.
+    const CHAR* ptr = spec + path.begin;
+    const CHAR* first_hash =
+        reinterpret_cast<const CHAR*>(memchr(ptr, '#', path.len));
+    size_t len_before_fragment =
+        first_hash == nullptr ? path.len : first_hash - ptr;
+    const CHAR* first_question =
+        reinterpret_cast<const CHAR*>(memchr(ptr, '?', len_before_fragment));
+    if (first_hash != nullptr) {
+      *ref_separator = first_hash - spec;
+    }
+    if (first_question != nullptr) {
+      *query_separator = first_question - spec;
+    }
+  } else {
+    int path_end = path.begin + path.len;
+    for (int i = path.begin; i < path_end; i++) {
+      switch (spec[i]) {
+        case '?':
+          // Only match the query string if it precedes the reference fragment
+          // and when we haven't found one already.
+          if (*query_separator < 0)
+            *query_separator = i;
+          break;
+        case '#':
+          // Record the first # sign only.
+          if (*ref_separator < 0) {
+            *ref_separator = i;
+            return;
+          }
+          break;
+      }
     }
   }
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void ParsePath(const CHAR* spec,
                const Component& path,
                Component* filepath,
@@ -217,7 +233,7 @@
     ref->reset();
     return;
   }
-  GURL_DCHECK(path.len > 0) << "We should never have 0 length paths";
+  GURL_DCHECK(path.is_nonempty()) << "We should never have 0 length paths";
 
   // Search for first occurrence of either ? or #.
   int query_separator = -1;  // Index of the '?'
@@ -255,10 +271,8 @@
     filepath->reset();
 }
 
-template<typename CHAR>
-bool DoExtractScheme(const CHAR* url,
-                     int url_len,
-                     Component* scheme) {
+template <typename CHAR>
+bool DoExtractScheme(const CHAR* url, int url_len, Component* scheme) {
   // Skip leading whitespace and control characters.
   int begin = 0;
   while (begin < url_len && ShouldTrimFromURL(url[begin]))
@@ -326,7 +340,7 @@
 
 // The main parsing function for standard URLs. Standard URLs have a scheme,
 // host, path, etc.
-template<typename CHAR>
+template <typename CHAR>
 void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   GURL_DCHECK(spec_len >= 0);
 
@@ -347,7 +361,7 @@
   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   GURL_DCHECK(spec_len >= 0);
 
@@ -356,9 +370,9 @@
   parsed->password.reset();
   parsed->host.reset();
   parsed->port.reset();
-  parsed->path.reset();   // May use this; reset for convenience.
-  parsed->ref.reset();    // May use this; reset for convenience.
-  parsed->query.reset();  // May use this; reset for convenience.
+  parsed->path.reset();          // May use this; reset for convenience.
+  parsed->ref.reset();           // May use this; reset for convenience.
+  parsed->query.reset();         // May use this; reset for convenience.
   parsed->clear_inner_parsed();  // May use this; reset for convenience.
 
   // Strip leading & trailing spaces and control characters.
@@ -453,8 +467,7 @@
     return;
   }
   int inner_path_end = inner_parsed.path.begin + 1;  // skip the leading slash
-  while (inner_path_end < spec_len &&
-      !IsURLSlash(spec[inner_path_end]))
+  while (inner_path_end < spec_len && !IsURLSlash(spec[inner_path_end]))
     ++inner_path_end;
   parsed->path.begin = inner_path_end;
   int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
@@ -464,8 +477,9 @@
 
 // Initializes a path URL which is merely a scheme followed by a path. Examples
 // include "about:foo" and "javascript:alert('bar');"
-template<typename CHAR>
-void DoParsePathURL(const CHAR* spec, int spec_len,
+template <typename CHAR>
+void DoParsePathURL(const CHAR* spec,
+                    int spec_len,
                     bool trim_path_end,
                     Parsed* parsed) {
   // Get the non-path and non-scheme parts of the URL out of the way, we never
@@ -507,14 +521,11 @@
     return;
   GURL_DCHECK_LT(path_begin, spec_len);
 
-  ParsePath(spec,
-            MakeRange(path_begin, spec_len),
-            &parsed->path,
-            &parsed->query,
-            &parsed->ref);
+  ParsePath(spec, MakeRange(path_begin, spec_len), &parsed->path,
+            &parsed->query, &parsed->ref);
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
   GURL_DCHECK(spec_len >= 0);
 
@@ -580,11 +591,11 @@
 // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
 // we copy the digits to a small stack buffer (since we know the maximum number
 // of digits in a valid port number) that we can NULL terminate.
-template<typename CHAR>
+template <typename CHAR>
 int DoParsePort(const CHAR* spec, const Component& component) {
   // Easy success case when there is no port.
   const int kMaxDigits = 5;
-  if (!component.is_nonempty())
+  if (component.is_empty())
     return PORT_UNSPECIFIED;
 
   // Skip over any leading 0s.
@@ -623,12 +634,12 @@
   return port;
 }
 
-template<typename CHAR>
+template <typename CHAR>
 void DoExtractFileName(const CHAR* spec,
                        const Component& path,
                        Component* file_name) {
   // Handle empty paths: they have no file names.
-  if (!path.is_nonempty()) {
+  if (path.is_empty()) {
     file_name->reset();
     return;
   }
@@ -652,7 +663,7 @@
   return;
 }
 
-template<typename CHAR>
+template <typename CHAR>
 bool DoExtractQueryKeyValue(const CHAR* spec,
                             Component* query,
                             Component* key,

diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h
index 2246d53..d44e20a 100644
--- a/url/third_party/mozilla/url_parse.h
+++ b/url/third_party/mozilla/url_parse.h

@@ -24,17 +24,14 @@
     return begin + len;
   }
 
-  // Returns true if this component is valid, meaning the length is given. Even
-  // valid components may be empty to record the fact that they exist.
-  bool is_valid() const {
-    return (len != -1);
-  }
+  // Returns true if this component is valid, meaning the length is given.
+  // Valid components may be empty to record the fact that they exist.
+  bool is_valid() const { return len >= 0; }
 
-  // Returns true if the given component is specified on false, the component
-  // is either empty or invalid.
-  bool is_nonempty() const {
-    return (len > 0);
-  }
+  // Determine if the component is empty or not. Empty means the length is
+  // zero or the component is invalid.
+  bool is_empty() const { return len <= 0; }
+  bool is_nonempty() const { return len > 0; }
 
   void reset() {
     begin = 0;

diff --git a/url/url_canon.h b/url/url_canon.h
index abeea84..1eed379 100644
--- a/url/url_canon.h
+++ b/url/url_canon.h

@@ -26,7 +26,7 @@
 // resize function that is called when the existing buffer is not big enough.
 // The derived class is then in charge of setting up our buffer which we will
 // manage.
-template<typename T>
+template <typename T>
 class CanonOutputT {
  public:
   CanonOutputT() = default;
@@ -60,12 +60,8 @@
   // Called by the user of this class to get the output. The output will NOT
   // be NULL-terminated. Call length() to get the
   // length.
-  const T* data() const {
-    return buffer_;
-  }
-  T* data() {
-    return buffer_;
-  }
+  const T* data() const { return buffer_; }
+  T* data() { return buffer_; }
 
   // Shortens the URL to the new length. Used for "backing up" when processing
   // relative paths. This can also be used if an external function writes a lot
@@ -102,8 +98,7 @@
       if (!Grow(str_len - (buffer_len_ - cur_len_)))
         return;
     }
-    for (size_t i = 0; i < str_len; i++)
-      buffer_[cur_len_ + i] = str[i];
+    memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
     cur_len_ += str_len;
   }
 
@@ -140,7 +135,7 @@
 // Simple implementation of the CanonOutput using new[]. This class
 // also supports a static buffer so if it is allocated on the stack, most
 // URLs can be canonicalized with no heap allocations.
-template<typename T, int fixed_capacity = 1024>
+template <typename T, int fixed_capacity = 1024>
 class RawCanonOutputT : public CanonOutputT<T> {
  public:
   RawCanonOutputT() : CanonOutputT<T>() {
@@ -178,7 +173,7 @@
 typedef CanonOutputT<char> CanonOutput;
 typedef CanonOutputT<char16_t> CanonOutputW;
 
-template<int fixed_capacity>
+template <int fixed_capacity>
 class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
 template <int fixed_capacity>
 class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
@@ -357,16 +352,16 @@
 
   // This field summarizes how the input was classified by the canonicalizer.
   enum Family {
-    NEUTRAL,   // - Doesn't resemble an IP address. As far as the IP
-               //   canonicalizer is concerned, it should be treated as a
-               //   hostname.
-    BROKEN,    // - Almost an IP, but was not canonicalized. This could be an
-               //   IPv4 address where truncation occurred, or something
-               //   containing the special characters :[] which did not parse
-               //   as an IPv6 address. Never attempt to connect to this
-               //   address, because it might actually succeed!
-    IPV4,      // - Successfully canonicalized as an IPv4 address.
-    IPV6,      // - Successfully canonicalized as an IPv6 address.
+    NEUTRAL,  // - Doesn't resemble an IP address. As far as the IP
+              //   canonicalizer is concerned, it should be treated as a
+              //   hostname.
+    BROKEN,   // - Almost an IP, but was not canonicalized. This could be an
+              //   IPv4 address where truncation occurred, or something
+              //   containing the special characters :[] which did not parse
+              //   as an IPv6 address. Never attempt to connect to this
+              //   address, because it might actually succeed!
+    IPV4,     // - Successfully canonicalized as an IPv4 address.
+    IPV6,     // - Successfully canonicalized as an IPv6 address.
   };
   Family family;
 
@@ -392,7 +387,6 @@
   }
 };
 
-
 // Host.
 //
 // The 8-bit version requires UTF-8 encoding. Use this version when you only
@@ -709,7 +703,7 @@
 // This structures does not own any data. It is the caller's responsibility to
 // ensure that the data the pointers point to stays in scope and is not
 // modified.
-template<typename CHAR>
+template <typename CHAR>
 struct URLComponentSource {
   // Constructor normally used by callers wishing to replace components. This
   // will make them all NULL, which is no replacement. The caller would then
@@ -734,8 +728,7 @@
         port(default_value),
         path(default_value),
         query(default_value),
-        ref(default_value) {
-  }
+        ref(default_value) {}
 
   const CHAR* scheme;
   const CHAR* username;
@@ -757,11 +750,10 @@
 // IN SCOPE BY THE CALLER for as long as this object exists!
 //
 // Prefer the 8-bit replacement version if possible since it is more efficient.
-template<typename CHAR>
+template <typename CHAR>
 class Replacements {
  public:
-  Replacements() {
-  }
+  Replacements() {}
 
   // Scheme
   void SetScheme(const CHAR* s, const Component& comp) {

diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index e54b843..cfe3fe8 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc

@@ -31,12 +31,22 @@
   // Fast verification that there's nothing that needs removal. This is the 99%
   // case, so we want it to be fast and don't care about impacting the speed
   // when we do find whitespace.
-  int found_whitespace = false;
-  for (int i = 0; i < input_len; i++) {
-    if (!IsRemovableURLWhitespace(input[i]))
-      continue;
-    found_whitespace = true;
-    break;
+  bool found_whitespace = false;
+  if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
+    // For large strings, memchr is much faster than any scalar code we can
+    // write, even if we need to run it three times. (If this turns out to still
+    // be a bottleneck, we could write our own vector code, but given that
+    // memchr is so fast, it's unlikely to be relevant.)
+    found_whitespace = memchr(input, '\n', input_len) != nullptr ||
+                       memchr(input, '\r', input_len) != nullptr ||
+                       memchr(input, '\t', input_len) != nullptr;
+  } else {
+    for (int i = 0; i < input_len; i++) {
+      if (!IsRemovableURLWhitespace(input[i]))
+        continue;
+      found_whitespace = true;
+      break;
+    }
   }
 
   if (!found_whitespace) {
@@ -72,6 +82,7 @@
 // Contains the canonical version of each possible input letter in the scheme
 // (basically, lower-cased). The corresponding entry will be 0 if the letter
 // is not allowed in a scheme.
+// clang-format off
 const char kSchemeCanonical[0x80] = {
 // 00-1f: all are invalid
      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
@@ -88,6 +99,7 @@
      0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
 //   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
     'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0 ,  0 ,  0 ,  0 ,  0 };
+// clang-format on
 
 // This could be a table lookup as well by setting the high bit for each
 // valid character, but it's only called once per URL, and it makes the lookup
@@ -96,12 +108,12 @@
   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
 }
 
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoScheme(const CHAR* spec,
               const Component& scheme,
               CanonOutput* output,
               Component* out_scheme) {
-  if (!scheme.is_nonempty()) {
+  if (scheme.is_empty()) {
     // Scheme is unspecified or empty, convert to empty by appending a colon.
     *out_scheme = Component(output->length(), 0);
     output->push_back(':');
@@ -161,7 +173,7 @@
 // *_spec strings. Typically, these specs will be the same (we're
 // canonicalizing a single source string), but may be different when
 // replacing components.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoUserInfo(const CHAR* username_spec,
                 const Component& username,
                 const CHAR* password_spec,
@@ -169,7 +181,7 @@
                 CanonOutput* output,
                 Component* out_username,
                 Component* out_password) {
-  if (username.len <= 0 && password.len <= 0) {
+  if (username.is_empty() && password.is_empty()) {
     // Common case: no user info. We strip empty username/passwords.
     *out_username = Component();
     *out_password = Component();
@@ -178,7 +190,7 @@
 
   // Write the username.
   out_username->begin = output->length();
-  if (username.len > 0) {
+  if (username.is_nonempty()) {
     // This will escape characters not valid for the username.
     AppendStringOfType(&username_spec[username.begin],
                        static_cast<size_t>(username.len), CHAR_USERINFO,
@@ -188,7 +200,7 @@
 
   // When there is a password, we need the separator. Note that we strip
   // empty but specified passwords.
-  if (password.len > 0) {
+  if (password.is_nonempty()) {
     output->push_back(':');
     out_password->begin = output->length();
     AppendStringOfType(&password_spec[password.begin],
@@ -209,7 +221,7 @@
 }
 
 // This function will prepend the colon if there will be a port.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 bool DoPort(const CHAR* spec,
             const Component& port,
             int default_port_for_scheme,
@@ -284,7 +296,7 @@
 };
 // clang-format on
 
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
 void DoCanonicalizeRef(const CHAR* spec,
                        const Component& ref,
                        CanonOutput* output,
@@ -364,9 +376,9 @@
                           CanonOutput* output,
                           Component* out_username,
                           Component* out_password) {
-  return DoUserInfo<char, unsigned char>(
-      username_source, username, password_source, password,
-      output, out_username, out_password);
+  return DoUserInfo<char, unsigned char>(username_source, username,
+                                         password_source, password, output,
+                                         out_username, out_password);
 }
 
 bool CanonicalizeUserInfo(const char16_t* username_source,
@@ -386,8 +398,7 @@
                       int default_port_for_scheme,
                       CanonOutput* output,
                       Component* out_port) {
-  return DoPort<char, unsigned char>(spec, port,
-                                     default_port_for_scheme,
+  return DoPort<char, unsigned char>(spec, port, default_port_for_scheme,
                                      output, out_port);
 }
 

diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index d29f7ab..eacc69f 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc

@@ -357,7 +357,7 @@
             const Component& host,
             CanonOutput* output,
             CanonHostInfo* host_info) {
-  if (!host.is_nonempty()) {
+  if (host.is_empty()) {
     // Empty hosts don't need anything.
     host_info->family = CanonHostInfo::NEUTRAL;
     host_info->out_host = Component();

diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index eb24cee..393fc4b 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc

@@ -7,10 +7,16 @@
 #include <errno.h>
 #include <stddef.h>
 #include <stdlib.h>
+#ifdef __SSE2__
+#include <immintrin.h>
+#elif defined(__aarch64__)
+#include <arm_neon.h>
+#endif
 
 #include <cstdio>
 #include <string>
 
+#include "base/bits.h"
 #include "base/numerics/safe_conversions.h"
 #include "base/strings/utf_string_conversion_utils.h"
 
@@ -18,12 +24,62 @@
 
 namespace {
 
+// Returns the length of an initial segment of the given string that consists
+// solely of characters valid for CHAR_QUERY. The scan covers whole 16-byte
+// chunks and stops at the first chunk holding a character it rejects, so the
+// result can be shorter than the true valid prefix ('!' (0x21) is rejected
+// too, although valid); such false negatives are fine. This is a fast path
+// for very long, already-valid query strings, which happen on some web pages.
+//
+// This has some startup cost to load the constants and such, so it's
+// usually not worth it for short strings.
+size_t FindInitialQuerySafeString(const char* source, size_t length) {
+#if defined(__SSE2__) || defined(__aarch64__)
+  constexpr size_t kChunkSize = 16;
+  size_t i;
+  for (i = 0; i < gurl_base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
+    char b __attribute__((vector_size(16)));
+    memcpy(&b, source + i, sizeof(b));
+
+    // Compare each element with the ranges for CHAR_QUERY
+    // (see kSharedCharTypeTable), vectorized so that it creates
+    // a mask of which elements match. For completeness, we could
+    // have had (...) | b == 0x21 here, but exclamation marks are
+    // rare and the extra test costs us some time.
+    auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
+
+#ifdef __SSE2__
+    if (_mm_movemask_epi8(mask) != 0xffff) {
+      return i;
+    }
+#else
+    if (vminvq_u8(mask) == 0) {
+      return i;
+    }
+#endif
+  }
+  return i;
+#else
+  // Need SIMD support (with fast reductions) for this to be efficient.
+  return 0;
+#endif
+}
+
 template <typename CHAR, typename UCHAR>
 void DoAppendStringOfType(const CHAR* source,
                           size_t length,
                           SharedCharTypes type,
                           CanonOutput* output) {
-  for (size_t i = 0; i < length; i++) {
+  size_t i = 0;
+  // We only instantiate this for char, to avoid a Clang crash
+  // (and because Append() does not support converting).
+  if constexpr (sizeof(CHAR) == 1) {
+    if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
+      i = FindInitialQuerySafeString(source, length);
+      output->Append(source, i);
+    }
+  }
+  for (; i < length; i++) {
     if (static_cast<UCHAR>(source[i]) >= 0x80) {
       // ReadChar will fill the code point with kUnicodeReplacementCharacter
       // when the input is invalid, which is what we want.
@@ -113,6 +169,7 @@
 }  // namespace
 
 // See the header file for this array's declaration.
+// clang-format off
 const unsigned char kSharedCharTypeTable[0x100] = {
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
@@ -221,6 +278,7 @@
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
 };
+// clang-format on
 
 const char kHexCharLookup[0x10] = {
     '0', '1', '2', '3', '4', '5', '6', '7',
@@ -324,27 +382,27 @@
   const URLComponentSource<char>& repl_source = repl.sources();
   const Parsed& repl_parsed = repl.components();
 
-  DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
-                      &source->scheme, &parsed->scheme);
+  DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
+                      &parsed->scheme);
   DoOverrideComponent(repl_source.username, repl_parsed.username,
                       &source->username, &parsed->username);
   DoOverrideComponent(repl_source.password, repl_parsed.password,
                       &source->password, &parsed->password);
 
   // Our host should be empty if not present, so override the default setup.
-  DoOverrideComponent(repl_source.host, repl_parsed.host,
-                      &source->host, &parsed->host);
+  DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
+                      &parsed->host);
   if (parsed->host.len == -1)
     parsed->host.len = 0;
 
-  DoOverrideComponent(repl_source.port, repl_parsed.port,
-                      &source->port, &parsed->port);
-  DoOverrideComponent(repl_source.path, repl_parsed.path,
-                      &source->path, &parsed->path);
-  DoOverrideComponent(repl_source.query, repl_parsed.query,
-                      &source->query, &parsed->query);
-  DoOverrideComponent(repl_source.ref, repl_parsed.ref,
-                      &source->ref, &parsed->ref);
+  DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
+                      &parsed->port);
+  DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
+                      &parsed->path);
+  DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
+                      &parsed->query);
+  DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
+                      &parsed->ref);
 }
 
 bool SetupUTF16OverrideComponents(const char* base,
@@ -359,41 +417,43 @@
   const Parsed& repl_parsed = repl.components();
 
   success &= PrepareUTF16OverrideComponent(
-      repl_source.scheme, repl_parsed.scheme,
-      utf8_buffer, &parsed->scheme);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.username, repl_parsed.username,
-      utf8_buffer, &parsed->username);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.password, repl_parsed.password,
-      utf8_buffer, &parsed->password);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.host, repl_parsed.host,
-      utf8_buffer, &parsed->host);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.port, repl_parsed.port,
-      utf8_buffer, &parsed->port);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.path, repl_parsed.path,
-      utf8_buffer, &parsed->path);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.query, repl_parsed.query,
-      utf8_buffer, &parsed->query);
-  success &= PrepareUTF16OverrideComponent(
-      repl_source.ref, repl_parsed.ref,
-      utf8_buffer, &parsed->ref);
+      repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
+  success &=
+      PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
+                                    utf8_buffer, &parsed->username);
+  success &=
+      PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
+                                    utf8_buffer, &parsed->password);
+  success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
+                                           utf8_buffer, &parsed->host);
+  success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
+                                           utf8_buffer, &parsed->port);
+  success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
+                                           utf8_buffer, &parsed->path);
+  success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
+                                           utf8_buffer, &parsed->query);
+  success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
+                                           utf8_buffer, &parsed->ref);
 
   // PrepareUTF16OverrideComponent will not have set the data pointer since the
   // buffer could be resized, invalidating the pointers. We set the data
   // pointers for affected components now that the buffer is finalized.
-  if (repl_source.scheme)   source->scheme = utf8_buffer->data();
-  if (repl_source.username) source->username = utf8_buffer->data();
-  if (repl_source.password) source->password = utf8_buffer->data();
-  if (repl_source.host)     source->host = utf8_buffer->data();
-  if (repl_source.port)     source->port = utf8_buffer->data();
-  if (repl_source.path)     source->path = utf8_buffer->data();
-  if (repl_source.query)    source->query = utf8_buffer->data();
-  if (repl_source.ref)      source->ref = utf8_buffer->data();
+  if (repl_source.scheme)
+    source->scheme = utf8_buffer->data();
+  if (repl_source.username)
+    source->username = utf8_buffer->data();
+  if (repl_source.password)
+    source->password = utf8_buffer->data();
+  if (repl_source.host)
+    source->host = utf8_buffer->data();
+  if (repl_source.port)
+    source->port = utf8_buffer->data();
+  if (repl_source.path)
+    source->path = utf8_buffer->data();
+  if (repl_source.query)
+    source->query = utf8_buffer->data();
+  if (repl_source.ref)
+    source->ref = utf8_buffer->data();
 
   return success;
 }

diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 58ae144..b9ac5bf 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h

@@ -132,9 +132,8 @@
 // does no checking that thee character requires escaping.
 // Escaping makes sense only 8 bit chars, so code works in all cases of
 // input parameters (8/16bit).
-template<typename UINCHAR, typename OUTCHAR>
-inline void AppendEscapedChar(UINCHAR ch,
-                              CanonOutputT<OUTCHAR>* output) {
+template <typename UINCHAR, typename OUTCHAR>
+inline void AppendEscapedChar(UINCHAR ch, CanonOutputT<OUTCHAR>* output) {
   output->push_back('%');
   output->push_back(static_cast<OUTCHAR>(kHexCharLookup[(ch >> 4) & 0xf]));
   output->push_back(static_cast<OUTCHAR>(kHexCharLookup[ch & 0xf]));
@@ -173,22 +172,17 @@
     Appender(static_cast<unsigned char>(char_value), output);
   } else if (char_value <= 0x7ff) {
     // 110xxxxx 10xxxxxx
-    Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)),
-             output);
-    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
-             output);
+    Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)), output);
+    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
   } else if (char_value <= 0xffff) {
     // 1110xxxx 10xxxxxx 10xxxxxx
-    Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),
-             output);
+    Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)), output);
     Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
              output);
-    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
-             output);
+    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
   } else {
     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-    Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
-             output);
+    Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)), output);
     Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
              output);
     Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
@@ -316,8 +310,8 @@
                           size_t* begin,
                           size_t end,
                           unsigned char* unescaped_value) {
-  if (*begin + 3 > end ||
-      !Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
+  if (*begin + 3 > end || !Is8BitChar(spec[*begin + 1]) ||
+      !Is8BitChar(spec[*begin + 2])) {
     // Invalid escape sequence because there's not enough room, or the
     // digits are not ASCII.
     return false;
@@ -446,7 +440,7 @@
 int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix);
 
 // Secure template overloads for these functions
-template<size_t N>
+template <size_t N>
 inline int _itoa_s(int value, char (&buffer)[N], int radix) {
   return _itoa_s(value, buffer, N, radix);
 }
@@ -458,12 +452,20 @@
 
 // _strtoui64 and strtoull behave the same
 inline unsigned long long _strtoui64(const char* nptr,
-                                     char** endptr, int base) {
+                                     char** endptr,
+                                     int base) {
   return strtoull(nptr, endptr, base);
 }
 
 #endif  // WIN32
 
+// The threshold we set to consider SIMD processing, in bytes; there is
+// no deep theory here, it's just set empirically to a value that seems
+// to be good. (We don't really know why a threshold of zero is slower,
+// but a guess would be that it's not worth entering a complex loop
+// with a lot of setup for a five-byte string.)
+static constexpr int kMinimumLengthForSIMD = 50;
+
 }  // namespace url
 
 #endif  // URL_URL_CANON_INTERNAL_H_

diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc
index fde31f1..ec8617e 100644
--- a/url/url_canon_ip.cc
+++ b/url/url_canon_ip.cc

@@ -44,7 +44,7 @@
                                             const Component& component,
                                             uint32_t* number) {
   // Empty components are considered non-numeric.
-  if (!component.is_nonempty())
+  if (component.is_empty())
     return CanonHostInfo::NEUTRAL;
 
   // Figure out the base
@@ -133,7 +133,7 @@
     --host.len;
 
   // Do nothing if empty.
-  if (!host.is_nonempty())
+  if (host.is_empty())
     return CanonHostInfo::NEUTRAL;
 
   // Read component values.  The first `existing_components` of them are
@@ -302,7 +302,7 @@
   // Zero-out the info.
   parsed->reset();
 
-  if (!host.is_nonempty())
+  if (host.is_empty())
     return false;
 
   // The index for start and end of address range (no brackets).
@@ -447,7 +447,7 @@
                            unsigned char address[16]) {
   // Make sure the component is bounded by '[' and ']'.
   int end = host.end();
-  if (!host.is_nonempty() || spec[host.begin] != '[' || spec[end - 1] != ']')
+  if (host.is_empty() || spec[host.begin] != '[' || spec[end - 1] != ']')
     return false;
 
   // Exclude the square brackets.

diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 3480517..9a03fb4 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc

@@ -255,7 +255,7 @@
                            const Component& path,
                            size_t path_begin_in_output,
                            CanonOutput* output) {
-  if (!path.is_nonempty())
+  if (path.is_empty())
     return true;
 
   size_t end = static_cast<size_t>(path.end());
@@ -407,7 +407,7 @@
             Component* out_path) {
   bool success = true;
   out_path->begin = output->length();
-  if (path.len > 0) {
+  if (path.is_nonempty()) {
     // Write out an initial slash if the input has none. If we just parse a URL
     // and then canonicalize it, it will of course have a slash already. This
     // check is for the replacement and relative URL resolving cases of file

diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc
index d326ce8..b48800c 100644
--- a/url/url_canon_query.cc
+++ b/url/url_canon_query.cc

@@ -106,7 +106,7 @@
                          CharsetConverter* converter,
                          CanonOutput* output,
                          Component* out_query) {
-  if (query.len < 0) {
+  if (!query.is_valid()) {
     *out_query = Component();
     return;
   }

diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index 80588fe..67780b1 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc

@@ -239,7 +239,7 @@
                       const Component& source_component,
                       CanonOutput* output,
                       Component* output_component) {
-  if (source_component.len < 0) {
+  if (!source_component.is_valid()) {
     // This component is not present.
     *output_component = Component();
     return;
@@ -323,7 +323,7 @@
                               std::max({path.end(), query.end(), ref.end()}));
   output->Append(base_url, base_parsed.path.begin);
 
-  if (path.len > 0) {
+  if (path.is_nonempty()) {
     // The path is replaced or modified.
     int true_path_begin = output->length();
 
@@ -492,7 +492,7 @@
   // paths (even the default path of "/" is OK).
   //
   // We allow hosts with no length so we can handle file URLs, for example.
-  if (base_parsed.path.len <= 0) {
+  if (base_parsed.path.is_empty()) {
     // On error, return the input (resolving a relative URL on a non-relative
     // base = the base).
     int base_len = base_parsed.Length();
@@ -501,7 +501,7 @@
     return false;
   }
 
-  if (relative_component.len <= 0) {
+  if (relative_component.is_empty()) {
     // Empty relative URL, leave unchanged, only removing the ref component.
     int base_len = base_parsed.Length();
     base_len -= base_parsed.ref.len + 1;

diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc
index da18d42..8096b56 100644
--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc

@@ -58,7 +58,7 @@
                                 output, &new_parsed->host);
 
     // Host must not be empty for standard URLs.
-    if (!parsed.host.is_nonempty())
+    if (parsed.host.is_empty())
       success = false;
 
     // Port: the port canonicalizer will handle the colon.

diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 62a5c36..8890639 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc

@@ -10,10 +10,12 @@
 #include "base/strings/string_piece.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/test/gtest_util.h"
+#include "base/test/scoped_feature_list.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon_internal.h"
 #include "url/url_canon_stdstring.h"
+#include "url/url_features.h"
 #include "url/url_test_utils.h"
 
 namespace url {
@@ -285,38 +287,78 @@
   EXPECT_EQ(0, out_comp.len);
 }
 
-TEST(URLCanonTest, Host) {
+// IDNA mode to use in CanonHost tests.
+enum class IDNAMode { kTransitional, kNonTransitional };
+
+class URLCanonHostTest : public ::testing::Test,
+                         public ::testing::WithParamInterface<IDNAMode> {
+ public:
+  URLCanonHostTest() {
+    if (GetParam() == IDNAMode::kNonTransitional) {
+      scoped_feature_list_.InitAndEnableFeature(kUseIDNA2008NonTransitional);
+    } else {
+      scoped_feature_list_.InitAndDisableFeature(kUseIDNA2008NonTransitional);
+    }
+  }
+
+ private:
+  gurl_base::test::ScopedFeatureList scoped_feature_list_;
+};
+
+INSTANTIATE_TEST_SUITE_P(All,
+                         URLCanonHostTest,
+                         ::testing::Values(IDNAMode::kTransitional,
+                                           IDNAMode::kNonTransitional));
+
+TEST_P(URLCanonHostTest, Host) {
+  bool use_idna_non_transitional = IsUsingIDNA2008NonTransitional();
+
   IPAddressCase host_cases[] = {
-       // Basic canonicalization, uppercase should be converted to lowercase.
-    {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      // Basic canonicalization, uppercase should be converted to lowercase.
+      {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Spaces and some other characters should be escaped.
-    {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+      {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com",
+       Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
       // Exciting different types of spaces!
-    {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
+      {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Other types of space (no-break, zero-width, zero-width-no-break) are
       // name-prepped away to nothing.
-    {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Ideographic full stop (full-width period for Chinese, etc.) should be
       // treated as a dot.
-    {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      {NULL,
+       L"www.foo\x3002"
+       L"bar.com",
+       "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
       // Invalid unicode characters should fail...
       // ...In wide input, ICU will barf and we'll end up with the input as
       //    escaped UTF-8 (the invalid character should be replaced with the
       //    replacement character).
-    {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
       // ...This is the same as previous but with with escaped.
-    {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
-      // Test name prepping, fullwidth input should be converted to ASCII and NOT
+      {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      // Test name prepping, fullwidth input should be converted to ASCII and
+      // NOT
       // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
-    {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com",
+       Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
       // Test that fullwidth escaped values are properly name-prepped,
       // then converted or rejected.
       // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
-    {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
-    {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
       // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
-    {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
-    {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com",
+       "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com",
+       "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
       // ICU will convert weird percents into ASCII percents, but not unescape
       // further. A weird percent is U+FE6A (EF B9 AA in UTF-8) which is a
       // "small percent". At this point we should be within our rights to mark
@@ -324,12 +366,30 @@
       // happens to allow ASCII characters (%41 = "A" -> 'a') to be unescaped
       // and kept as valid, so we validate that behavior here, but this level
       // of fixing the input shouldn't be seen as required. "%81" is invalid.
-    {"\xef\xb9\xaa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
-    {"%ef%b9%aa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
-    {"\xef\xb9\xaa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
-    {"%ef%b9%aa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"\xef\xb9\xaa"
+       "41.com",
+       L"\xfe6a"
+       L"41.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"%ef%b9%aa"
+       "41.com",
+       L"\xfe6a"
+       L"41.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xef\xb9\xaa"
+       "81.com",
+       L"\xfe6a"
+       L"81.com",
+       "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"%ef%b9%aa"
+       "81.com",
+       L"\xfe6a"
+       L"81.com",
+       "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
       // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
-    {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+       L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // See http://unicode.org/cldr/utility/idna.jsp for other
       // examples/experiments and http://goo.gl/7yG11o
       // for the full list of characters handled differently by
@@ -337,169 +397,206 @@
 
       // 4 Deviation characters are mapped/ignored in UTS 46 transitional
       // mechansm. UTS 46, table 4 row (g).
-      // Sharp-s is mapped to 'ss' in UTS 46 and IDNA 2003.
-      // Otherwise, it'd be "xn--fuball-cta.de".
-    {"fu\xc3\x9f" "ball.de", L"fu\x00df" L"ball.de", "fussball.de",
-      Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
-      // Final-sigma (U+03C3) is mapped to regular sigma (U+03C2).
-      // Otherwise, it'd be "xn--wxaijb9b".
-    {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
-      "xn--wxaikc6b", Component(0, 12),
-      CanonHostInfo::NEUTRAL, -1, ""},
+      // Sharp-s is mapped to 'ss' in IDNA 2003, not in IDNA 2008 or UTS 46
+      // after transitional period.
+      // Previously, it'd be "fussball.de".
+      {"fu\xc3\x9f"
+       "ball.de",
+       L"fu\x00df"
+       L"ball.de",
+       use_idna_non_transitional ? "xn--fuball-cta.de" : "fussball.de",
+       use_idna_non_transitional ? Component(0, 17) : Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
+      // Final-sigma (U+03C2) was mapped to regular sigma (U+03C3).
+      // Previously, it'd be "xn--wxaikc6b".
+      {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
+       use_idna_non_transitional ? "xn--wxaijb9b" : "xn--wxaikc6b",
+       Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+
       // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional
-      // handling as well as in IDNA 2003.
-    {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc",
-      Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""},
-      // ZWJ between Devanagari characters is still mapped away in UTS 46
-      // transitional handling. IDNA 2008 would give xn--11bo0mv54g.
-    {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
-     L"\x915\x94d\x200d\x91c", "xn--11bo0m",
-     Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+      // handling as well as in IDNA 2003, but not thereafter.
+      {"a\xe2\x80\x8c"
+       "b\xe2\x80\x8d"
+       "c",
+       L"a\x200c"
+       L"b\x200d"
+       L"c",
+       use_idna_non_transitional ? "xn--abc-9m0ag" : "abc",
+       use_idna_non_transitional ? Component(0, 13) : Component(0, 3),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
+      // ZWJ between Devanagari characters was still mapped away in UTS 46
+      // transitional handling. IDNA 2008 gives xn--11bo0mv54g.
+      // Previously "xn--11bo0m".
+      {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
+       L"\x915\x94d\x200d\x91c",
+       use_idna_non_transitional ? "xn--11bo0mv54g" : "xn--11bo0m",
+       use_idna_non_transitional ? Component(0, 14) : Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
       // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
       // However, we do allow this at the moment because we don't use
       // STD3 rules and canonicalize full-width ASCII to ASCII.
-    {"wow\xef\xbc\x81", L"wow\xff01", "wow%21",
-      Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+      {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", Component(0, 6),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
       // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
-    {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo",
-      Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
+      {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", Component(0, 11),
+       CanonHostInfo::BROKEN, -1, ""},
       // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d)
       // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
-    {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
-      "%F0%AF%A1%A8%E5%A7%BB.cn",
-      Component(0, 24), CanonHostInfo::BROKEN, -1, ""},
+      {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
+       "%F0%AF%A1%A8%E5%A7%BB.cn", Component(0, 24), CanonHostInfo::BROKEN, -1,
+       ""},
       // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e)
-    {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya",
-      Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // An already-IDNA host is not modified.
-    {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya",
-      Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Symbol/punctuations are allowed in IDNA 2003/UTS46.
       // Not allowed in IDNA 2008. UTS 46 table 4 row (f).
-    {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us",
-      Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us", Component(0, 13),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h)
       // We used to allow it because we passed through unassigned code points.
-    {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
-      Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
+       Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
       // U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i)
       // Used to be allowed in INDA 2003.
-    {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg",
-      Component(0, 9), CanonHostInfo::BROKEN, -1, ""},
+      {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg", Component(0, 9),
+       CanonHostInfo::BROKEN, -1, ""},
       // U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based
       // on Unicode 3.2). We did allow it in the past because we let unassigned
       // code point pass. We continue to allow it even though it's a
       // "punctuation and symbol" blocked in IDNA 2008.
       // UTS 46 table 4, row (j)
-    {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com",
-      Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com", Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Maps uppercase letters to lower case letters.
       // In IDNA 2003, it's allowed without case-folding
       // ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2
       // (added in Unicode 4.1). UTS 46 table 4 row (k)
-    {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com",
-      Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com", Component(0, 15),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Maps U+FF43 (Full Width Small Letter C) to 'c'.
-    {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz",
-      Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+      {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz", Component(0, 7),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Maps U+1D68C (Math Monospace Small C) to 'c'.
       // U+1D68C = \xD835\xDE8C in UTF-16
-    {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
-      Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+      {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
+       Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
       // BiDi check test
       // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM.
       // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008.
-    {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
-     L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw",
-     Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
+       L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw", Component(0, 13),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Disallowed in both IDNA 2003 and 2008 with BiDi check.
       // Labels starting with a RTL character cannot end with a LTR character.
-    {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
-     "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21),
-     CanonHostInfo::BROKEN, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
+       "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21), CanonHostInfo::BROKEN, -1,
+       ""},
       // Labels starting with a RTL character can end with BC=EN (European
       // number). Disallowed in IDNA 2003 but now allowed.
-    {"\xd8\xac\xd8\xa7\xd8\xb1" "2", L"\x62c\x627\x631" L"2",
-     "xn--2-ymcov", Component(0, 11),
-     CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1"
+       "2",
+       L"\x62c\x627\x631"
+       L"2",
+       "xn--2-ymcov", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
       // Labels starting with a RTL character cannot have "L" characters
       // even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008.
-    {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
-     "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21),
-     CanonHostInfo::BROKEN, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
+       "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21), CanonHostInfo::BROKEN, -1,
+       ""},
       // Labels starting with a RTL character can end with BC=AN (Arabic number)
       // Disallowed in IDNA 2003, but now allowed.
-    {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
-     "xn--mgbjq0r", Component(0, 11),
-     CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
+       "xn--mgbjq0r", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
       // Labels starting with a RTL character cannot have "L" characters
       // even if it ends with an BC=AN (Arabic number).
       // Disallowed in both IDNA 2003/2008.
-    {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
-     "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26),
-     CanonHostInfo::BROKEN, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
+       "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26), CanonHostInfo::BROKEN,
+       -1, ""},
       // Labels starting with a RTL character cannot mix BC=EN and BC=AN
-    {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
-     "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27),
-     CanonHostInfo::BROKEN, -1, ""},
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
+       "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27), CanonHostInfo::BROKEN,
+       -1, ""},
       // As of Unicode 6.2, U+20CF is not assigned. We do not allow it.
-    {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com",
-      Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
+      {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com", Component(0, 13),
+       CanonHostInfo::BROKEN, -1, ""},
       // U+0080 is not allowed.
-    {"\xc2\x80.com", L"\x80.com", "%C2%80.com",
-      Component(0, 10), CanonHostInfo::BROKEN, -1, ""},
+      {"\xc2\x80.com", L"\x80.com", "%C2%80.com", Component(0, 10),
+       CanonHostInfo::BROKEN, -1, ""},
       // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
       // UTF-8 (wide case). The output should be equivalent to the true wide
       // character input above).
-    {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
-      L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba",
-      Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+      {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
+       L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
       // Invalid escaped characters should fail and the percents should be
       // escaped.
-    {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
-      CanonHostInfo::BROKEN, -1, ""},
+      {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
+       CanonHostInfo::BROKEN, -1, ""},
       // If we get an invalid character that has been escaped.
-    {"%25", L"%25", "%25", Component(0, 3),
-      CanonHostInfo::BROKEN, -1, ""},
-    {"hello%00", L"hello%00", "hello%00", Component(0, 8),
-      CanonHostInfo::BROKEN, -1, ""},
+      {"%25", L"%25", "%25", Component(0, 3), CanonHostInfo::BROKEN, -1, ""},
+      {"hello%00", L"hello%00", "hello%00", Component(0, 8),
+       CanonHostInfo::BROKEN, -1, ""},
       // Escaped numbers should be treated like IP addresses if they are.
-    {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
-      "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
-      "C0A80001"},
-    {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
-      "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
-      "C0A80001"},
+      {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
+       "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      {"%30%78%63%30%2e%30%32%35%30.01%2e",
+       L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", Component(0, 11),
+       CanonHostInfo::IPV4, 3, "C0A80001"},
       // Invalid escaping should trigger the regular host error handling.
-    {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
+      {"%3g%78%63%30%2e%30%32%35%30%2E.01",
+       L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01",
+       Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
       // Something that isn't exactly an IP should get treated as a host and
       // spaces escaped.
-    {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+      {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello",
+       Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
       // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
       // These are "0Xc0.0250.01" in fullwidth.
-    {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%"
+       "8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%"
+       "8E\xef\xbc\x90\xef\xbc\x91",
+       L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10"
+       L"\xff11",
+       "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
       // Broken IP addresses get marked as such.
-    {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
-    {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""},
+      {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13),
+       CanonHostInfo::BROKEN, -1, ""},
+      {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12),
+       CanonHostInfo::BROKEN, -1, ""},
       // Cyrillic letter followed by '(' should return punycode for '(' escaped
       // before punycode string was created. I.e.
       // if '(' is escaped after punycode is created we would get xn--%28-8tb
       // (incorrect).
-    {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
-      CanonHostInfo::NEUTRAL, -1, ""},
-      // Address with all hexidecimal characters with leading number of 1<<32
+      {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Address with all hexadecimal characters with leading number of 1<<32
       // or greater and should return NEUTRAL rather than BROKEN if not all
       // components are numbers.
-    {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
-    {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
-    {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""},
-    {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
-    {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
-    // A label that starts with "xn--" but contains non-ASCII characters should
-    // be an error. Escape the invalid characters.
-    {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      {"12345678912345.de", L"12345678912345.de", "12345678912345.de",
+       Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
+      {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de",
+       Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+      {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de",
+       "12345678912345.12345678912345.de", Component(0, 32),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de",
+       Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
+      {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde",
+       Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
+      // A label that starts with "xn--" but contains non-ASCII characters
+      // should be an error.
+      // Escape the invalid characters.
+      {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
   };
 
   // CanonicalizeHost() non-verbose.

diff --git a/url/url_features.cc b/url/url_features.cc
new file mode 100644
index 0000000..149cd4a
--- /dev/null
+++ b/url/url_features.cc

@@ -0,0 +1,16 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_features.h"
+
+namespace url {
+
+BASE_FEATURE(kUseIDNA2008NonTransitional,
+             "UseIDNA2008NonTransitional",
+             gurl_base::FEATURE_ENABLED_BY_DEFAULT);
+
+bool IsUsingIDNA2008NonTransitional() {
+  return gurl_base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
+}
+}  // namespace url

diff --git a/url/url_features.h b/url/url_features.h
new file mode 100644
index 0000000..3fed085
--- /dev/null
+++ b/url/url_features.h

@@ -0,0 +1,19 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_FEATURES_H_
+#define URL_URL_FEATURES_H_
+
+#include "polyfills/base/component_export.h"
+#include "polyfills/base/feature_list.h"
+
+namespace url {
+
+COMPONENT_EXPORT(URL) BASE_DECLARE_FEATURE(kUseIDNA2008NonTransitional);
+
+// Returns true if Chrome is using IDNA 2008 in Non-Transitional mode.
+COMPONENT_EXPORT(URL) bool IsUsingIDNA2008NonTransitional();
+}  // namespace url
+
+#endif  // URL_URL_FEATURES_H_

diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index 356a1cd..4a3a602 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc

@@ -15,21 +15,25 @@
 #include <unicode/utypes.h>
 #include "url/url_canon_icu.h"
 #include "url/url_canon_internal.h"  // for _itoa_s
+#include "url/url_features.h"
 
 namespace url {
 
+namespace {
+
 // Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with
 // uidna_openUTS46().
 //
 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
-// code points allowed) to IDNA 2008 with
-// the backward compatibility in mind. What it does:
+// code points allowed) to IDNA 2008 with the backward compatibility in mind.
+// What it does:
 //
 // 1. Use the up-to-date Unicode data.
 // 2. Define a case folding/mapping with the up-to-date Unicode data as
 //    in IDNA 2003.
-// 3. Use transitional mechanism for 4 deviation characters (sharp-s,
-//    final sigma, ZWJ and ZWNJ) for now.
+// 3. If `use_idna_non_transitional` is true, use the non-transitional
+//    mechanism for the 4 deviation characters (sharp-s, final sigma, ZWJ and
+//    ZWNJ) per url.spec.whatwg.org; otherwise use the transitional mechanism.
 // 4. Continue to allow symbols and punctuations.
 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules.
 // 6. Do not apply STD3 rules
@@ -39,25 +43,39 @@
 // http://goo.gl/3XBhqw ).
 // See http://http://unicode.org/reports/tr46/ and references therein
 // for more details.
-UIDNA* GetUIDNA() {
-  static UIDNA* uidna = [] {
-    UErrorCode err = U_ZERO_ERROR;
-    // TODO(jungshik): Change options as different parties (browsers,
-    // registrars, search engines) converge toward a consensus.
-    UIDNA* value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
-    if (U_FAILURE(err)) {
-      GURL_CHECK(false) << "failed to open UTS46 data with error: "
-                   << u_errorName(err)
-                   << ". If you see this error message in a test environment "
-                   << "your test environment likely lacks the required data "
-                   << "tables for libicu. See https://crbug.com/778929.";
-      value = nullptr;
-    }
-    return value;
-  }();
-  return uidna;
+UIDNA* CreateIDNA(bool use_idna_non_transitional) {
+  uint32_t options = UIDNA_CHECK_BIDI;
+  if (use_idna_non_transitional) {
+    // Use non-transitional processing if enabled. See
+    // https://url.spec.whatwg.org/#idna for details.
+    options |=
+        UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE;
+  }
+  UErrorCode err = U_ZERO_ERROR;
+  UIDNA* idna = uidna_openUTS46(options, &err);
+  if (U_FAILURE(err)) {
+    GURL_CHECK(false) << "failed to open UTS46 data with error: " << u_errorName(err)
+                 << ". If you see this error message in a test environment "
+                 << "your test environment likely lacks the required data "
+                 << "tables for libicu. See https://crbug.com/778929.";
+    idna = nullptr;
+  }
+  return idna;
 }
 
+UIDNA* GetUIDNA() {
+  // This logic results in having two UIDNA instances in tests. This is okay.
+  if (IsUsingIDNA2008NonTransitional()) {
+    static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/true);
+    return uidna;
+  } else {
+    static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/false);
+    return uidna;
+  }
+}
+
+}  // namespace
+
 // Converts the Unicode input representing a hostname to ASCII using IDN rules.
 // The output must be ASCII, but is represented as wide characters.
 //

diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index f67a445..88b6f05 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc

@@ -89,8 +89,8 @@
 bool ComponentMatches(const char* input,
                       const char* reference,
                       const Component& component) {
-  // If the component is nonexistent (length == -1), it should begin at 0.
-  EXPECT_TRUE(component.len >= 0 || component.len == -1);
+  // Check that the -1 sentinel is the only allowed negative value.
+  EXPECT_TRUE(component.is_valid() || component.len == -1);
 
   // Begin should be valid.
   EXPECT_LE(0, component.begin);
@@ -98,7 +98,7 @@
   // A NULL reference means the component should be nonexistent.
   if (!reference)
     return component.len == -1;
-  if (component.len < 0)
+  if (!component.is_valid())
     return false;  // Reference is not NULL but we don't have anything
 
   if (strlen(reference) != static_cast<size_t>(component.len))

diff --git a/url/url_util.cc b/url/url_util.cc
index 872e469..da29651 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc

@@ -163,7 +163,7 @@
 inline bool DoCompareSchemeComponent(const CHAR* spec,
                                      const Component& component,
                                      const char* compare_to) {
-  if (!component.is_nonempty())
+  if (component.is_empty())
     return compare_to[0] == 0;  // When component is empty, match empty scheme.
   return gurl_base::EqualsCaseInsensitiveASCII(
       typename CharToStringPiece<CHAR>::Piece(&spec[component.begin],
@@ -178,7 +178,7 @@
                    const Component& scheme,
                    SchemeType* type,
                    const std::vector<SchemeWithType>& schemes) {
-  if (!scheme.is_nonempty())
+  if (scheme.is_empty())
     return false;  // Empty or invalid schemes are non-standard.
 
   for (const SchemeWithType& scheme_with_type : schemes) {