Update googleurl to a2e9430da59b678c058131fee8aa04370b1f8eed. This uses the latest Chromium version from Wed Jul 6 15:47:16 2022 +0000. Change-Id: I100fa197b5967a1d1098e851f1059e7e30916b24

commit: f8b82c668b56aabd8e84c4fd8908e40539920e6c [log] [tgz]
author: Victor Vasiliev <vasilvv@google.com> Wed Jul 06 12:14:03 2022 -0400
committer: Victor Vasiliev <vasilvv@google.com> Wed Jul 06 12:14:03 2022 -0400
tree: 4c29dd00224bc7385877c42cbf2dc0e639103166
parent: 643329026355a2ea6c076e125a508eeffd16ee58 [diff]
diff --git a/AUTHORS b/AUTHORS
index a68e49b..b81d16f 100644
--- a/AUTHORS
+++ b/AUTHORS

@@ -22,6 +22,7 @@
 Abhishek Agarwal <abhishek.a21@samsung.com>
 Abhishek Kanike <abhishek.ka@samsung.com>
 Abhishek Singh <abhi.rathore@samsung.com>
+Abin K Paul <abin.paul1@gmail.com>
 Abul Hasan Md Osama <ahm.osama@samsung.com>
 Adam Bonner <abonner-chromium@solscope.com>
 Adam Bujalski <abujalski@gmail.com>
@@ -382,6 +383,7 @@
 Ganesh Borle <ganesh.borle@samsung.com>
 Gao Chun <chun.gao@intel.com>
 Gao Chun <gaochun.dev@gmail.com>
+Gao Yu <wanggao@tencent.com>
 Gaurav Dhol <gaurav.dhol@einfochips.com>
 Gautham Banasandra <gautham.bangalore@gmail.com>
 George Adams <geoada@amazon.com>
@@ -471,6 +473,7 @@
 Isaac Murchie <murchieisaac@gmail.com>
 Isaac Reilly <reillyi@amazon.com>
 Ivan Naydonov <samogot@gmail.com>
+Ivan Pavlotskiy <ivan.pavlotskiy@lgepartner.com>
 Ivan Sham <ivansham@amazon.com>
 Jack Bates <jack@nottheoilrig.com>
 Jacky Hu <flameddd@gmail.com>
@@ -700,6 +703,7 @@
 Leo Wolf <jclw@ymail.com>
 Leon Han <leon.han@intel.com>
 Leung Wing Chung <lwchkg@gmail.com>
+Li Yanbo <liyanbo.monster@bytedance.com>
 Li Yin <li.yin@intel.com>
 Lidwine Genevet <lgenevet@cisco.com>
 Lin Sun <lin.sun@intel.com>
@@ -1139,10 +1143,12 @@
 Sunil Ratnu <sunil.ratnu@samsung.com>
 Sunitha Srivatsa <srivats@amazon.com>
 Sunwoo Nam <jegalzz88@gmail.com>
+Suresh Guttula <suresh.guttula@amd.corp-partner.google.com>
 Surya K M <suryagowda590@gmail.com>
 Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
 Suvanjan Mukherjee <suvanjanmukherjee@gmail.com>
 Suyambulingam R M <suyambu.rm@samsung.com>
+Suyash Nayan <suyashnyn1@gmail.com>
 Suyash Sengar <suyash.s@samsung.com>
 Swarali Raut <swarali.sr@samsung.com>
 Swati Jaiswal <swa.jaiswal@samsung.com>
@@ -1158,6 +1164,7 @@
 Taeseong Yu <yugeeklab@gmail.com>
 Taeyeon Kim <ssg9732@gmail.com>
 Tae Shin <taeshindev@gmail.com>
+Taher Ali <taher.dasten@gmail.com>
 Takaaki Suzuki <takaakisuzuki.14@gmail.com>
 Takahiro Aoyagi <hogehoge@gachapin.jp>
 Takashi Fujita <tgfjt.mail@gmail.com>
@@ -1328,6 +1335,7 @@
 Zhengkun Li <zhengkli@amazon.com>
 Zhenyu Liang <zhenyu.liang@intel.com>
 Zhenyu Shan <zhenyu.shan@intel.com>
+Zhibo Wang <zhibo1.wang@intel.com>
 Zhifei Fang <facetothefate@gmail.com>
 Zhiyuan Ye <zhiyuanye@tencent.com>
 Zhuoyu Qian <zhuoyu.qian@samsung.com>

diff --git a/base/BUILD b/base/BUILD
index f86c18b..69dffea 100644
--- a/base/BUILD
+++ b/base/BUILD

@@ -34,6 +34,8 @@
         "memory/raw_ptr.h",
         "memory/raw_ptr_exclusion.h",
         "no_destructor.h",
+        "numerics/safe_conversions.h",
+        "numerics/safe_conversions_impl.h",
         "ranges/algorithm.h",
         "ranges/functional.h",
         "ranges/ranges.h",

diff --git a/base/memory/raw_ptr.h b/base/memory/raw_ptr.h
index f1ef810..0b378f4 100644
--- a/base/memory/raw_ptr.h
+++ b/base/memory/raw_ptr.h

@@ -62,6 +62,14 @@
 // These classes/structures are part of the raw_ptr implementation.
 // DO NOT USE THESE CLASSES DIRECTLY YOURSELF.
 
+// This type trait verifies a type can be used as a pointer offset.
+//
+// We support pointer offsets in signed (ptrdiff_t) or unsigned (size_t) values.
+// Smaller types are also allowed.
+template <typename Z>
+static constexpr bool offset_type =
+    std::is_integral_v<Z> && sizeof(Z) <= sizeof(ptrdiff_t);
+
 struct RawPtrNoOpImpl {
   // Wraps a pointer.
   template <typename T>
@@ -105,8 +113,10 @@
   }
 
   // Advance the wrapped pointer by `delta_elems`.
-  template <typename T>
-  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+  template <typename T,
+            typename Z,
+            typename = std::enable_if_t<offset_type<Z>, void>>
+  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
     return wrapped_ptr + delta_elems;
   }
 
@@ -119,6 +129,7 @@
 
   // This is for accounting only, used by unit tests.
   static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+  static ALWAYS_INLINE void IncrementLessCountForTest() {}
   static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
 };
 
@@ -246,8 +257,10 @@
   }
 
   // Advance the wrapped pointer by `delta_elems`.
-  template <typename T>
-  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+  template <typename T,
+            typename Z,
+            typename = std::enable_if_t<offset_type<Z>, void>>
+  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
     return wrapped_ptr + delta_elems;
   }
 
@@ -260,6 +273,7 @@
 
   // This is for accounting only, used by unit tests.
   static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+  static ALWAYS_INLINE void IncrementLessCountForTest() {}
   static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
 
  private:
@@ -423,8 +437,10 @@
   }
 
   // Advance the wrapped pointer by `delta_elems`.
-  template <typename T>
-  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+  template <typename T,
+            typename Z,
+            typename = std::enable_if_t<offset_type<Z>, void>>
+  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
 #if GURL_DCHECK_IS_ON() || BUILDFLAG(ENABLE_BACKUP_REF_PTR_SLOW_CHECKS)
     uintptr_t address = reinterpret_cast<uintptr_t>(wrapped_ptr);
     if (IsSupportedAndNotNull(address))
@@ -445,6 +461,7 @@
 
   // This is for accounting only, used by unit tests.
   static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+  static ALWAYS_INLINE void IncrementLessCountForTest() {}
   static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
 
  private:
@@ -457,8 +474,17 @@
   static BASE_EXPORT NOINLINE void AcquireInternal(uintptr_t address);
   static BASE_EXPORT NOINLINE void ReleaseInternal(uintptr_t address);
   static BASE_EXPORT NOINLINE bool IsPointeeAlive(uintptr_t address);
-  static BASE_EXPORT NOINLINE bool IsValidDelta(uintptr_t address,
-                                                ptrdiff_t delta_in_bytes);
+  template <typename Z, typename = std::enable_if_t<offset_type<Z>, void>>
+  static ALWAYS_INLINE bool IsValidDelta(uintptr_t address, Z delta_in_bytes) {
+    if constexpr (std::is_signed_v<Z>)
+      return IsValidSignedDelta(address, ptrdiff_t{delta_in_bytes});
+    else
+      return IsValidUnsignedDelta(address, size_t{delta_in_bytes});
+  }
+  static BASE_EXPORT NOINLINE bool IsValidSignedDelta(uintptr_t address,
+                                                      ptrdiff_t delta_in_bytes);
+  static BASE_EXPORT NOINLINE bool IsValidUnsignedDelta(uintptr_t address,
+                                                        size_t delta_in_bytes);
 };
 
 #endif  // BUILDFLAG(USE_BACKUP_REF_PTR)
@@ -510,8 +536,10 @@
   }
 
   // Advance the wrapped pointer by `delta_elems`.
-  template <typename T>
-  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+  template <typename T,
+            typename Z,
+            typename = std::enable_if_t<offset_type<Z>, void>>
+  static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
     return wrapped_ptr + delta_elems;
   }
 
@@ -524,6 +552,7 @@
 
   // This is for accounting only, used by unit tests.
   static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+  static ALWAYS_INLINE void IncrementLessCountForTest() {}
   static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
 
  private:
@@ -657,7 +686,9 @@
 using RawPtrBanDanglingIfSupported = internal::RawPtrNoOpImpl;
 #endif
 
-template <typename T, typename Impl = RawPtrBanDanglingIfSupported>
+using DefaultRawPtrImpl = RawPtrBanDanglingIfSupported;
+
+template <typename T, typename Impl = DefaultRawPtrImpl>
 class TRIVIAL_ABI GSL_POINTER raw_ptr {
  public:
   static_assert(raw_ptr_traits::IsSupportedType<T>::value,
@@ -847,11 +878,15 @@
     --(*this);
     return result;
   }
-  ALWAYS_INLINE raw_ptr& operator+=(ptrdiff_t delta_elems) {
+  template <typename Z,
+            typename = std::enable_if_t<internal::offset_type<Z>, void>>
+  ALWAYS_INLINE raw_ptr& operator+=(Z delta_elems) {
     wrapped_ptr_ = Impl::Advance(wrapped_ptr_, delta_elems);
     return *this;
   }
-  ALWAYS_INLINE raw_ptr& operator-=(ptrdiff_t delta_elems) {
+  template <typename Z,
+            typename = std::enable_if_t<internal::offset_type<Z>, void>>
+  ALWAYS_INLINE raw_ptr& operator-=(Z delta_elems) {
     return *this += -delta_elems;
   }
 
@@ -1050,6 +1085,37 @@
   return lhs.GetForComparison() >= rhs.GetForComparison();
 }
 
+// Template helpers for working with T* or raw_ptr<T>.
+template <typename T>
+struct IsPointer : std::false_type {};
+
+template <typename T>
+struct IsPointer<T*> : std::true_type {};
+
+template <typename T, typename I>
+struct IsPointer<raw_ptr<T, I>> : std::true_type {};
+
+template <typename T>
+inline constexpr bool IsPointerV = IsPointer<T>::value;
+
+template <typename T>
+struct RemovePointer {
+  using type = T;
+};
+
+template <typename T>
+struct RemovePointer<T*> {
+  using type = T;
+};
+
+template <typename T, typename I>
+struct RemovePointer<raw_ptr<T, I>> {
+  using type = T;
+};
+
+template <typename T>
+using RemovePointerT = typename RemovePointer<T>::type;
+
 }  // namespace base
 
 using gurl_base::raw_ptr;
@@ -1065,21 +1131,70 @@
 // never be dereferenced after becoming dangling.
 using DisableDanglingPtrDetection = gurl_base::RawPtrMayDangle;
 
+// See `docs/dangling_ptr.md`
+// Annotates known dangling raw_ptr. Those haven't been triaged yet. All the
+// occurrences are meant to be removed. See https://crbug.com/1291138.
+using DanglingUntriaged = DisableDanglingPtrDetection;
+
+// The following template parameters are only meaningful when `raw_ptr`
+// is `MTECheckedPtr` (never the case unless a particular GN arg is set
+// true.) `raw_ptr` users need not worry about this and can refer solely
+// to `DisableDanglingPtrDetection` and `DanglingUntriaged` above.
+//
+// The `raw_ptr` definition allows users to specify an implementation.
+// When `MTECheckedPtr` is in play, we need to augment this
+// implementation setting with another layer that allows the `raw_ptr`
+// to degrade into the no-op version.
+#if defined(PA_USE_MTE_CHECKED_PTR_WITH_64_BITS_POINTERS)
+
+// Direct pass-through to no-op implementation.
+using DegradeToNoOpWhenMTE = gurl_base::internal::RawPtrNoOpImpl;
+
+// As above, but with the "untriaged dangling" annotation.
+using DanglingUntriagedDegradeToNoOpWhenMTE = gurl_base::internal::RawPtrNoOpImpl;
+
+// As above, but with the "explicitly disable protection" annotation.
+using DisableDanglingPtrDetectionDegradeToNoOpWhenMTE =
+    gurl_base::internal::RawPtrNoOpImpl;
+
+#else
+
+// Direct pass-through to default implementation specified by `raw_ptr`
+// template.
+using DegradeToNoOpWhenMTE = gurl_base::RawPtrBanDanglingIfSupported;
+
+// Direct pass-through to `DanglingUntriaged`.
+using DanglingUntriagedDegradeToNoOpWhenMTE = DanglingUntriaged;
+
+// Direct pass-through to `DisableDanglingPtrDetection`.
+using DisableDanglingPtrDetectionDegradeToNoOpWhenMTE =
+    DisableDanglingPtrDetection;
+
+#endif  // defined(PA_USE_MTE_CHECKED_PTR_WITH_64_BITS_POINTERS)
+
 namespace std {
 
 // Override so set/map lookups do not create extra raw_ptr. This also allows
 // dangling pointers to be used for lookup.
-template <typename T, typename I>
-struct less<raw_ptr<T, I>> {
+template <typename T, typename Impl>
+struct less<raw_ptr<T, Impl>> {
   using is_transparent = void;
 
-  bool operator()(const raw_ptr<T, I>& lhs, const raw_ptr<T, I>& rhs) const {
+  bool operator()(const raw_ptr<T, Impl>& lhs,
+                  const raw_ptr<T, Impl>& rhs) const {
+    Impl::IncrementLessCountForTest();
     return lhs < rhs;
   }
 
-  bool operator()(T* lhs, const raw_ptr<T, I>& rhs) const { return lhs < rhs; }
+  bool operator()(T* lhs, const raw_ptr<T, Impl>& rhs) const {
+    Impl::IncrementLessCountForTest();
+    return lhs < rhs;
+  }
 
-  bool operator()(const raw_ptr<T, I>& lhs, T* rhs) const { return lhs < rhs; }
+  bool operator()(const raw_ptr<T, Impl>& lhs, T* rhs) const {
+    Impl::IncrementLessCountForTest();
+    return lhs < rhs;
+  }
 };
 
 }  // namespace std

diff --git a/base/strings/escape.cc b/base/strings/escape.cc
index 011b79c..5cd770d 100644
--- a/base/strings/escape.cc
+++ b/base/strings/escape.cc

@@ -61,7 +61,7 @@
       escaped.push_back(IntToHex(c >> 4));
       escaped.push_back(IntToHex(c & 0xf));
     } else {
-      escaped.push_back(c);
+      escaped.push_back(static_cast<char>(c));
     }
   }
   return escaped;
@@ -198,8 +198,8 @@
   char most_sig_digit(escaped_text[index + 1]);
   char least_sig_digit(escaped_text[index + 2]);
   if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
-    *value =
-        HexDigitToInt(most_sig_digit) * 16 + HexDigitToInt(least_sig_digit);
+    *value = static_cast<unsigned char>(HexDigitToInt(most_sig_digit) * 16 +
+                                        HexDigitToInt(least_sig_digit));
     return true;
   }
   return false;
@@ -236,7 +236,7 @@
     }
   }
 
-  int32_t char_index = 0;
+  size_t char_index = 0;
   // Check if the unicode "character" that was just unescaped is valid.
   if (!ReadUnicodeCharacter(reinterpret_cast<char*>(bytes), num_bytes,
                             &char_index, code_point_out)) {
@@ -253,10 +253,11 @@
 
 // This method takes a Unicode code point and returns true if it should be
 // unescaped, based on |rules|.
-bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) {
+bool ShouldUnescapeCodePoint(UnescapeRule::Type rules,
+                             base_icu::UChar32 code_point) {
   // If this is an ASCII character, use the lookup table.
-  if (code_point < 0x80) {
-    return kUrlUnescape[code_point] ||
+  if (code_point >= 0 && code_point < 0x80) {
+    return kUrlUnescape[static_cast<size_t>(code_point)] ||
            // Allow some additional unescaping when flags are set.
            (code_point == ' ' && (rules & UnescapeRule::SPACES)) ||
            // Allow any of the prohibited but non-control characters when doing
@@ -418,7 +419,7 @@
       // sequences.
       unsigned char non_utf8_byte;
       if (UnescapeUnsignedByteAtIndex(escaped_text, i, &non_utf8_byte)) {
-        result.push_back(non_utf8_byte);
+        result.push_back(static_cast<char>(non_utf8_byte));
         if (adjustments)
           adjustments->push_back(OffsetAdjuster::Adjustment(i, 3, 1));
         i += 3;
@@ -569,7 +570,7 @@
     // UnescapeUnsignedByteAtIndex does bounds checking, so this is always safe
     // to call.
     if (UnescapeUnsignedByteAtIndex(escaped_text, i, &byte)) {
-      unescaped_text[output_index++] = byte;
+      unescaped_text[output_index++] = static_cast<char>(byte);
       i += 3;
       continue;
     }
@@ -595,7 +596,7 @@
   unescaped_text->clear();
 
   std::set<unsigned char> illegal_encoded_bytes;
-  for (char c = '\x00'; c < '\x20'; ++c) {
+  for (unsigned char c = '\x00'; c < '\x20'; ++c) {
     illegal_encoded_bytes.insert(c);
   }
   if (fail_on_path_separators) {
@@ -632,7 +633,7 @@
 std::u16string UnescapeForHTML(StringPiece16 input) {
   static const struct {
     const char* ampersand_code;
-    const char replacement;
+    const char16_t replacement;
   } kEscapeToChars[] = {
       {"&lt;", '<'},   {"&gt;", '>'},   {"&amp;", '&'},
       {"&quot;", '"'}, {"&#39;", '\''},
@@ -648,14 +649,15 @@
        ++iter) {
     if (*iter == '&') {
       // Potential ampersand encode char.
-      size_t index = iter - text.begin();
+      size_t index = static_cast<size_t>(iter - text.begin());
       for (size_t i = 0; i < std::size(kEscapeToChars); i++) {
         if (ampersand_chars[i].empty()) {
           ampersand_chars[i] = ASCIIToUTF16(kEscapeToChars[i].ampersand_code);
         }
         if (text.find(ampersand_chars[i], index) == index) {
-          text.replace(iter, iter + ampersand_chars[i].length(), 1,
-                       kEscapeToChars[i].replacement);
+          text.replace(
+              iter, iter + static_cast<ptrdiff_t>(ampersand_chars[i].length()),
+              1, kEscapeToChars[i].replacement);
           break;
         }
       }

diff --git a/base/strings/escape.h b/base/strings/escape.h
index 57f2f9a..02203be 100644
--- a/base/strings/escape.h
+++ b/base/strings/escape.h

@@ -74,41 +74,39 @@
   // functions.
   typedef uint32_t Type;
 
-  enum {
-    // Don't unescape anything at all.
-    NONE = 0,
+  // Don't unescape anything at all.
+  static constexpr Type NONE = 0;
 
-    // Don't unescape anything special, but all normal unescaping will happen.
-    // This is a placeholder and can't be combined with other flags (since it's
-    // just the absence of them). All other unescape rules imply "normal" in
-    // addition to their special meaning. Things like escaped letters, digits,
-    // and most symbols will get unescaped with this mode.
-    NORMAL = 1 << 0,
+  // Don't unescape anything special, but all normal unescaping will happen.
+  // This is a placeholder and can't be combined with other flags (since it's
+  // just the absence of them). All other unescape rules imply "normal" in
+  // addition to their special meaning. Things like escaped letters, digits,
+  // and most symbols will get unescaped with this mode.
+  static constexpr Type NORMAL = 1 << 0;
 
-    // Convert %20 to spaces. In some places where we're showing URLs, we may
-    // want this. In places where the URL may be copied and pasted out, then
-    // you wouldn't want this since it might not be interpreted in one piece
-    // by other applications.  Other UTF-8 spaces will not be unescaped.
-    SPACES = 1 << 1,
+  // Convert %20 to spaces. In some places where we're showing URLs, we may
+  // want this. In places where the URL may be copied and pasted out, then
+  // you wouldn't want this since it might not be interpreted in one piece
+  // by other applications.  Other UTF-8 spaces will not be unescaped.
+  static constexpr Type SPACES = 1 << 1;
 
-    // Unescapes '/' and '\\'. If these characters were unescaped, the resulting
-    // URL won't be the same as the source one. Moreover, they are dangerous to
-    // unescape in strings that will be used as file paths or names. This value
-    // should only be used when slashes don't have special meaning, like data
-    // URLs.
-    PATH_SEPARATORS = 1 << 2,
+  // Unescapes '/' and '\\'. If these characters were unescaped, the resulting
+  // URL won't be the same as the source one. Moreover, they are dangerous to
+  // unescape in strings that will be used as file paths or names. This value
+  // should only be used when slashes don't have special meaning, like data
+  // URLs.
+  static constexpr Type PATH_SEPARATORS = 1 << 2;
 
-    // Unescapes various characters that will change the meaning of URLs,
-    // including '%', '+', '&', '#'. Does not unescape path separators.
-    // If these characters were unescaped, the resulting URL won't be the same
-    // as the source one. This flag is used when generating final output like
-    // filenames for URLs where we won't be interpreting as a URL and want to do
-    // as much unescaping as possible.
-    URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3,
+  // Unescapes various characters that will change the meaning of URLs,
+  // including '%', '+', '&', '#'. Does not unescape path separators.
+  // If these characters were unescaped, the resulting URL won't be the same
+  // as the source one. This flag is used when generating final output like
+  // filenames for URLs where we won't be interpreting as a URL and want to do
+  // as much unescaping as possible.
+  static constexpr Type URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3;
 
-    // URL queries use "+" for space. This flag controls that replacement.
-    REPLACE_PLUS_WITH_SPACE = 1 << 4,
-  };
+  // URL queries use "+" for space. This flag controls that replacement.
+  static constexpr Type REPLACE_PLUS_WITH_SPACE = 1 << 4;
 };
 
 // Unescapes |escaped_text| and returns the result.

diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc
index d7c9a47..607d6d5 100644
--- a/base/strings/pattern.cc
+++ b/base/strings/pattern.cc

@@ -124,7 +124,7 @@
   base_icu::UChar32 operator()(const char** p, const char* end) {
     base_icu::UChar32 c;
     int offset = 0;
-    CBU8_NEXT(*p, offset, end - *p, c);
+    CBU8_NEXT(reinterpret_cast<const uint8_t*>(*p), offset, end - *p, c);
     *p += offset;
     return c;
   }

diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc
index 6c9aa19..26a7715 100644
--- a/base/strings/safe_sprintf.cc
+++ b/base/strings/safe_sprintf.cc

@@ -223,8 +223,13 @@
   // if |pad| is ' '.
   //
   // Returns "false", if the |buffer_| overflowed at any time.
-  bool IToASCII(bool sign, bool upcase, int64_t i, int base,
-                char pad, size_t padding, const char* prefix);
+  bool IToASCII(bool sign,
+                bool upcase,
+                int64_t i,
+                size_t base,
+                char pad,
+                size_t padding,
+                const char* prefix);
 
  private:
   // Increments |count_| by |inc| unless this would cause |count_| to
@@ -275,9 +280,13 @@
   size_t count_;
 };
 
-
-bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base,
-                      char pad, size_t padding, const char* prefix) {
+bool Buffer::IToASCII(bool sign,
+                      bool upcase,
+                      int64_t i,
+                      size_t base,
+                      char pad,
+                      size_t padding,
+                      const char* prefix) {
   // Sanity check for parameters. None of these should ever fail, but see
   // above for the rationale why we can't call GURL_CHECK().
   DEBUG_CHECK(base >= 2);
@@ -295,7 +304,7 @@
   //   if (sign && i < 0)
   //     prefix = "-";
   //   num = abs(i);
-  int minint = 0;
+  size_t minint = 0;
   uint64_t num;
   if (sign && i < 0) {
     prefix = "-";
@@ -335,7 +344,7 @@
     }
   } else
     prefix = nullptr;
-  const size_t prefix_length = reverse_prefix - prefix;
+  const size_t prefix_length = static_cast<size_t>(reverse_prefix - prefix);
 
   // Loop until we have converted the entire number. Output at least one
   // character (i.e. '0').
@@ -384,7 +393,8 @@
       }
     } else {
       started = true;
-      Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]);
+      Out((upcase ? kUpCaseHexDigits
+                  : kDownCaseHexDigits)[num % base + minint]);
     }
 
     minint = 0;
@@ -457,13 +467,14 @@
         // character from a space ' ' to a zero '0'.
         pad = ch == '0' ? '0' : ' ';
         for (;;) {
+          const size_t digit = static_cast<size_t>(ch - '0');
           // The maximum allowed padding fills all the available address
           // space and leaves just enough space to insert the trailing NUL.
           const size_t max_padding = kSSizeMax - 1;
-          if (padding > max_padding/10 ||
-              10*padding > max_padding - (ch - '0')) {
-            DEBUG_CHECK(padding <= max_padding/10 &&
-                        10*padding <= max_padding - (ch - '0'));
+          if (padding > max_padding / 10 ||
+              10 * padding > max_padding - digit) {
+            DEBUG_CHECK(padding <= max_padding / 10 &&
+                        10 * padding <= max_padding - digit);
             // Integer overflow detected. Skip the rest of the width until
             // we find the format character, then do the normal error handling.
           padding_overflow:
@@ -475,7 +486,7 @@
             }
             goto fail_to_expand;
           }
-          padding = 10*padding + ch - '0';
+          padding = 10 * padding + digit;
           if (padding > max_padding) {
             // This doesn't happen for "sane" values of kSSizeMax. But once
             // kSSizeMax gets smaller than about 10, our earlier range checks
@@ -552,9 +563,9 @@
         } else {
           // Pointer values require an actual pointer or a string.
           if (arg.type == Arg::POINTER) {
-            i = reinterpret_cast<uintptr_t>(arg.ptr);
+            i = static_cast<int64_t>(reinterpret_cast<uintptr_t>(arg.ptr));
           } else if (arg.type == Arg::STRING) {
-            i = reinterpret_cast<uintptr_t>(arg.str);
+            i = static_cast<int64_t>(reinterpret_cast<uintptr_t>(arg.str));
           } else if (arg.type == Arg::INT &&
                      arg.integer.width == sizeof(NULL) &&
                      arg.integer.i == 0) {  // Allow C++'s version of NULL

diff --git a/base/strings/safe_sprintf.h b/base/strings/safe_sprintf.h
index 8e6c922..a5b242b 100644
--- a/base/strings/safe_sprintf.h
+++ b/base/strings/safe_sprintf.h

@@ -167,7 +167,7 @@
     integer.width = sizeof(long);
   }
   Arg(unsigned long j) : type(UINT) {
-    integer.i = j;
+    integer.i = static_cast<int64_t>(j);
     integer.width = sizeof(long);
   }
   Arg(signed long long j) : type(INT) {

diff --git a/base/strings/safe_sprintf_unittest.cc b/base/strings/safe_sprintf_unittest.cc
index 71814b3..ad1cca2 100644
--- a/base/strings/safe_sprintf_unittest.cc
+++ b/base/strings/safe_sprintf_unittest.cc

@@ -337,22 +337,23 @@
 
   // Pointer
   char addr[20];
-  sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf);
+  snprintf(addr, sizeof(addr), "0x%llX", (unsigned long long)(uintptr_t)buf);
   SafeSPrintf(buf, "%p", buf);
   EXPECT_EQ(std::string(addr), std::string(buf));
   SafeSPrintf(buf, "%p", (const char *)buf);
   EXPECT_EQ(std::string(addr), std::string(buf));
-  sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)sprintf);
-  SafeSPrintf(buf, "%p", sprintf);
+  snprintf(addr, sizeof(addr), "0x%llX",
+           (unsigned long long)(uintptr_t)snprintf);
+  SafeSPrintf(buf, "%p", snprintf);
   EXPECT_EQ(std::string(addr), std::string(buf));
 
   // Padding for pointers is a little more complicated because of the "0x"
   // prefix. Padding with '0' zeros is relatively straight-forward, but
   // padding with ' ' spaces requires more effort.
-  sprintf(addr, "0x%017llX", (unsigned long long)(uintptr_t)buf);
+  snprintf(addr, sizeof(addr), "0x%017llX", (unsigned long long)(uintptr_t)buf);
   SafeSPrintf(buf, "%019p", buf);
   EXPECT_EQ(std::string(addr), std::string(buf));
-  sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf);
+  snprintf(addr, sizeof(addr), "0x%llX", (unsigned long long)(uintptr_t)buf);
   memset(addr, ' ',
          (char*)memmove(addr + sizeof(addr) - strlen(addr) - 1,
                         addr, strlen(addr)+1) - addr);
@@ -427,8 +428,8 @@
     EXPECT_EQ('X', tmp[i]);
 
   // The text that was generated by SafeSPrintf() should always match the
-  // equivalent text generated by sprintf(). Please note that the format
-  // string for sprintf() is not complicated, as it does not have the
+  // equivalent text generated by snprintf(). Please note that the format
+  // string for snprintf() is not complicated, as it does not have the
   // benefit of getting type information from the C++ compiler.
   //
   // N.B.: It would be so much cleaner to use snprintf(). But unfortunately,
@@ -436,10 +437,10 @@
   //       are all really awkward.
   char ref[256];
   GURL_CHECK_LE(sz, sizeof(ref));
-  sprintf(ref, "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>",
-          static_cast<long long>(std::numeric_limits<intptr_t>::min()),
-          static_cast<unsigned long long>(
-            reinterpret_cast<uintptr_t>(PrintLongString)));
+  snprintf(ref, sizeof(ref), "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>",
+           static_cast<long long>(std::numeric_limits<intptr_t>::min()),
+           static_cast<unsigned long long>(
+               reinterpret_cast<uintptr_t>(PrintLongString)));
   ref[sz-1] = '\000';
 
 #if defined(NDEBUG)
@@ -448,7 +449,7 @@
   const size_t kSSizeMax = internal::GetSafeSPrintfSSizeMaxForTest();
 #endif
 
-  // Compare the output from SafeSPrintf() to the one from sprintf().
+  // Compare the output from SafeSPrintf() to the one from snprintf().
   EXPECT_EQ(std::string(ref).substr(0, kSSizeMax-1), std::string(tmp.get()));
 
   // We allocated a slightly larger buffer, so that we could perform some

diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h
index 2abd324..7c1804c 100644
--- a/base/strings/string_number_conversions_internal.h
+++ b/base/strings/string_number_conversions_internal.h

@@ -61,13 +61,13 @@
 absl::optional<uint8_t> CharToDigit(CHAR c) {
   static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]");
   if (c >= '0' && c < '0' + std::min(BASE, 10))
-    return c - '0';
+    return static_cast<uint8_t>(c - '0');
 
   if (c >= 'a' && c < 'a' + BASE - 10)
-    return c - 'a' + 10;
+    return static_cast<uint8_t>(c - 'a' + 10);
 
   if (c >= 'A' && c < 'A' + BASE - 10)
-    return c - 'A' + 10;
+    return static_cast<uint8_t>(c - 'A' + 10);
 
   return absl::nullopt;
 }

diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
index f9b9422..e76e6ad 100644
--- a/base/strings/string_piece.cc
+++ b/base/strings/string_piece.cc

@@ -129,7 +129,7 @@
                                          s.begin(), s.end());
   if (found == self.end())
     return BasicStringPiece<CharT>::npos;
-  return found - self.begin();
+  return static_cast<size_t>(found - self.begin());
 }
 
 size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {

diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index 8729bf4..8623096 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc

@@ -174,7 +174,8 @@
   while (char_index >= 0) {
     int32_t prev = char_index;
     base_icu::UChar32 code_point = 0;
-    CBU8_NEXT(data, char_index, truncation_length, code_point);
+    CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
+              truncation_length, code_point);
     if (!IsValidCharacter(code_point)) {
       char_index = prev - 1;
     } else {
@@ -183,7 +184,7 @@
   }
 
   if (char_index >= 0 )
-    *output = input.substr(0, char_index);
+    *output = input.substr(0, static_cast<size_t>(char_index));
   else
     output->clear();
 }
@@ -254,14 +255,6 @@
   return internal::DoIsStringUTF8<IsValidCodepoint>(str);
 }
 
-bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
-  return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
-bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
-  return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
   return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
 }

diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index 435b5c6..c3224f1 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h

@@ -337,13 +337,6 @@
 BASE_EXPORT bool IsStringASCII(WStringPiece str);
 #endif
 
-// Compare the lower-case form of the given string against the given
-// previously-lower-cased ASCII string (typically a constant).
-BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
-                                      StringPiece lowercase_ascii);
-BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
-                                      StringPiece lowercase_ascii);
-
 // Performs a case-sensitive string compare of the given 16-bit string against
 // the given 8-bit ASCII string (typically a constant). The behavior is
 // undefined if the |ascii| string is not ASCII.

diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h
index 173cfaa..8608eba 100644
--- a/base/strings/string_util_internal.h
+++ b/base/strings/string_util_internal.h

@@ -242,28 +242,6 @@
   return true;
 }
 
-// Implementation note: Normally this function will be called with a hardcoded
-// constant for the lowercase_ascii parameter. Constructing a StringPiece from
-// a C constant requires running strlen, so the result will be two passes
-// through the buffers, one to file the length of lowercase_ascii, and one to
-// compare each letter.
-//
-// This function could have taken a const char* to avoid this and only do one
-// pass through the string. But the strlen is faster than the case-insensitive
-// compares and lets us early-exit in the case that the strings are different
-// lengths (will often be the case for non-matches). So whether one approach or
-// the other will be faster depends on the case.
-//
-// The hardcoded strings are typically very short so it doesn't matter, and the
-// string piece gives additional flexibility for the caller (doesn't have to be
-// null terminated) so we choose the StringPiece route.
-template <typename T, typename CharT = typename T::value_type>
-inline bool DoLowerCaseEqualsASCII(T str, StringPiece lowercase_ascii) {
-  return std::equal(
-      str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(),
-      [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; });
-}
-
 template <typename T, typename CharT = typename T::value_type>
 bool StartsWithT(T str, T search_for, CompareCase case_sensitivity) {
   if (search_for.size() > str.size())

diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index f41f6d8..4d2ac71 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc

@@ -660,22 +660,6 @@
   EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));
 }
 
-TEST(StringUtilTest, LowerCaseEqualsASCII) {
-  static const struct {
-    const char*    src_a;
-    const char*    dst;
-  } lowercase_cases[] = {
-    { "FoO", "foo" },
-    { "foo", "foo" },
-    { "FOO", "foo" },
-  };
-
-  for (const auto& i : lowercase_cases) {
-    EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(i.src_a), i.dst));
-    EXPECT_TRUE(LowerCaseEqualsASCII(i.src_a, i.dst));
-  }
-}
-
 TEST(StringUtilTest, FormatBytesUnlocalized) {
   static const struct {
     int64_t bytes;

diff --git a/base/strings/string_util_win.cc b/base/strings/string_util_win.cc
index 7a9b891..7ab9061 100644
--- a/base/strings/string_util_win.cc
+++ b/base/strings/string_util_win.cc

@@ -71,10 +71,6 @@
   return input.find_first_not_of(characters) == StringPiece::npos;
 }
 
-bool LowerCaseEqualsASCII(WStringPiece str, StringPiece lowercase_ascii) {
-  return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
 bool EqualsASCII(WStringPiece str, StringPiece ascii) {
   return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
 }

diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h
index fbf815b..6b5fba3 100644
--- a/base/strings/string_util_win.h
+++ b/base/strings/string_util_win.h

@@ -157,9 +157,6 @@
 
 BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters);
 
-BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str,
-                                      StringPiece lowercase_ascii);
-
 BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
 
 BASE_EXPORT bool StartsWith(

diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
index e0c5e20..8de48f3 100644
--- a/base/strings/stringprintf.cc
+++ b/base/strings/stringprintf.cc

@@ -65,14 +65,14 @@
   int result = vsnprintfT(stack_buf, std::size(stack_buf), format, ap_copy);
   va_end(ap_copy);
 
-  if (result >= 0 && result < static_cast<int>(std::size(stack_buf))) {
+  if (result >= 0 && static_cast<size_t>(result) < std::size(stack_buf)) {
     // It fit.
-    dst->append(stack_buf, result);
+    dst->append(stack_buf, static_cast<size_t>(result));
     return;
   }
 
   // Repeatedly increase buffer size until it fits.
-  int mem_length = std::size(stack_buf);
+  size_t mem_length = std::size(stack_buf);
   while (true) {
     if (result < 0) {
 #if BUILDFLAG(IS_WIN)
@@ -88,7 +88,7 @@
 #endif
     } else {
       // We need exactly "result + 1" characters.
-      mem_length = result + 1;
+      mem_length = static_cast<size_t>(result) + 1;
     }
 
     if (mem_length > 32 * 1024 * 1024) {
@@ -107,9 +107,9 @@
     result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
     va_end(ap_copy);
 
-    if ((result >= 0) && (result < mem_length)) {
+    if ((result >= 0) && (static_cast<size_t>(result) < mem_length)) {
       // It fit.
-      dst->append(&mem_buf[0], result);
+      dst->append(&mem_buf[0], static_cast<size_t>(result));
       return;
     }
   }

diff --git a/base/strings/sys_string_conversions_win.cc b/base/strings/sys_string_conversions_win.cc
index 3f08956..c0b4829 100644
--- a/base/strings/sys_string_conversions_win.cc
+++ b/base/strings/sys_string_conversions_win.cc

@@ -42,7 +42,7 @@
     return std::wstring();
 
   std::wstring wide;
-  wide.resize(charcount);
+  wide.resize(static_cast<size_t>(charcount));
   MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount);
 
   return wide;
@@ -61,7 +61,7 @@
     return std::string();
 
   std::string mb;
-  mb.resize(charcount);
+  mb.resize(static_cast<size_t>(charcount));
   WideCharToMultiByte(code_page, 0, wide.data(), wide_length,
                       &mb[0], charcount, NULL, NULL);
 

diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc
index e6bb6d6..b67b6a7 100644
--- a/base/strings/utf_offset_string_conversions.cc
+++ b/base/strings/utf_offset_string_conversions.cc

@@ -39,7 +39,8 @@
   GURL_DCHECK(offset);
   if (*offset == std::u16string::npos)
     return;
-  int adjustment = 0;
+  size_t original_lengths = 0;
+  size_t output_lengths = 0;
   for (const auto& i : adjustments) {
     if (*offset <= i.original_offset)
       break;
@@ -47,9 +48,10 @@
       *offset = std::u16string::npos;
       return;
     }
-    adjustment += static_cast<int>(i.original_length - i.output_length);
+    original_lengths += i.original_length;
+    output_lengths += i.output_length;
   }
-  *offset -= adjustment;
+  *offset += output_lengths - original_lengths;
 
   if (*offset > limit)
     *offset = std::u16string::npos;
@@ -70,17 +72,20 @@
                                     size_t* offset) {
   if (*offset == std::u16string::npos)
     return;
-  int adjustment = 0;
+  size_t original_lengths = 0;
+  size_t output_lengths = 0;
   for (const auto& i : adjustments) {
-    if (*offset + adjustment <= i.original_offset)
+    if (*offset + original_lengths - output_lengths <= i.original_offset)
       break;
-    adjustment += static_cast<int>(i.original_length - i.output_length);
-    if ((*offset + adjustment) < (i.original_offset + i.original_length)) {
+    original_lengths += i.original_length;
+    output_lengths += i.output_length;
+    if ((*offset + original_lengths - output_lengths) <
+        (i.original_offset + i.original_length)) {
       *offset = std::u16string::npos;
       return;
     }
   }
-  *offset += adjustment;
+  *offset += original_lengths - output_lengths;
 }
 
 // static
@@ -149,15 +154,15 @@
       //   <=
       //   adjusted_iter->original_offset + shift +
       //       adjusted_iter->original_length
-
       // Modify the current |adjusted_iter| to include whatever collapsing
       // happened in |first_iter|, then advance to the next |first_adjustments|
       // because we dealt with the current one.
-      const int collapse = static_cast<int>(first_iter->original_length) -
-          static_cast<int>(first_iter->output_length);
+
       // This function does not know how to deal with a string that expands and
       // then gets modified, only strings that collapse and then get modified.
-      GURL_DCHECK_GT(collapse, 0);
+      GURL_DCHECK_GT(first_iter->original_length, first_iter->output_length);
+      const size_t collapse =
+          first_iter->original_length - first_iter->output_length;
       adjusted_iter->original_length += collapse;
       currently_collapsing += collapse;
       ++first_iter;
@@ -188,14 +193,12 @@
                     OffsetAdjuster::Adjustments* adjustments) {
   if (adjustments)
     adjustments->clear();
-  // ICU requires 32-bit numbers.
   bool success = true;
-  int32_t src_len32 = static_cast<int32_t>(src_len);
-  for (int32_t i = 0; i < src_len32; i++) {
+  for (size_t i = 0; i < src_len; i++) {
     base_icu::UChar32 code_point;
     size_t original_i = i;
     size_t chars_written = 0;
-    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
+    if (ReadUnicodeCharacter(src, src_len, &i, &code_point)) {
       chars_written = WriteUnicodeCharacter(code_point, output);
     } else {
       chars_written = WriteUnicodeCharacter(0xFFFD, output);

diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc
index 76dd725..3ddcc7b 100644
--- a/base/strings/utf_string_conversion_utils.cc
+++ b/base/strings/utf_string_conversion_utils.cc

@@ -12,11 +12,12 @@
 // ReadUnicodeCharacter --------------------------------------------------------
 
 bool ReadUnicodeCharacter(const char* src,
-                          int32_t src_len,
-                          int32_t* char_index,
+                          size_t src_len,
+                          size_t* char_index,
                           base_icu::UChar32* code_point_out) {
   base_icu::UChar32 code_point;
-  CBU8_NEXT(src, *char_index, src_len, code_point);
+  CBU8_NEXT(reinterpret_cast<const uint8_t*>(src), *char_index, src_len,
+            code_point);
   *code_point_out = code_point;
 
   // The ICU macro above moves to the next char, we want to point to the last
@@ -28,13 +29,12 @@
 }
 
 bool ReadUnicodeCharacter(const char16_t* src,
-                          int32_t src_len,
-                          int32_t* char_index,
+                          size_t src_len,
+                          size_t* char_index,
                           base_icu::UChar32* code_point) {
   if (CBU16_IS_SURROGATE(src[*char_index])) {
-    if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
-        *char_index + 1 >= src_len ||
-        !CBU16_IS_TRAIL(src[*char_index + 1])) {
+    if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || !src_len ||
+        *char_index >= src_len - 1 || !CBU16_IS_TRAIL(src[*char_index + 1])) {
       // Invalid surrogate pair.
       return false;
     }
@@ -53,8 +53,8 @@
 
 #if defined(WCHAR_T_IS_UTF32)
 bool ReadUnicodeCharacter(const wchar_t* src,
-                          int32_t src_len,
-                          int32_t* char_index,
+                          size_t src_len,
+                          size_t* char_index,
                           base_icu::UChar32* code_point) {
   // Conversion is easy since the source is 32-bit.
   *code_point = src[*char_index];
@@ -66,20 +66,21 @@
 
 // WriteUnicodeCharacter -------------------------------------------------------
 
-size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) {
-  if (code_point <= 0x7f) {
+size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+                             std::string* output) {
+  if (code_point >= 0 && code_point <= 0x7f) {
     // Fast path the common case of one byte.
     output->push_back(static_cast<char>(code_point));
     return 1;
   }
 
-
   // CBU8_APPEND_UNSAFE can append up to 4 bytes.
   size_t char_offset = output->length();
   size_t original_char_offset = char_offset;
   output->resize(char_offset + CBU8_MAX_LENGTH);
 
-  CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+  CBU8_APPEND_UNSAFE(reinterpret_cast<uint8_t*>(output->data()), char_offset,
+                     code_point);
 
   // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so
   // it will represent the new length of the string.
@@ -87,9 +88,10 @@
   return char_offset - original_char_offset;
 }
 
-size_t WriteUnicodeCharacter(uint32_t code_point, std::u16string* output) {
+size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+                             std::u16string* output) {
   if (CBU16_LENGTH(code_point) == 1) {
-    // Thie code point is in the Basic Multilingual Plane (BMP).
+    // The code point is in the Basic Multilingual Plane (BMP).
     output->push_back(static_cast<char16_t>(code_point));
     return 1;
   }

diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index 877d264..8c209a2 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h

@@ -49,21 +49,21 @@
 //
 // Returns true on success. On false, |*code_point| will be invalid.
 BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
-                                      int32_t src_len,
-                                      int32_t* char_index,
+                                      size_t src_len,
+                                      size_t* char_index,
                                       base_icu::UChar32* code_point_out);
 
 // Reads a UTF-16 character. The usage is the same as the 8-bit version above.
 BASE_EXPORT bool ReadUnicodeCharacter(const char16_t* src,
-                                      int32_t src_len,
-                                      int32_t* char_index,
+                                      size_t src_len,
+                                      size_t* char_index,
                                       base_icu::UChar32* code_point);
 
 #if defined(WCHAR_T_IS_UTF32)
 // Reads UTF-32 character. The usage is the same as the 8-bit version above.
 BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
-                                      int32_t src_len,
-                                      int32_t* char_index,
+                                      size_t src_len,
+                                      size_t* char_index,
                                       base_icu::UChar32* code_point);
 #endif  // defined(WCHAR_T_IS_UTF32)
 
@@ -71,20 +71,21 @@
 
 // Appends a UTF-8 character to the given 8-bit string.  Returns the number of
 // bytes written.
-BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+BASE_EXPORT size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
                                          std::string* output);
 
 // Appends the given code point as a UTF-16 character to the given 16-bit
 // string.  Returns the number of 16-bit values written.
-BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+BASE_EXPORT size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
                                          std::u16string* output);
 
 #if defined(WCHAR_T_IS_UTF32)
 // Appends the given UTF-32 character to the given 32-bit string.  Returns the
 // number of 32-bit values written.
-inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) {
+inline size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+                                    std::wstring* output) {
   // This is the easy case, just append the character.
-  output->push_back(code_point);
+  output->push_back(static_cast<wchar_t>(code_point));
   return 1;
 }
 #endif  // defined(WCHAR_T_IS_UTF32)

diff --git a/base/third_party/icu/icu_utf.h b/base/third_party/icu/icu_utf.h
index 16792c4..0b50b71 100644
--- a/base/third_party/icu/icu_utf.h
+++ b/base/third_party/icu/icu_utf.h

@@ -118,7 +118,7 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+#define CBU_IS_SURROGATE(c) (((uint32_t)(c)&0xfffff800) == 0xd800)
 
 /**
  * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
@@ -278,25 +278,27 @@
  * @see U8_APPEND
  * @stable ICU 2.4
  */
-#define CBU8_APPEND_UNSAFE(s, i, c) CBUPRV_BLOCK_MACRO_BEGIN { \
-    uint32_t __uc=(c); \
-    if(__uc<=0x7f) { \
-        (s)[(i)++]=(uint8_t)__uc; \
-    } else { \
-        if(__uc<=0x7ff) { \
-            (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
-        } else { \
-            if(__uc<=0xffff) { \
-                (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
-            } else { \
-                (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
-                (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
-            } \
-            (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
-        } \
-        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
-    } \
-} CBUPRV_BLOCK_MACRO_END
+#define CBU8_APPEND_UNSAFE(s, i, c)                             \
+  CBUPRV_BLOCK_MACRO_BEGIN {                                    \
+    uint32_t __uc = (uint32_t)(c);                              \
+    if (__uc <= 0x7f) {                                         \
+      (s)[(i)++] = (uint8_t)__uc;                               \
+    } else {                                                    \
+      if (__uc <= 0x7ff) {                                      \
+        (s)[(i)++] = (uint8_t)((__uc >> 6) | 0xc0);             \
+      } else {                                                  \
+        if (__uc <= 0xffff) {                                   \
+          (s)[(i)++] = (uint8_t)((__uc >> 12) | 0xe0);          \
+        } else {                                                \
+          (s)[(i)++] = (uint8_t)((__uc >> 18) | 0xf0);          \
+          (s)[(i)++] = (uint8_t)(((__uc >> 12) & 0x3f) | 0x80); \
+        }                                                       \
+        (s)[(i)++] = (uint8_t)(((__uc >> 6) & 0x3f) | 0x80);    \
+      }                                                         \
+      (s)[(i)++] = (uint8_t)((__uc & 0x3f) | 0x80);             \
+    }                                                           \
+  }                                                             \
+  CBUPRV_BLOCK_MACRO_END
 
 // source/common/unicode/utf16.h
 
@@ -314,7 +316,7 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+#define CBU16_IS_LEAD(c) (((uint32_t)(c)&0xfffffc00) == 0xd800)
 
 /**
  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
@@ -322,7 +324,7 @@
  * @return TRUE or FALSE
  * @stable ICU 2.4
  */
-#define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+#define CBU16_IS_TRAIL(c) (((uint32_t)(c)&0xfffffc00) == 0xdc00)
 
 /**
  * Is this code unit a surrogate (U+d800..U+dfff)?

diff --git a/copy.bara.sky b/copy.bara.sky
index b1d8df5..a0c899b 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky

@@ -27,6 +27,8 @@
         "base/i18n/uchar.h",
         "base/memory/raw_ptr.h",
         "base/memory/raw_ptr_exclusion.h",
+        "base/numerics/safe_conversions.h",
+        "base/numerics/safe_conversions_impl.h",
         "base/no_destructor.h",
         "base/ranges/*.h",
         "base/stl_util.h",

diff --git a/url/gurl.h b/url/gurl.h
index 10ba992..97a60ab 100644
--- a/url/gurl.h
+++ b/url/gurl.h

@@ -470,14 +470,16 @@
 
   // Returns the substring of the input identified by the given component.
   std::string ComponentString(const url::Component& comp) const {
-    if (comp.len <= 0)
+    if (!comp.is_nonempty())
       return std::string();
-    return std::string(spec_, comp.begin, comp.len);
+    return std::string(spec_, static_cast<size_t>(comp.begin),
+                       static_cast<size_t>(comp.len));
   }
   gurl_base::StringPiece ComponentStringPiece(const url::Component& comp) const {
-    if (comp.len <= 0)
+    if (!comp.is_nonempty())
       return gurl_base::StringPiece();
-    return gurl_base::StringPiece(&spec_[comp.begin], comp.len);
+    return gurl_base::StringPiece(&spec_[static_cast<size_t>(comp.begin)],
+                             static_cast<size_t>(comp.len));
   }
 
   void ProcessFileSystemURLAfterReplaceComponents();

diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index b45cea0..c3ebddd 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc

@@ -101,7 +101,7 @@
               const Component& scheme,
               CanonOutput* output,
               Component* out_scheme) {
-  if (scheme.len <= 0) {
+  if (!scheme.is_nonempty()) {
     // Scheme is unspecified or empty, convert to empty by appending a colon.
     *out_scheme = Component(output->length(), 0);
     output->push_back(':');
@@ -117,12 +117,13 @@
   // FindAndCompareScheme, which could cause some security checks on
   // schemes to be incorrect.
   bool success = true;
-  int end = scheme.end();
-  for (int i = scheme.begin; i < end; i++) {
+  size_t begin = static_cast<size_t>(scheme.begin);
+  size_t end = static_cast<size_t>(scheme.end());
+  for (size_t i = begin; i < end; i++) {
     UCHAR ch = static_cast<UCHAR>(spec[i]);
     char replacement = 0;
     if (ch < 0x80) {
-      if (i == scheme.begin) {
+      if (i == begin) {
         // Need to do a special check for the first letter of the scheme.
         if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
           replacement = kSchemeCanonical[ch];
@@ -179,8 +180,9 @@
   out_username->begin = output->length();
   if (username.len > 0) {
     // This will escape characters not valid for the username.
-    AppendStringOfType(&username_spec[username.begin], username.len,
-                       CHAR_USERINFO, output);
+    AppendStringOfType(&username_spec[username.begin],
+                       static_cast<size_t>(username.len), CHAR_USERINFO,
+                       output);
   }
   out_username->len = output->length() - out_username->begin;
 
@@ -189,8 +191,9 @@
   if (password.len > 0) {
     output->push_back(':');
     out_password->begin = output->length();
-    AppendStringOfType(&password_spec[password.begin], password.len,
-                       CHAR_USERINFO, output);
+    AppendStringOfType(&password_spec[password.begin],
+                       static_cast<size_t>(password.len), CHAR_USERINFO,
+                       output);
     out_password->len = output->length() - out_password->begin;
   } else {
     *out_password = Component();
@@ -223,7 +226,8 @@
     // what the error was, and mark the URL as invalid by returning false.
     output->push_back(':');
     out_port->begin = output->length();
-    AppendInvalidNarrowString(spec, port.begin, port.end(), output);
+    AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
+                              static_cast<size_t>(port.end()), output);
     out_port->len = output->length() - out_port->begin;
     return false;
   }
@@ -285,7 +289,7 @@
                        const Component& ref,
                        CanonOutput* output,
                        Component* out_ref) {
-  if (ref.len < 0) {
+  if (!ref.is_valid()) {
     // Common case of no ref.
     *out_ref = Component();
     return;
@@ -297,8 +301,8 @@
   out_ref->begin = output->length();
 
   // Now iterate through all the characters, converting to UTF-8 and validating.
-  int end = ref.end();
-  for (int i = ref.begin; i < end; i++) {
+  size_t end = static_cast<size_t>(ref.end());
+  for (size_t i = static_cast<size_t>(ref.begin); i < end; i++) {
     UCHAR current_char = static_cast<UCHAR>(spec[i]);
     if (current_char < 0x80) {
       if (kShouldEscapeCharInFragment[current_char])

diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index 370dd77..edc9d67 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc

@@ -123,15 +123,15 @@
 //    |*has_non_ascii| flag.
 //
 // The return value indicates if the output is a potentially valid host name.
-template<typename INCHAR, typename OUTCHAR>
+template <typename INCHAR, typename OUTCHAR>
 bool DoSimpleHost(const INCHAR* host,
-                  int host_len,
+                  size_t host_len,
                   CanonOutputT<OUTCHAR>* output,
                   bool* has_non_ascii) {
   *has_non_ascii = false;
 
   bool success = true;
-  for (int i = 0; i < host_len; ++i) {
+  for (size_t i = 0; i < host_len; ++i) {
     unsigned int source = host[i];
     if (source == '%') {
       // Unescape first, if possible.
@@ -175,7 +175,7 @@
 }
 
 // Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
   int original_output_len = output->length();  // So we can rewind below.
 
   // We need to escape URL before doing IDN conversion, since punicode strings
@@ -202,8 +202,8 @@
   // unescaping. Although we unescaped everything before this function call, if
   // somebody does %00 as fullwidth, ICU will convert this to ASCII.
   bool success = DoSimpleHost(wide_output.data(),
-                              wide_output.length(),
-                              output, &has_non_ascii);
+                              static_cast<size_t>(wide_output.length()), output,
+                              &has_non_ascii);
   if (has_non_ascii) {
     // ICU generated something that DoSimpleHost didn't think looked like
     // ASCII. This is quite rare, but ICU might convert some characters to
@@ -220,7 +220,8 @@
     // ASCII isn't strictly necessary, but DoSimpleHost handles this case
     // anyway so we handle it/
     output->set_length(original_output_len);
-    AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
+    AppendInvalidNarrowString(wide_output.data(), 0,
+                              static_cast<size_t>(wide_output.length()),
                               output);
     return false;
   }
@@ -230,8 +231,11 @@
 // 8-bit convert host to its ASCII version: this converts the UTF-8 input to
 // UTF-16. The has_escaped flag should be set if the input string requires
 // unescaping.
-bool DoComplexHost(const char* host, int host_len,
-                   bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char* host,
+                   size_t host_len,
+                   bool has_non_ascii,
+                   bool has_escaped,
+                   CanonOutput* output) {
   // Save the current position in the output. We may write stuff and rewind it
   // below, so we need to know where to rewind to.
   int begin_length = output->length();
@@ -239,7 +243,7 @@
   // Points to the UTF-8 data we want to convert. This will either be the
   // input or the unescaped version written to |*output| if necessary.
   const char* utf8_source;
-  int utf8_source_len;
+  size_t utf8_source_len;
   bool are_all_escaped_valid = true;
   if (has_escaped) {
     // Unescape before converting to UTF-16 for IDN. We write this into the
@@ -264,7 +268,7 @@
     // Save the pointer into the data was just converted (it may be appended to
     // other data in the output buffer).
     utf8_source = &output->data()[begin_length];
-    utf8_source_len = output->length() - begin_length;
+    utf8_source_len = static_cast<size_t>(output->length() - begin_length);
   } else {
     // We don't need to unescape, use input for IDNization later. (We know the
     // input has non-ASCII, or the simple version would have been called
@@ -280,17 +284,18 @@
   if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
     // In this error case, the input may or may not be the output.
     StackBuffer utf8;
-    for (int i = 0; i < utf8_source_len; i++)
+    for (size_t i = 0; i < utf8_source_len; i++)
       utf8.push_back(utf8_source[i]);
     output->set_length(begin_length);
-    AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
+    AppendInvalidNarrowString(utf8.data(), 0,
+                              static_cast<size_t>(utf8.length()), output);
     return false;
   }
   output->set_length(begin_length);
 
   // This will call DoSimpleHost which will do normal ASCII canonicalization
   // and also check for IP addresses in the outpt.
-  return DoIDNHost(utf16.data(), utf16.length(), output) &&
+  return DoIDNHost(utf16.data(), static_cast<size_t>(utf16.length()), output) &&
          are_all_escaped_valid;
 }
 
@@ -298,7 +303,7 @@
 // the backend, so we just pass through. The has_escaped flag should be set if
 // the input string requires unescaping.
 bool DoComplexHost(const char16_t* host,
-                   int host_len,
+                   size_t host_len,
                    bool has_non_ascii,
                    bool has_escaped,
                    CanonOutput* output) {
@@ -319,8 +324,8 @@
 
     // Once we convert to UTF-8, we can use the 8-bit version of the complex
     // host handling code above.
-    return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii,
-                         has_escaped, output);
+    return DoComplexHost(utf8.data(), static_cast<size_t>(utf8.length()),
+                         has_non_ascii, has_escaped, output);
   }
 
   // No unescaping necessary, we can safely pass the input to ICU. This
@@ -334,16 +339,18 @@
 bool DoHostSubstring(const CHAR* spec,
                      const Component& host,
                      CanonOutput* output) {
+  GURL_DCHECK(host.is_valid());
+
   bool has_non_ascii, has_escaped;
   ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
 
   if (has_non_ascii || has_escaped) {
-    return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
-                         has_escaped, output);
+    return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
+                         has_non_ascii, has_escaped, output);
   }
 
-  const bool success =
-      DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+  const bool success = DoSimpleHost(
+      &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
   GURL_DCHECK(!has_non_ascii);
   return success;
 }
@@ -353,7 +360,7 @@
             const Component& host,
             CanonOutput* output,
             CanonHostInfo* host_info) {
-  if (host.len <= 0) {
+  if (!host.is_nonempty()) {
     // Empty hosts don't need anything.
     host_info->family = CanonHostInfo::NEUTRAL;
     host_info->out_host = Component();

diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index bbb27a0..f6b4b03 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc

@@ -11,17 +11,19 @@
 #include <cstdio>
 #include <string>
 
+#include "base/numerics/safe_conversions.h"
 #include "base/strings/utf_string_conversion_utils.h"
 
 namespace url {
 
 namespace {
 
-template<typename CHAR, typename UCHAR>
-void DoAppendStringOfType(const CHAR* source, int length,
+template <typename CHAR, typename UCHAR>
+void DoAppendStringOfType(const CHAR* source,
+                          size_t length,
                           SharedCharTypes type,
                           CanonOutput* output) {
-  for (int i = 0; i < length; i++) {
+  for (size_t i = 0; i < length; i++) {
     if (static_cast<UCHAR>(source[i]) >= 0x80) {
       // ReadChar will fill the code point with kUnicodeReplacementCharacter
       // when the input is invalid, which is what we want.
@@ -41,10 +43,12 @@
 
 // This function assumes the input values are all contained in 8-bit,
 // although it allows any type. Returns true if input is valid, false if not.
-template<typename CHAR, typename UCHAR>
-void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
+template <typename CHAR, typename UCHAR>
+void DoAppendInvalidNarrowString(const CHAR* spec,
+                                 size_t begin,
+                                 size_t end,
                                  CanonOutput* output) {
-  for (int i = begin; i < end; i++) {
+  for (size_t i = begin; i < end; i++) {
     UCHAR uch = static_cast<UCHAR>(spec[i]);
     if (uch >= 0x80) {
       // Handle UTF-8/16 encodings. This call will correctly handle the error
@@ -98,7 +102,8 @@
       // Convert to UTF-8.
       dest_component->begin = utf8_buffer->length();
       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
-                                   override_component.len, utf8_buffer);
+                                   static_cast<size_t>(override_component.len),
+                                   utf8_buffer);
       dest_component->len = utf8_buffer->length() - dest_component->begin;
     }
   }
@@ -235,26 +240,24 @@
 
 const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
 
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+                        size_t length,
                         SharedCharTypes type,
                         CanonOutput* output) {
   DoAppendStringOfType<char, unsigned char>(source, length, type, output);
 }
 
 void AppendStringOfType(const char16_t* source,
-                        int length,
+                        size_t length,
                         SharedCharTypes type,
                         CanonOutput* output) {
   DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
 }
 
 bool ReadUTFChar(const char* str,
-                 int* begin,
-                 int length,
+                 size_t* begin,
+                 size_t length,
                  base_icu::UChar32* code_point_out) {
-  // This depends on ints and int32s being the same thing. If they're not, it
-  // will fail to compile.
-  // TODO(mmenke): This should probably be fixed.
   if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !gurl_base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
@@ -264,12 +267,9 @@
 }
 
 bool ReadUTFChar(const char16_t* str,
-                 int* begin,
-                 int length,
+                 size_t* begin,
+                 size_t length,
                  base_icu::UChar32* code_point_out) {
-  // This depends on ints and int32s being the same thing. If they're not, it
-  // will fail to compile.
-  // TODO(mmenke): This should probably be fixed.
   if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !gurl_base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
@@ -278,23 +278,25 @@
   return true;
 }
 
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+                               size_t begin,
+                               size_t end,
                                CanonOutput* output) {
   DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
 }
 
 void AppendInvalidNarrowString(const char16_t* spec,
-                               int begin,
-                               int end,
+                               size_t begin,
+                               size_t end,
                                CanonOutput* output) {
   DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
 }
 
 bool ConvertUTF16ToUTF8(const char16_t* input,
-                        int input_len,
+                        size_t input_len,
                         CanonOutput* output) {
   bool success = true;
-  for (int i = 0; i < input_len; i++) {
+  for (size_t i = 0; i < input_len; i++) {
     base_icu::UChar32 code_point;
     success &= ReadUTFChar(input, &i, input_len, &code_point);
     AppendUTF8Value(code_point, output);
@@ -303,10 +305,10 @@
 }
 
 bool ConvertUTF8ToUTF16(const char* input,
-                        int input_len,
+                        size_t input_len,
                         CanonOutputT<char16_t>* output) {
   bool success = true;
-  for (int i = 0; i < input_len; i++) {
+  for (size_t i = 0; i < input_len; i++) {
     base_icu::UChar32 code_point;
     success &= ReadUTFChar(input, &i, input_len, &code_point);
     AppendUTF16Value(code_point, output);

diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 807ddc5..a41a771 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h

@@ -77,11 +77,12 @@
 
 // Appends the given string to the output, escaping characters that do not
 // match the given |type| in SharedCharTypes.
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+                        size_t length,
                         SharedCharTypes type,
                         CanonOutput* output);
 void AppendStringOfType(const char16_t* source,
-                        int length,
+                        size_t length,
                         SharedCharTypes type,
                         CanonOutput* output);
 
@@ -107,8 +108,8 @@
 // Indicates if the given character is a dot or dot equivalent, returning the
 // number of characters taken by it. This will be one for a literal dot, 3 for
 // an escaped dot. If the character is not a dot, this will return 0.
-template<typename CHAR>
-inline int IsDot(const CHAR* spec, int offset, int end) {
+template <typename CHAR>
+inline size_t IsDot(const CHAR* spec, size_t offset, size_t end) {
   if (spec[offset] == '.') {
     return 1;
   } else if (spec[offset] == '%' && offset + 3 <= end &&
@@ -154,8 +155,8 @@
 // (for a single-byte ASCII character, it will not be changed).
 COMPONENT_EXPORT(URL)
 bool ReadUTFChar(const char* str,
-                 int* begin,
-                 int length,
+                 size_t* begin,
+                 size_t length,
                  base_icu::UChar32* code_point_out);
 
 // Generic To-UTF-8 converter. This will call the given append method for each
@@ -231,8 +232,8 @@
 // (for a single-16-bit-word character, it will not be changed).
 COMPONENT_EXPORT(URL)
 bool ReadUTFChar(const char16_t* str,
-                 int* begin,
-                 int length,
+                 size_t* begin,
+                 size_t length,
                  base_icu::UChar32* code_point_out);
 
 // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
@@ -268,8 +269,8 @@
 // Assumes that ch[begin] is within range in the array, but does not assume
 // that any following characters are.
 inline bool AppendUTF8EscapedChar(const char16_t* str,
-                                  int* begin,
-                                  int length,
+                                  size_t* begin,
+                                  size_t length,
                                   CanonOutput* output) {
   // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
   // us the kUnicodeReplacementCharacter, so we don't have to do special
@@ -281,7 +282,9 @@
 }
 
 // Handles UTF-8 input. See the wide version above for usage.
-inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
+inline bool AppendUTF8EscapedChar(const char* str,
+                                  size_t* begin,
+                                  size_t length,
                                   CanonOutput* output) {
   // ReadUTF8Char will handle invalid characters for us and give us the
   // kUnicodeReplacementCharacter, so we don't have to do special checking
@@ -308,8 +311,10 @@
   return c <= 255;
 }
 
-template<typename CHAR>
-inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
+template <typename CHAR>
+inline bool DecodeEscaped(const CHAR* spec,
+                          size_t* begin,
+                          size_t end,
                           unsigned char* unescaped_value) {
   if (*begin + 3 > end ||
       !Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
@@ -338,11 +343,13 @@
 // This is used in error cases to append invalid output so that it looks
 // approximately correct. Non-error cases should not call this function since
 // the escaping rules are not guaranteed!
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+                               size_t begin,
+                               size_t end,
                                CanonOutput* output);
 void AppendInvalidNarrowString(const char16_t* spec,
-                               int begin,
-                               int end,
+                               size_t begin,
+                               size_t end,
                                CanonOutput* output);
 
 // Misc canonicalization helpers ----------------------------------------------
@@ -357,11 +364,11 @@
 // normal.
 COMPONENT_EXPORT(URL)
 bool ConvertUTF16ToUTF8(const char16_t* input,
-                        int input_len,
+                        size_t input_len,
                         CanonOutput* output);
 COMPONENT_EXPORT(URL)
 bool ConvertUTF8ToUTF16(const char* input,
-                        int input_len,
+                        size_t input_len,
                         CanonOutputT<char16_t>* output);
 
 // Converts from UTF-16 to 8-bit using the character set converter. If the

diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index f4fe2b4..ff62bea 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc

@@ -57,8 +57,8 @@
     // Copy the path using path URL's more lax escaping rules.
     // We convert to UTF-8 and escape non-ASCII, but leave most
     // ASCII characters alone.
-    int end = parsed.path.end();
-    for (int i = parsed.path.begin; i < end; ++i) {
+    size_t end = static_cast<size_t>(parsed.path.end());
+    for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
       UCHAR uch = static_cast<UCHAR>(source.path[i]);
       if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
         success &= AppendUTF8EscapedChar(source.path, &i, end, output);

diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index e043043..32cb5f3 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc

@@ -101,9 +101,11 @@
 // If the input is "../foo", |after_dot| = 1, |end| = 6, and
 // at the end, |*consumed_len| = 2 for the "./" this function consumed. The
 // original dot length should be handled by the caller.
-template<typename CHAR>
-DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot,
-                                int end, int* consumed_len) {
+template <typename CHAR>
+DotDisposition ClassifyAfterDot(const CHAR* spec,
+                                size_t after_dot,
+                                size_t end,
+                                size_t* consumed_len) {
   if (after_dot == end) {
     // Single dot at the end.
     *consumed_len = 0;
@@ -115,9 +117,9 @@
     return DIRECTORY_CUR;
   }
 
-  int second_dot_len = IsDot(spec, after_dot, end);
+  size_t second_dot_len = IsDot(spec, after_dot, end);
   if (second_dot_len) {
-    int after_second_dot = after_dot + second_dot_len;
+    size_t after_second_dot = after_dot + second_dot_len;
     if (after_second_dot == end) {
       // Double dot at the end.
       *consumed_len = second_dot_len;
@@ -193,10 +195,10 @@
 // ends with a '%' followed by one or two characters, and the '%' is the one
 // pointed to by |last_invalid_percent_index|.  The last character in the string
 // was just unescaped.
-template<typename CHAR>
+template <typename CHAR>
 void CheckForNestedEscapes(const CHAR* spec,
-                           int next_input_index,
-                           int input_len,
+                           size_t next_input_index,
+                           size_t input_len,
                            int last_invalid_percent_index,
                            CanonOutput* output) {
   const int length = output->length();
@@ -218,9 +220,10 @@
   }
 
   // Now output ends like "%cc".  Try to unescape this.
-  int begin = last_invalid_percent_index;
+  size_t begin = static_cast<size_t>(last_invalid_percent_index);
   unsigned char temp;
-  if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
+  if (DecodeEscaped(output->data(), &begin,
+                    static_cast<size_t>(output->length()), &temp)) {
     // New escape sequence found.  Overwrite the characters following the '%'
     // with "25", and push_back() the one or two characters that were following
     // the '%' when we were called.
@@ -252,7 +255,10 @@
                            const Component& path,
                            int path_begin_in_output,
                            CanonOutput* output) {
-  int end = path.end();
+  if (!path.is_nonempty())
+    return true;
+
+  size_t end = static_cast<size_t>(path.end());
 
   // We use this variable to minimize the amount of work done when unescaping --
   // we'll only call CheckForNestedEscapes() when this points at one of the last
@@ -260,7 +266,7 @@
   int last_invalid_percent_index = INT_MIN;
 
   bool success = true;
-  for (int i = path.begin; i < end; i++) {
+  for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
     GURL_DCHECK_LT(last_invalid_percent_index, output->length());
     UCHAR uch = static_cast<UCHAR>(spec[i]);
     if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -276,7 +282,7 @@
       unsigned char flags = kPathCharLookup[out_ch];
       if (flags & SPECIAL) {
         // Needs special handling of some sort.
-        int dotlen;
+        size_t dotlen;
         if ((dotlen = IsDot(spec, i, end)) > 0) {
           // See if this dot was preceded by a slash in the output.
           //
@@ -287,7 +293,7 @@
           if (output->length() > path_begin_in_output &&
               output->at(output->length() - 1) == '/') {
             // Slash followed by a dot, check to see if this is means relative
-            int consumed_len;
+            size_t consumed_len;
             switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
                                            &consumed_len)) {
               case NOT_A_DIRECTORY:

diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index e726cfb..d8d65f3 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc

@@ -32,8 +32,8 @@
     // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
     // https://url.spec.whatwg.org/#c0-control-percent-encode-set
     new_component->begin = output->length();
-    int end = component.end();
-    for (int i = component.begin; i < end; i++) {
+    size_t end = static_cast<size_t>(component.end());
+    for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
       UCHAR uch = static_cast<UCHAR>(source[i]);
       if (uch < 0x20 || uch > 0x7E)
         AppendUTF8EscapedChar(source, &i, end, output);

diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc
index b3a1118..53699c5 100644
--- a/url/url_canon_query.cc
+++ b/url/url_canon_query.cc

@@ -72,10 +72,12 @@
                   const Component& query,
                   CharsetConverter* converter,
                   CanonOutput* output) {
+  GURL_DCHECK(query.is_valid());
   // This function will replace any misencoded values with the invalid
   // character. This is what we want so we don't have to check for error.
   RawCanonOutputW<1024> utf16;
-  ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16);
+  ConvertUTF8ToUTF16(&spec[query.begin], static_cast<size_t>(query.len),
+                     &utf16);
   converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
 }
 
@@ -86,7 +88,9 @@
                   const Component& query,
                   CharsetConverter* converter,
                   CanonOutput* output) {
-  converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
+  GURL_DCHECK(query.is_valid());
+  converter->ConvertFromUTF16(&spec[query.begin],
+                              static_cast<size_t>(query.len), output);
 }
 
 template<typename CHAR, typename UCHAR>
@@ -109,7 +113,8 @@
 
     } else {
       // No converter, do our own UTF-8 conversion.
-      AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output);
+      AppendStringOfType(&spec[query.begin], static_cast<size_t>(query.len),
+                         CHAR_QUERY, output);
     }
   }
 }

diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 0170e00..4fa31ec 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc

@@ -173,9 +173,9 @@
       out_str.clear();
       StdStringCanonOutput output(&out_str);
 
-      int input_len = static_cast<int>(strlen(utf_cases[i].input8));
+      size_t input_len = strlen(utf_cases[i].input8);
       bool success = true;
-      for (int ch = 0; ch < input_len; ch++) {
+      for (size_t ch = 0; ch < input_len; ch++) {
         success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
                                          &output);
       }
@@ -189,9 +189,9 @@
 
       std::u16string input_str(
           test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
-      int input_len = static_cast<int>(input_str.length());
+      size_t input_len = input_str.length();
       bool success = true;
-      for (int ch = 0; ch < input_len; ch++) {
+      for (size_t ch = 0; ch < input_len; ch++) {
         success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
                                          &output);
       }

diff --git a/url/url_util.cc b/url/url_util.cc
index f56323e..a6d0901 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc

@@ -811,11 +811,15 @@
                               int length,
                               DecodeURLMode mode,
                               CanonOutputW* output) {
+  if (length <= 0)
+    return;
+
   STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
-  for (int i = 0; i < length; i++) {
+  size_t length_size_t = static_cast<size_t>(length);
+  for (size_t i = 0; i < length_size_t; i++) {
     if (input[i] == '%') {
       unsigned char ch;
-      if (DecodeEscaped(input, &i, length, &ch)) {
+      if (DecodeEscaped(input, &i, length_size_t, &ch)) {
         unescaped_chars.push_back(ch);
       } else {
         // Invalid escape sequence, copy the percent literal.
@@ -830,18 +834,20 @@
   int output_initial_length = output->length();
   // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
   // JavaScript URLs, but Firefox and Safari do.
-  for (int i = 0; i < unescaped_chars.length(); i++) {
-    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+  size_t unescaped_length = static_cast<size_t>(unescaped_chars.length());
+  for (size_t i = 0; i < unescaped_length; i++) {
+    unsigned char uch =
+        static_cast<unsigned char>(unescaped_chars.at(static_cast<int>(i)));
     if (uch < 0x80) {
       // Non-UTF-8, just append directly
       output->push_back(uch);
     } else {
       // next_ch will point to the last character of the decoded
       // character.
-      int next_character = i;
+      size_t next_character = i;
       base_icu::UChar32 code_point;
-      if (ReadUTFChar(unescaped_chars.data(), &next_character,
-                      unescaped_chars.length(), &code_point)) {
+      if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
+                      &code_point)) {
         // Valid UTF-8 character, convert to UTF-16.
         AppendUTF16Value(code_point, output);
         i = next_character;
commit	f8b82c668b56aabd8e84c4fd8908e40539920e6c	[log] [tgz]
author	Victor Vasiliev <vasilvv@google.com>	Wed Jul 06 12:14:03 2022 -0400
committer	Victor Vasiliev <vasilvv@google.com>	Wed Jul 06 12:14:03 2022 -0400
tree	4c29dd00224bc7385877c42cbf2dc0e639103166
parent	643329026355a2ea6c076e125a508eeffd16ee58 [diff]