Update googleurl to a2e9430da59b678c058131fee8aa04370b1f8eed
This uses the latest Chromium version from Wed Jul 6 15:47:16 2022 +0000
Change-Id: I100fa197b5967a1d1098e851f1059e7e30916b24
diff --git a/AUTHORS b/AUTHORS
index a68e49b..b81d16f 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -22,6 +22,7 @@
Abhishek Agarwal <abhishek.a21@samsung.com>
Abhishek Kanike <abhishek.ka@samsung.com>
Abhishek Singh <abhi.rathore@samsung.com>
+Abin K Paul <abin.paul1@gmail.com>
Abul Hasan Md Osama <ahm.osama@samsung.com>
Adam Bonner <abonner-chromium@solscope.com>
Adam Bujalski <abujalski@gmail.com>
@@ -382,6 +383,7 @@
Ganesh Borle <ganesh.borle@samsung.com>
Gao Chun <chun.gao@intel.com>
Gao Chun <gaochun.dev@gmail.com>
+Gao Yu <wanggao@tencent.com>
Gaurav Dhol <gaurav.dhol@einfochips.com>
Gautham Banasandra <gautham.bangalore@gmail.com>
George Adams <geoada@amazon.com>
@@ -471,6 +473,7 @@
Isaac Murchie <murchieisaac@gmail.com>
Isaac Reilly <reillyi@amazon.com>
Ivan Naydonov <samogot@gmail.com>
+Ivan Pavlotskiy <ivan.pavlotskiy@lgepartner.com>
Ivan Sham <ivansham@amazon.com>
Jack Bates <jack@nottheoilrig.com>
Jacky Hu <flameddd@gmail.com>
@@ -700,6 +703,7 @@
Leo Wolf <jclw@ymail.com>
Leon Han <leon.han@intel.com>
Leung Wing Chung <lwchkg@gmail.com>
+Li Yanbo <liyanbo.monster@bytedance.com>
Li Yin <li.yin@intel.com>
Lidwine Genevet <lgenevet@cisco.com>
Lin Sun <lin.sun@intel.com>
@@ -1139,10 +1143,12 @@
Sunil Ratnu <sunil.ratnu@samsung.com>
Sunitha Srivatsa <srivats@amazon.com>
Sunwoo Nam <jegalzz88@gmail.com>
+Suresh Guttula <suresh.guttula@amd.corp-partner.google.com>
Surya K M <suryagowda590@gmail.com>
Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Suvanjan Mukherjee <suvanjanmukherjee@gmail.com>
Suyambulingam R M <suyambu.rm@samsung.com>
+Suyash Nayan <suyashnyn1@gmail.com>
Suyash Sengar <suyash.s@samsung.com>
Swarali Raut <swarali.sr@samsung.com>
Swati Jaiswal <swa.jaiswal@samsung.com>
@@ -1158,6 +1164,7 @@
Taeseong Yu <yugeeklab@gmail.com>
Taeyeon Kim <ssg9732@gmail.com>
Tae Shin <taeshindev@gmail.com>
+Taher Ali <taher.dasten@gmail.com>
Takaaki Suzuki <takaakisuzuki.14@gmail.com>
Takahiro Aoyagi <hogehoge@gachapin.jp>
Takashi Fujita <tgfjt.mail@gmail.com>
@@ -1328,6 +1335,7 @@
Zhengkun Li <zhengkli@amazon.com>
Zhenyu Liang <zhenyu.liang@intel.com>
Zhenyu Shan <zhenyu.shan@intel.com>
+Zhibo Wang <zhibo1.wang@intel.com>
Zhifei Fang <facetothefate@gmail.com>
Zhiyuan Ye <zhiyuanye@tencent.com>
Zhuoyu Qian <zhuoyu.qian@samsung.com>
diff --git a/base/BUILD b/base/BUILD
index f86c18b..69dffea 100644
--- a/base/BUILD
+++ b/base/BUILD
@@ -34,6 +34,8 @@
"memory/raw_ptr.h",
"memory/raw_ptr_exclusion.h",
"no_destructor.h",
+ "numerics/safe_conversions.h",
+ "numerics/safe_conversions_impl.h",
"ranges/algorithm.h",
"ranges/functional.h",
"ranges/ranges.h",
diff --git a/base/memory/raw_ptr.h b/base/memory/raw_ptr.h
index f1ef810..0b378f4 100644
--- a/base/memory/raw_ptr.h
+++ b/base/memory/raw_ptr.h
@@ -62,6 +62,14 @@
// These classes/structures are part of the raw_ptr implementation.
// DO NOT USE THESE CLASSES DIRECTLY YOURSELF.
+// Compile-time predicate: true when `Z` may be used as a pointer offset.
+//
+// `Z` must be an integral type no wider than ptrdiff_t, so signed (ptrdiff_t)
+// and unsigned (size_t) offsets are supported, as are smaller integer types.
+template <typename Z>
+static constexpr bool offset_type =
+ std::is_integral_v<Z> && sizeof(Z) <= sizeof(ptrdiff_t);
+
struct RawPtrNoOpImpl {
// Wraps a pointer.
template <typename T>
@@ -105,8 +113,10 @@
}
// Advance the wrapped pointer by `delta_elems`.
- template <typename T>
- static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+ template <typename T,
+ typename Z,
+ typename = std::enable_if_t<offset_type<Z>, void>>
+ static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
return wrapped_ptr + delta_elems;
}
@@ -119,6 +129,7 @@
// This is for accounting only, used by unit tests.
static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+ static ALWAYS_INLINE void IncrementLessCountForTest() {}
static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
};
@@ -246,8 +257,10 @@
}
// Advance the wrapped pointer by `delta_elems`.
- template <typename T>
- static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+ template <typename T,
+ typename Z,
+ typename = std::enable_if_t<offset_type<Z>, void>>
+ static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
return wrapped_ptr + delta_elems;
}
@@ -260,6 +273,7 @@
// This is for accounting only, used by unit tests.
static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+ static ALWAYS_INLINE void IncrementLessCountForTest() {}
static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
private:
@@ -423,8 +437,10 @@
}
// Advance the wrapped pointer by `delta_elems`.
- template <typename T>
- static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+ template <typename T,
+ typename Z,
+ typename = std::enable_if_t<offset_type<Z>, void>>
+ static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
#if GURL_DCHECK_IS_ON() || BUILDFLAG(ENABLE_BACKUP_REF_PTR_SLOW_CHECKS)
uintptr_t address = reinterpret_cast<uintptr_t>(wrapped_ptr);
if (IsSupportedAndNotNull(address))
@@ -445,6 +461,7 @@
// This is for accounting only, used by unit tests.
static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+ static ALWAYS_INLINE void IncrementLessCountForTest() {}
static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
private:
@@ -457,8 +474,17 @@
static BASE_EXPORT NOINLINE void AcquireInternal(uintptr_t address);
static BASE_EXPORT NOINLINE void ReleaseInternal(uintptr_t address);
static BASE_EXPORT NOINLINE bool IsPointeeAlive(uintptr_t address);
- static BASE_EXPORT NOINLINE bool IsValidDelta(uintptr_t address,
- ptrdiff_t delta_in_bytes);
+ template <typename Z, typename = std::enable_if_t<offset_type<Z>, void>>
+ static ALWAYS_INLINE bool IsValidDelta(uintptr_t address, Z delta_in_bytes) {
+ if constexpr (std::is_signed_v<Z>)
+ return IsValidSignedDelta(address, ptrdiff_t{delta_in_bytes});
+ else
+ return IsValidUnsignedDelta(address, size_t{delta_in_bytes});
+ }
+ static BASE_EXPORT NOINLINE bool IsValidSignedDelta(uintptr_t address,
+ ptrdiff_t delta_in_bytes);
+ static BASE_EXPORT NOINLINE bool IsValidUnsignedDelta(uintptr_t address,
+ size_t delta_in_bytes);
};
#endif // BUILDFLAG(USE_BACKUP_REF_PTR)
@@ -510,8 +536,10 @@
}
// Advance the wrapped pointer by `delta_elems`.
- template <typename T>
- static ALWAYS_INLINE T* Advance(T* wrapped_ptr, ptrdiff_t delta_elems) {
+ template <typename T,
+ typename Z,
+ typename = std::enable_if_t<offset_type<Z>, void>>
+ static ALWAYS_INLINE T* Advance(T* wrapped_ptr, Z delta_elems) {
return wrapped_ptr + delta_elems;
}
@@ -524,6 +552,7 @@
// This is for accounting only, used by unit tests.
static ALWAYS_INLINE void IncrementSwapCountForTest() {}
+ static ALWAYS_INLINE void IncrementLessCountForTest() {}
static ALWAYS_INLINE void IncrementPointerToMemberOperatorCountForTest() {}
private:
@@ -657,7 +686,9 @@
using RawPtrBanDanglingIfSupported = internal::RawPtrNoOpImpl;
#endif
-template <typename T, typename Impl = RawPtrBanDanglingIfSupported>
+using DefaultRawPtrImpl = RawPtrBanDanglingIfSupported;
+
+template <typename T, typename Impl = DefaultRawPtrImpl>
class TRIVIAL_ABI GSL_POINTER raw_ptr {
public:
static_assert(raw_ptr_traits::IsSupportedType<T>::value,
@@ -847,11 +878,15 @@
--(*this);
return result;
}
- ALWAYS_INLINE raw_ptr& operator+=(ptrdiff_t delta_elems) {
+ template <typename Z,
+ typename = std::enable_if_t<internal::offset_type<Z>, void>>
+ ALWAYS_INLINE raw_ptr& operator+=(Z delta_elems) {
wrapped_ptr_ = Impl::Advance(wrapped_ptr_, delta_elems);
return *this;
}
- ALWAYS_INLINE raw_ptr& operator-=(ptrdiff_t delta_elems) {
+ template <typename Z,
+ typename = std::enable_if_t<internal::offset_type<Z>, void>>
+ ALWAYS_INLINE raw_ptr& operator-=(Z delta_elems) {
return *this += -delta_elems;
}
@@ -1050,6 +1085,37 @@
return lhs.GetForComparison() >= rhs.GetForComparison();
}
+// Template helpers for working with T* or raw_ptr<T>.
+template <typename T>
+struct IsPointer : std::false_type {};
+
+template <typename T>
+struct IsPointer<T*> : std::true_type {};
+
+template <typename T, typename I>
+struct IsPointer<raw_ptr<T, I>> : std::true_type {};
+
+template <typename T>
+inline constexpr bool IsPointerV = IsPointer<T>::value;
+
+template <typename T>
+struct RemovePointer {
+ using type = T;
+};
+
+template <typename T>
+struct RemovePointer<T*> {
+ using type = T;
+};
+
+template <typename T, typename I>
+struct RemovePointer<raw_ptr<T, I>> {
+ using type = T;
+};
+
+template <typename T>
+using RemovePointerT = typename RemovePointer<T>::type;
+
} // namespace base
using gurl_base::raw_ptr;
@@ -1065,21 +1131,70 @@
// never be dereferenced after becoming dangling.
using DisableDanglingPtrDetection = gurl_base::RawPtrMayDangle;
+// See `docs/dangling_ptr.md`
+// Annotates known dangling raw_ptrs that haven't been triaged yet. All the
+// occurrences are meant to be removed. See https://crbug.com/1291138.
+using DanglingUntriaged = DisableDanglingPtrDetection;
+
+// The following template parameters are only meaningful when `raw_ptr`
+// is `MTECheckedPtr` (never the case unless a particular GN arg is set
+// to true). `raw_ptr` users need not worry about this and can refer solely
+// to `DisableDanglingPtrDetection` and `DanglingUntriaged` above.
+//
+// The `raw_ptr` definition allows users to specify an implementation.
+// When `MTECheckedPtr` is in play, we need to augment this
+// implementation setting with another layer that allows the `raw_ptr`
+// to degrade into the no-op version.
+#if defined(PA_USE_MTE_CHECKED_PTR_WITH_64_BITS_POINTERS)
+
+// Direct pass-through to no-op implementation.
+using DegradeToNoOpWhenMTE = gurl_base::internal::RawPtrNoOpImpl;
+
+// As above, but with the "untriaged dangling" annotation.
+using DanglingUntriagedDegradeToNoOpWhenMTE = gurl_base::internal::RawPtrNoOpImpl;
+
+// As above, but with the "explicitly disable protection" annotation.
+using DisableDanglingPtrDetectionDegradeToNoOpWhenMTE =
+ gurl_base::internal::RawPtrNoOpImpl;
+
+#else
+
+// Direct pass-through to default implementation specified by `raw_ptr`
+// template.
+using DegradeToNoOpWhenMTE = gurl_base::RawPtrBanDanglingIfSupported;
+
+// Direct pass-through to `DanglingUntriaged`.
+using DanglingUntriagedDegradeToNoOpWhenMTE = DanglingUntriaged;
+
+// Direct pass-through to `DisableDanglingPtrDetection`.
+using DisableDanglingPtrDetectionDegradeToNoOpWhenMTE =
+ DisableDanglingPtrDetection;
+
+#endif // defined(PA_USE_MTE_CHECKED_PTR_WITH_64_BITS_POINTERS)
+
namespace std {
// Override so set/map lookups do not create extra raw_ptr. This also allows
// dangling pointers to be used for lookup.
-template <typename T, typename I>
-struct less<raw_ptr<T, I>> {
+template <typename T, typename Impl>
+struct less<raw_ptr<T, Impl>> {
using is_transparent = void;
- bool operator()(const raw_ptr<T, I>& lhs, const raw_ptr<T, I>& rhs) const {
+ bool operator()(const raw_ptr<T, Impl>& lhs,
+ const raw_ptr<T, Impl>& rhs) const {
+ Impl::IncrementLessCountForTest();
return lhs < rhs;
}
- bool operator()(T* lhs, const raw_ptr<T, I>& rhs) const { return lhs < rhs; }
+ bool operator()(T* lhs, const raw_ptr<T, Impl>& rhs) const {
+ Impl::IncrementLessCountForTest();
+ return lhs < rhs;
+ }
- bool operator()(const raw_ptr<T, I>& lhs, T* rhs) const { return lhs < rhs; }
+ bool operator()(const raw_ptr<T, Impl>& lhs, T* rhs) const {
+ Impl::IncrementLessCountForTest();
+ return lhs < rhs;
+ }
};
} // namespace std
diff --git a/base/strings/escape.cc b/base/strings/escape.cc
index 011b79c..5cd770d 100644
--- a/base/strings/escape.cc
+++ b/base/strings/escape.cc
@@ -61,7 +61,7 @@
escaped.push_back(IntToHex(c >> 4));
escaped.push_back(IntToHex(c & 0xf));
} else {
- escaped.push_back(c);
+ escaped.push_back(static_cast<char>(c));
}
}
return escaped;
@@ -198,8 +198,8 @@
char most_sig_digit(escaped_text[index + 1]);
char least_sig_digit(escaped_text[index + 2]);
if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
- *value =
- HexDigitToInt(most_sig_digit) * 16 + HexDigitToInt(least_sig_digit);
+ *value = static_cast<unsigned char>(HexDigitToInt(most_sig_digit) * 16 +
+ HexDigitToInt(least_sig_digit));
return true;
}
return false;
@@ -236,7 +236,7 @@
}
}
- int32_t char_index = 0;
+ size_t char_index = 0;
// Check if the unicode "character" that was just unescaped is valid.
if (!ReadUnicodeCharacter(reinterpret_cast<char*>(bytes), num_bytes,
&char_index, code_point_out)) {
@@ -253,10 +253,11 @@
// This method takes a Unicode code point and returns true if it should be
// unescaped, based on |rules|.
-bool ShouldUnescapeCodePoint(UnescapeRule::Type rules, uint32_t code_point) {
+bool ShouldUnescapeCodePoint(UnescapeRule::Type rules,
+ base_icu::UChar32 code_point) {
// If this is an ASCII character, use the lookup table.
- if (code_point < 0x80) {
- return kUrlUnescape[code_point] ||
+ if (code_point >= 0 && code_point < 0x80) {
+ return kUrlUnescape[static_cast<size_t>(code_point)] ||
// Allow some additional unescaping when flags are set.
(code_point == ' ' && (rules & UnescapeRule::SPACES)) ||
// Allow any of the prohibited but non-control characters when doing
@@ -418,7 +419,7 @@
// sequences.
unsigned char non_utf8_byte;
if (UnescapeUnsignedByteAtIndex(escaped_text, i, &non_utf8_byte)) {
- result.push_back(non_utf8_byte);
+ result.push_back(static_cast<char>(non_utf8_byte));
if (adjustments)
adjustments->push_back(OffsetAdjuster::Adjustment(i, 3, 1));
i += 3;
@@ -569,7 +570,7 @@
// UnescapeUnsignedByteAtIndex does bounds checking, so this is always safe
// to call.
if (UnescapeUnsignedByteAtIndex(escaped_text, i, &byte)) {
- unescaped_text[output_index++] = byte;
+ unescaped_text[output_index++] = static_cast<char>(byte);
i += 3;
continue;
}
@@ -595,7 +596,7 @@
unescaped_text->clear();
std::set<unsigned char> illegal_encoded_bytes;
- for (char c = '\x00'; c < '\x20'; ++c) {
+ for (unsigned char c = '\x00'; c < '\x20'; ++c) {
illegal_encoded_bytes.insert(c);
}
if (fail_on_path_separators) {
@@ -632,7 +633,7 @@
std::u16string UnescapeForHTML(StringPiece16 input) {
static const struct {
const char* ampersand_code;
- const char replacement;
+ const char16_t replacement;
} kEscapeToChars[] = {
{"<", '<'}, {">", '>'}, {"&", '&'},
{""", '"'}, {"'", '\''},
@@ -648,14 +649,15 @@
++iter) {
if (*iter == '&') {
// Potential ampersand encode char.
- size_t index = iter - text.begin();
+ size_t index = static_cast<size_t>(iter - text.begin());
for (size_t i = 0; i < std::size(kEscapeToChars); i++) {
if (ampersand_chars[i].empty()) {
ampersand_chars[i] = ASCIIToUTF16(kEscapeToChars[i].ampersand_code);
}
if (text.find(ampersand_chars[i], index) == index) {
- text.replace(iter, iter + ampersand_chars[i].length(), 1,
- kEscapeToChars[i].replacement);
+ text.replace(
+ iter, iter + static_cast<ptrdiff_t>(ampersand_chars[i].length()),
+ 1, kEscapeToChars[i].replacement);
break;
}
}
diff --git a/base/strings/escape.h b/base/strings/escape.h
index 57f2f9a..02203be 100644
--- a/base/strings/escape.h
+++ b/base/strings/escape.h
@@ -74,41 +74,39 @@
// functions.
typedef uint32_t Type;
- enum {
- // Don't unescape anything at all.
- NONE = 0,
+ // Don't unescape anything at all.
+ static constexpr Type NONE = 0;
- // Don't unescape anything special, but all normal unescaping will happen.
- // This is a placeholder and can't be combined with other flags (since it's
- // just the absence of them). All other unescape rules imply "normal" in
- // addition to their special meaning. Things like escaped letters, digits,
- // and most symbols will get unescaped with this mode.
- NORMAL = 1 << 0,
+ // Don't unescape anything special, but all normal unescaping will happen.
+ // This is a placeholder and can't be combined with other flags (since it's
+ // just the absence of them). All other unescape rules imply "normal" in
+ // addition to their special meaning. Things like escaped letters, digits,
+ // and most symbols will get unescaped with this mode.
+ static constexpr Type NORMAL = 1 << 0;
- // Convert %20 to spaces. In some places where we're showing URLs, we may
- // want this. In places where the URL may be copied and pasted out, then
- // you wouldn't want this since it might not be interpreted in one piece
- // by other applications. Other UTF-8 spaces will not be unescaped.
- SPACES = 1 << 1,
+ // Convert %20 to spaces. In some places where we're showing URLs, we may
+ // want this. In places where the URL may be copied and pasted out, then
+ // you wouldn't want this since it might not be interpreted in one piece
+ // by other applications. Other UTF-8 spaces will not be unescaped.
+ static constexpr Type SPACES = 1 << 1;
- // Unescapes '/' and '\\'. If these characters were unescaped, the resulting
- // URL won't be the same as the source one. Moreover, they are dangerous to
- // unescape in strings that will be used as file paths or names. This value
- // should only be used when slashes don't have special meaning, like data
- // URLs.
- PATH_SEPARATORS = 1 << 2,
+ // Unescapes '/' and '\\'. If these characters were unescaped, the resulting
+ // URL won't be the same as the source one. Moreover, they are dangerous to
+ // unescape in strings that will be used as file paths or names. This value
+ // should only be used when slashes don't have special meaning, like data
+ // URLs.
+ static constexpr Type PATH_SEPARATORS = 1 << 2;
- // Unescapes various characters that will change the meaning of URLs,
- // including '%', '+', '&', '#'. Does not unescape path separators.
- // If these characters were unescaped, the resulting URL won't be the same
- // as the source one. This flag is used when generating final output like
- // filenames for URLs where we won't be interpreting as a URL and want to do
- // as much unescaping as possible.
- URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3,
+ // Unescapes various characters that will change the meaning of URLs,
+ // including '%', '+', '&', '#'. Does not unescape path separators.
+ // If these characters were unescaped, the resulting URL won't be the same
+ // as the source one. This flag is used when generating final output like
+ // filenames for URLs where we won't be interpreting as a URL and want to do
+ // as much unescaping as possible.
+ static constexpr Type URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3;
- // URL queries use "+" for space. This flag controls that replacement.
- REPLACE_PLUS_WITH_SPACE = 1 << 4,
- };
+ // URL queries use "+" for space. This flag controls that replacement.
+ static constexpr Type REPLACE_PLUS_WITH_SPACE = 1 << 4;
};
// Unescapes |escaped_text| and returns the result.
diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc
index d7c9a47..607d6d5 100644
--- a/base/strings/pattern.cc
+++ b/base/strings/pattern.cc
@@ -124,7 +124,7 @@
base_icu::UChar32 operator()(const char** p, const char* end) {
base_icu::UChar32 c;
int offset = 0;
- CBU8_NEXT(*p, offset, end - *p, c);
+ CBU8_NEXT(reinterpret_cast<const uint8_t*>(*p), offset, end - *p, c);
*p += offset;
return c;
}
diff --git a/base/strings/safe_sprintf.cc b/base/strings/safe_sprintf.cc
index 6c9aa19..26a7715 100644
--- a/base/strings/safe_sprintf.cc
+++ b/base/strings/safe_sprintf.cc
@@ -223,8 +223,13 @@
// if |pad| is ' '.
//
// Returns "false", if the |buffer_| overflowed at any time.
- bool IToASCII(bool sign, bool upcase, int64_t i, int base,
- char pad, size_t padding, const char* prefix);
+ bool IToASCII(bool sign,
+ bool upcase,
+ int64_t i,
+ size_t base,
+ char pad,
+ size_t padding,
+ const char* prefix);
private:
// Increments |count_| by |inc| unless this would cause |count_| to
@@ -275,9 +280,13 @@
size_t count_;
};
-
-bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base,
- char pad, size_t padding, const char* prefix) {
+bool Buffer::IToASCII(bool sign,
+ bool upcase,
+ int64_t i,
+ size_t base,
+ char pad,
+ size_t padding,
+ const char* prefix) {
// Sanity check for parameters. None of these should ever fail, but see
// above for the rationale why we can't call GURL_CHECK().
DEBUG_CHECK(base >= 2);
@@ -295,7 +304,7 @@
// if (sign && i < 0)
// prefix = "-";
// num = abs(i);
- int minint = 0;
+ size_t minint = 0;
uint64_t num;
if (sign && i < 0) {
prefix = "-";
@@ -335,7 +344,7 @@
}
} else
prefix = nullptr;
- const size_t prefix_length = reverse_prefix - prefix;
+ const size_t prefix_length = static_cast<size_t>(reverse_prefix - prefix);
// Loop until we have converted the entire number. Output at least one
// character (i.e. '0').
@@ -384,7 +393,8 @@
}
} else {
started = true;
- Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]);
+ Out((upcase ? kUpCaseHexDigits
+ : kDownCaseHexDigits)[num % base + minint]);
}
minint = 0;
@@ -457,13 +467,14 @@
// character from a space ' ' to a zero '0'.
pad = ch == '0' ? '0' : ' ';
for (;;) {
+ const size_t digit = static_cast<size_t>(ch - '0');
// The maximum allowed padding fills all the available address
// space and leaves just enough space to insert the trailing NUL.
const size_t max_padding = kSSizeMax - 1;
- if (padding > max_padding/10 ||
- 10*padding > max_padding - (ch - '0')) {
- DEBUG_CHECK(padding <= max_padding/10 &&
- 10*padding <= max_padding - (ch - '0'));
+ if (padding > max_padding / 10 ||
+ 10 * padding > max_padding - digit) {
+ DEBUG_CHECK(padding <= max_padding / 10 &&
+ 10 * padding <= max_padding - digit);
// Integer overflow detected. Skip the rest of the width until
// we find the format character, then do the normal error handling.
padding_overflow:
@@ -475,7 +486,7 @@
}
goto fail_to_expand;
}
- padding = 10*padding + ch - '0';
+ padding = 10 * padding + digit;
if (padding > max_padding) {
// This doesn't happen for "sane" values of kSSizeMax. But once
// kSSizeMax gets smaller than about 10, our earlier range checks
@@ -552,9 +563,9 @@
} else {
// Pointer values require an actual pointer or a string.
if (arg.type == Arg::POINTER) {
- i = reinterpret_cast<uintptr_t>(arg.ptr);
+ i = static_cast<int64_t>(reinterpret_cast<uintptr_t>(arg.ptr));
} else if (arg.type == Arg::STRING) {
- i = reinterpret_cast<uintptr_t>(arg.str);
+ i = static_cast<int64_t>(reinterpret_cast<uintptr_t>(arg.str));
} else if (arg.type == Arg::INT &&
arg.integer.width == sizeof(NULL) &&
arg.integer.i == 0) { // Allow C++'s version of NULL
diff --git a/base/strings/safe_sprintf.h b/base/strings/safe_sprintf.h
index 8e6c922..a5b242b 100644
--- a/base/strings/safe_sprintf.h
+++ b/base/strings/safe_sprintf.h
@@ -167,7 +167,7 @@
integer.width = sizeof(long);
}
Arg(unsigned long j) : type(UINT) {
- integer.i = j;
+ integer.i = static_cast<int64_t>(j);
integer.width = sizeof(long);
}
Arg(signed long long j) : type(INT) {
diff --git a/base/strings/safe_sprintf_unittest.cc b/base/strings/safe_sprintf_unittest.cc
index 71814b3..ad1cca2 100644
--- a/base/strings/safe_sprintf_unittest.cc
+++ b/base/strings/safe_sprintf_unittest.cc
@@ -337,22 +337,23 @@
// Pointer
char addr[20];
- sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf);
+ snprintf(addr, sizeof(addr), "0x%llX", (unsigned long long)(uintptr_t)buf);
SafeSPrintf(buf, "%p", buf);
EXPECT_EQ(std::string(addr), std::string(buf));
SafeSPrintf(buf, "%p", (const char *)buf);
EXPECT_EQ(std::string(addr), std::string(buf));
- sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)sprintf);
- SafeSPrintf(buf, "%p", sprintf);
+ snprintf(addr, sizeof(addr), "0x%llX",
+ (unsigned long long)(uintptr_t)snprintf);
+ SafeSPrintf(buf, "%p", snprintf);
EXPECT_EQ(std::string(addr), std::string(buf));
// Padding for pointers is a little more complicated because of the "0x"
// prefix. Padding with '0' zeros is relatively straight-forward, but
// padding with ' ' spaces requires more effort.
- sprintf(addr, "0x%017llX", (unsigned long long)(uintptr_t)buf);
+ snprintf(addr, sizeof(addr), "0x%017llX", (unsigned long long)(uintptr_t)buf);
SafeSPrintf(buf, "%019p", buf);
EXPECT_EQ(std::string(addr), std::string(buf));
- sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf);
+ snprintf(addr, sizeof(addr), "0x%llX", (unsigned long long)(uintptr_t)buf);
memset(addr, ' ',
(char*)memmove(addr + sizeof(addr) - strlen(addr) - 1,
addr, strlen(addr)+1) - addr);
@@ -427,8 +428,8 @@
EXPECT_EQ('X', tmp[i]);
// The text that was generated by SafeSPrintf() should always match the
- // equivalent text generated by sprintf(). Please note that the format
- // string for sprintf() is not complicated, as it does not have the
+ // equivalent text generated by snprintf(). Please note that the format
+ // string for snprintf() is not complicated, as it does not have the
// benefit of getting type information from the C++ compiler.
//
// N.B.: It would be so much cleaner to use snprintf(). But unfortunately,
@@ -436,10 +437,10 @@
// are all really awkward.
char ref[256];
GURL_CHECK_LE(sz, sizeof(ref));
- sprintf(ref, "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>",
- static_cast<long long>(std::numeric_limits<intptr_t>::min()),
- static_cast<unsigned long long>(
- reinterpret_cast<uintptr_t>(PrintLongString)));
+ snprintf(ref, sizeof(ref), "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>",
+ static_cast<long long>(std::numeric_limits<intptr_t>::min()),
+ static_cast<unsigned long long>(
+ reinterpret_cast<uintptr_t>(PrintLongString)));
ref[sz-1] = '\000';
#if defined(NDEBUG)
@@ -448,7 +449,7 @@
const size_t kSSizeMax = internal::GetSafeSPrintfSSizeMaxForTest();
#endif
- // Compare the output from SafeSPrintf() to the one from sprintf().
+ // Compare the output from SafeSPrintf() to the one from snprintf().
EXPECT_EQ(std::string(ref).substr(0, kSSizeMax-1), std::string(tmp.get()));
// We allocated a slightly larger buffer, so that we could perform some
diff --git a/base/strings/string_number_conversions_internal.h b/base/strings/string_number_conversions_internal.h
index 2abd324..7c1804c 100644
--- a/base/strings/string_number_conversions_internal.h
+++ b/base/strings/string_number_conversions_internal.h
@@ -61,13 +61,13 @@
absl::optional<uint8_t> CharToDigit(CHAR c) {
static_assert(1 <= BASE && BASE <= 36, "BASE needs to be in [1, 36]");
if (c >= '0' && c < '0' + std::min(BASE, 10))
- return c - '0';
+ return static_cast<uint8_t>(c - '0');
if (c >= 'a' && c < 'a' + BASE - 10)
- return c - 'a' + 10;
+ return static_cast<uint8_t>(c - 'a' + 10);
if (c >= 'A' && c < 'A' + BASE - 10)
- return c - 'A' + 10;
+ return static_cast<uint8_t>(c - 'A' + 10);
return absl::nullopt;
}
diff --git a/base/strings/string_piece.cc b/base/strings/string_piece.cc
index f9b9422..e76e6ad 100644
--- a/base/strings/string_piece.cc
+++ b/base/strings/string_piece.cc
@@ -129,7 +129,7 @@
s.begin(), s.end());
if (found == self.end())
return BasicStringPiece<CharT>::npos;
- return found - self.begin();
+ return static_cast<size_t>(found - self.begin());
}
size_t find_first_of(StringPiece16 self, StringPiece16 s, size_t pos) {
diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc
index 8729bf4..8623096 100644
--- a/base/strings/string_util.cc
+++ b/base/strings/string_util.cc
@@ -174,7 +174,8 @@
while (char_index >= 0) {
int32_t prev = char_index;
base_icu::UChar32 code_point = 0;
- CBU8_NEXT(data, char_index, truncation_length, code_point);
+ CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
+ truncation_length, code_point);
if (!IsValidCharacter(code_point)) {
char_index = prev - 1;
} else {
@@ -183,7 +184,7 @@
}
if (char_index >= 0 )
- *output = input.substr(0, char_index);
+ *output = input.substr(0, static_cast<size_t>(char_index));
else
output->clear();
}
@@ -254,14 +255,6 @@
return internal::DoIsStringUTF8<IsValidCodepoint>(str);
}
-bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
- return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
-bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
- return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
}
diff --git a/base/strings/string_util.h b/base/strings/string_util.h
index 435b5c6..c3224f1 100644
--- a/base/strings/string_util.h
+++ b/base/strings/string_util.h
@@ -337,13 +337,6 @@
BASE_EXPORT bool IsStringASCII(WStringPiece str);
#endif
-// Compare the lower-case form of the given string against the given
-// previously-lower-cased ASCII string (typically a constant).
-BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
- StringPiece lowercase_ascii);
-BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
- StringPiece lowercase_ascii);
-
// Performs a case-sensitive string compare of the given 16-bit string against
// the given 8-bit ASCII string (typically a constant). The behavior is
// undefined if the |ascii| string is not ASCII.
diff --git a/base/strings/string_util_internal.h b/base/strings/string_util_internal.h
index 173cfaa..8608eba 100644
--- a/base/strings/string_util_internal.h
+++ b/base/strings/string_util_internal.h
@@ -242,28 +242,6 @@
return true;
}
-// Implementation note: Normally this function will be called with a hardcoded
-// constant for the lowercase_ascii parameter. Constructing a StringPiece from
-// a C constant requires running strlen, so the result will be two passes
-// through the buffers, one to file the length of lowercase_ascii, and one to
-// compare each letter.
-//
-// This function could have taken a const char* to avoid this and only do one
-// pass through the string. But the strlen is faster than the case-insensitive
-// compares and lets us early-exit in the case that the strings are different
-// lengths (will often be the case for non-matches). So whether one approach or
-// the other will be faster depends on the case.
-//
-// The hardcoded strings are typically very short so it doesn't matter, and the
-// string piece gives additional flexibility for the caller (doesn't have to be
-// null terminated) so we choose the StringPiece route.
-template <typename T, typename CharT = typename T::value_type>
-inline bool DoLowerCaseEqualsASCII(T str, StringPiece lowercase_ascii) {
- return std::equal(
- str.begin(), str.end(), lowercase_ascii.begin(), lowercase_ascii.end(),
- [](auto lhs, auto rhs) { return ToLowerASCII(lhs) == rhs; });
-}
-
template <typename T, typename CharT = typename T::value_type>
bool StartsWithT(T str, T search_for, CompareCase case_sensitivity) {
if (search_for.size() > str.size())
diff --git a/base/strings/string_util_unittest.cc b/base/strings/string_util_unittest.cc
index f41f6d8..4d2ac71 100644
--- a/base/strings/string_util_unittest.cc
+++ b/base/strings/string_util_unittest.cc
@@ -660,22 +660,6 @@
EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));
}
-TEST(StringUtilTest, LowerCaseEqualsASCII) {
- static const struct {
- const char* src_a;
- const char* dst;
- } lowercase_cases[] = {
- { "FoO", "foo" },
- { "foo", "foo" },
- { "FOO", "foo" },
- };
-
- for (const auto& i : lowercase_cases) {
- EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(i.src_a), i.dst));
- EXPECT_TRUE(LowerCaseEqualsASCII(i.src_a, i.dst));
- }
-}
-
TEST(StringUtilTest, FormatBytesUnlocalized) {
static const struct {
int64_t bytes;
diff --git a/base/strings/string_util_win.cc b/base/strings/string_util_win.cc
index 7a9b891..7ab9061 100644
--- a/base/strings/string_util_win.cc
+++ b/base/strings/string_util_win.cc
@@ -71,10 +71,6 @@
return input.find_first_not_of(characters) == StringPiece::npos;
}
-bool LowerCaseEqualsASCII(WStringPiece str, StringPiece lowercase_ascii) {
- return internal::DoLowerCaseEqualsASCII(str, lowercase_ascii);
-}
-
bool EqualsASCII(WStringPiece str, StringPiece ascii) {
return std::equal(ascii.begin(), ascii.end(), str.begin(), str.end());
}
diff --git a/base/strings/string_util_win.h b/base/strings/string_util_win.h
index fbf815b..6b5fba3 100644
--- a/base/strings/string_util_win.h
+++ b/base/strings/string_util_win.h
@@ -157,9 +157,6 @@
BASE_EXPORT bool ContainsOnlyChars(WStringPiece input, WStringPiece characters);
-BASE_EXPORT bool LowerCaseEqualsASCII(WStringPiece str,
- StringPiece lowercase_ascii);
-
BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
BASE_EXPORT bool StartsWith(
diff --git a/base/strings/stringprintf.cc b/base/strings/stringprintf.cc
index e0c5e20..8de48f3 100644
--- a/base/strings/stringprintf.cc
+++ b/base/strings/stringprintf.cc
@@ -65,14 +65,14 @@
int result = vsnprintfT(stack_buf, std::size(stack_buf), format, ap_copy);
va_end(ap_copy);
- if (result >= 0 && result < static_cast<int>(std::size(stack_buf))) {
+ if (result >= 0 && static_cast<size_t>(result) < std::size(stack_buf)) {
// It fit.
- dst->append(stack_buf, result);
+ dst->append(stack_buf, static_cast<size_t>(result));
return;
}
// Repeatedly increase buffer size until it fits.
- int mem_length = std::size(stack_buf);
+ size_t mem_length = std::size(stack_buf);
while (true) {
if (result < 0) {
#if BUILDFLAG(IS_WIN)
@@ -88,7 +88,7 @@
#endif
} else {
// We need exactly "result + 1" characters.
- mem_length = result + 1;
+ mem_length = static_cast<size_t>(result) + 1;
}
if (mem_length > 32 * 1024 * 1024) {
@@ -107,9 +107,9 @@
result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
va_end(ap_copy);
- if ((result >= 0) && (result < mem_length)) {
+ if ((result >= 0) && (static_cast<size_t>(result) < mem_length)) {
// It fit.
- dst->append(&mem_buf[0], result);
+ dst->append(&mem_buf[0], static_cast<size_t>(result));
return;
}
}
diff --git a/base/strings/sys_string_conversions_win.cc b/base/strings/sys_string_conversions_win.cc
index 3f08956..c0b4829 100644
--- a/base/strings/sys_string_conversions_win.cc
+++ b/base/strings/sys_string_conversions_win.cc
@@ -42,7 +42,7 @@
return std::wstring();
std::wstring wide;
- wide.resize(charcount);
+ wide.resize(static_cast<size_t>(charcount));
MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount);
return wide;
@@ -61,7 +61,7 @@
return std::string();
std::string mb;
- mb.resize(charcount);
+ mb.resize(static_cast<size_t>(charcount));
WideCharToMultiByte(code_page, 0, wide.data(), wide_length,
&mb[0], charcount, NULL, NULL);
diff --git a/base/strings/utf_offset_string_conversions.cc b/base/strings/utf_offset_string_conversions.cc
index e6bb6d6..b67b6a7 100644
--- a/base/strings/utf_offset_string_conversions.cc
+++ b/base/strings/utf_offset_string_conversions.cc
@@ -39,7 +39,8 @@
GURL_DCHECK(offset);
if (*offset == std::u16string::npos)
return;
- int adjustment = 0;
+ size_t original_lengths = 0;
+ size_t output_lengths = 0;
for (const auto& i : adjustments) {
if (*offset <= i.original_offset)
break;
@@ -47,9 +48,10 @@
*offset = std::u16string::npos;
return;
}
- adjustment += static_cast<int>(i.original_length - i.output_length);
+ original_lengths += i.original_length;
+ output_lengths += i.output_length;
}
- *offset -= adjustment;
+ *offset += output_lengths - original_lengths;
if (*offset > limit)
*offset = std::u16string::npos;
@@ -70,17 +72,20 @@
size_t* offset) {
if (*offset == std::u16string::npos)
return;
- int adjustment = 0;
+ size_t original_lengths = 0;
+ size_t output_lengths = 0;
for (const auto& i : adjustments) {
- if (*offset + adjustment <= i.original_offset)
+ if (*offset + original_lengths - output_lengths <= i.original_offset)
break;
- adjustment += static_cast<int>(i.original_length - i.output_length);
- if ((*offset + adjustment) < (i.original_offset + i.original_length)) {
+ original_lengths += i.original_length;
+ output_lengths += i.output_length;
+ if ((*offset + original_lengths - output_lengths) <
+ (i.original_offset + i.original_length)) {
*offset = std::u16string::npos;
return;
}
}
- *offset += adjustment;
+ *offset += original_lengths - output_lengths;
}
// static
@@ -149,15 +154,15 @@
// <=
// adjusted_iter->original_offset + shift +
// adjusted_iter->original_length
-
// Modify the current |adjusted_iter| to include whatever collapsing
// happened in |first_iter|, then advance to the next |first_adjustments|
// because we dealt with the current one.
- const int collapse = static_cast<int>(first_iter->original_length) -
- static_cast<int>(first_iter->output_length);
+
// This function does not know how to deal with a string that expands and
// then gets modified, only strings that collapse and then get modified.
- GURL_DCHECK_GT(collapse, 0);
+ GURL_DCHECK_GT(first_iter->original_length, first_iter->output_length);
+ const size_t collapse =
+ first_iter->original_length - first_iter->output_length;
adjusted_iter->original_length += collapse;
currently_collapsing += collapse;
++first_iter;
@@ -188,14 +193,12 @@
OffsetAdjuster::Adjustments* adjustments) {
if (adjustments)
adjustments->clear();
- // ICU requires 32-bit numbers.
bool success = true;
- int32_t src_len32 = static_cast<int32_t>(src_len);
- for (int32_t i = 0; i < src_len32; i++) {
+ for (size_t i = 0; i < src_len; i++) {
base_icu::UChar32 code_point;
size_t original_i = i;
size_t chars_written = 0;
- if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
+ if (ReadUnicodeCharacter(src, src_len, &i, &code_point)) {
chars_written = WriteUnicodeCharacter(code_point, output);
} else {
chars_written = WriteUnicodeCharacter(0xFFFD, output);
diff --git a/base/strings/utf_string_conversion_utils.cc b/base/strings/utf_string_conversion_utils.cc
index 76dd725..3ddcc7b 100644
--- a/base/strings/utf_string_conversion_utils.cc
+++ b/base/strings/utf_string_conversion_utils.cc
@@ -12,11 +12,12 @@
// ReadUnicodeCharacter --------------------------------------------------------
bool ReadUnicodeCharacter(const char* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point_out) {
base_icu::UChar32 code_point;
- CBU8_NEXT(src, *char_index, src_len, code_point);
+ CBU8_NEXT(reinterpret_cast<const uint8_t*>(src), *char_index, src_len,
+ code_point);
*code_point_out = code_point;
// The ICU macro above moves to the next char, we want to point to the last
@@ -28,13 +29,12 @@
}
bool ReadUnicodeCharacter(const char16_t* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point) {
if (CBU16_IS_SURROGATE(src[*char_index])) {
- if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
- *char_index + 1 >= src_len ||
- !CBU16_IS_TRAIL(src[*char_index + 1])) {
+ if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || !src_len ||
+ *char_index >= src_len - 1 || !CBU16_IS_TRAIL(src[*char_index + 1])) {
// Invalid surrogate pair.
return false;
}
@@ -53,8 +53,8 @@
#if defined(WCHAR_T_IS_UTF32)
bool ReadUnicodeCharacter(const wchar_t* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point) {
// Conversion is easy since the source is 32-bit.
*code_point = src[*char_index];
@@ -66,20 +66,21 @@
// WriteUnicodeCharacter -------------------------------------------------------
-size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) {
- if (code_point <= 0x7f) {
+size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+ std::string* output) {
+ if (code_point >= 0 && code_point <= 0x7f) {
// Fast path the common case of one byte.
output->push_back(static_cast<char>(code_point));
return 1;
}
-
// CBU8_APPEND_UNSAFE can append up to 4 bytes.
size_t char_offset = output->length();
size_t original_char_offset = char_offset;
output->resize(char_offset + CBU8_MAX_LENGTH);
- CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
+ CBU8_APPEND_UNSAFE(reinterpret_cast<uint8_t*>(output->data()), char_offset,
+ code_point);
// CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so
// it will represent the new length of the string.
@@ -87,9 +88,10 @@
return char_offset - original_char_offset;
}
-size_t WriteUnicodeCharacter(uint32_t code_point, std::u16string* output) {
+size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+ std::u16string* output) {
if (CBU16_LENGTH(code_point) == 1) {
- // Thie code point is in the Basic Multilingual Plane (BMP).
+ // The code point is in the Basic Multilingual Plane (BMP).
output->push_back(static_cast<char16_t>(code_point));
return 1;
}
diff --git a/base/strings/utf_string_conversion_utils.h b/base/strings/utf_string_conversion_utils.h
index 877d264..8c209a2 100644
--- a/base/strings/utf_string_conversion_utils.h
+++ b/base/strings/utf_string_conversion_utils.h
@@ -49,21 +49,21 @@
//
// Returns true on success. On false, |*code_point| will be invalid.
BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point_out);
// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
BASE_EXPORT bool ReadUnicodeCharacter(const char16_t* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point);
#if defined(WCHAR_T_IS_UTF32)
// Reads UTF-32 character. The usage is the same as the 8-bit version above.
BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
- int32_t src_len,
- int32_t* char_index,
+ size_t src_len,
+ size_t* char_index,
base_icu::UChar32* code_point);
#endif // defined(WCHAR_T_IS_UTF32)
@@ -71,20 +71,21 @@
// Appends a UTF-8 character to the given 8-bit string. Returns the number of
// bytes written.
-BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+BASE_EXPORT size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
std::string* output);
// Appends the given code point as a UTF-16 character to the given 16-bit
// string. Returns the number of 16-bit values written.
-BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
+BASE_EXPORT size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
std::u16string* output);
#if defined(WCHAR_T_IS_UTF32)
// Appends the given UTF-32 character to the given 32-bit string. Returns the
// number of 32-bit values written.
-inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) {
+inline size_t WriteUnicodeCharacter(base_icu::UChar32 code_point,
+ std::wstring* output) {
// This is the easy case, just append the character.
- output->push_back(code_point);
+ output->push_back(static_cast<wchar_t>(code_point));
return 1;
}
#endif // defined(WCHAR_T_IS_UTF32)
diff --git a/base/third_party/icu/icu_utf.h b/base/third_party/icu/icu_utf.h
index 16792c4..0b50b71 100644
--- a/base/third_party/icu/icu_utf.h
+++ b/base/third_party/icu/icu_utf.h
@@ -118,7 +118,7 @@
* @return TRUE or FALSE
* @stable ICU 2.4
*/
-#define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+#define CBU_IS_SURROGATE(c) (((uint32_t)(c)&0xfffff800) == 0xd800)
/**
* Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
@@ -278,25 +278,27 @@
* @see U8_APPEND
* @stable ICU 2.4
*/
-#define CBU8_APPEND_UNSAFE(s, i, c) CBUPRV_BLOCK_MACRO_BEGIN { \
- uint32_t __uc=(c); \
- if(__uc<=0x7f) { \
- (s)[(i)++]=(uint8_t)__uc; \
- } else { \
- if(__uc<=0x7ff) { \
- (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
- } else { \
- if(__uc<=0xffff) { \
- (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
- } else { \
- (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
- (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
- } \
-} CBUPRV_BLOCK_MACRO_END
+#define CBU8_APPEND_UNSAFE(s, i, c) \
+ CBUPRV_BLOCK_MACRO_BEGIN { \
+ uint32_t __uc = (uint32_t)(c); \
+ if (__uc <= 0x7f) { \
+ (s)[(i)++] = (uint8_t)__uc; \
+ } else { \
+ if (__uc <= 0x7ff) { \
+ (s)[(i)++] = (uint8_t)((__uc >> 6) | 0xc0); \
+ } else { \
+ if (__uc <= 0xffff) { \
+ (s)[(i)++] = (uint8_t)((__uc >> 12) | 0xe0); \
+ } else { \
+ (s)[(i)++] = (uint8_t)((__uc >> 18) | 0xf0); \
+ (s)[(i)++] = (uint8_t)(((__uc >> 12) & 0x3f) | 0x80); \
+ } \
+ (s)[(i)++] = (uint8_t)(((__uc >> 6) & 0x3f) | 0x80); \
+ } \
+ (s)[(i)++] = (uint8_t)((__uc & 0x3f) | 0x80); \
+ } \
+ } \
+ CBUPRV_BLOCK_MACRO_END
// source/common/unicode/utf16.h
@@ -314,7 +316,7 @@
* @return TRUE or FALSE
* @stable ICU 2.4
*/
-#define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+#define CBU16_IS_LEAD(c) (((uint32_t)(c)&0xfffffc00) == 0xd800)
/**
* Is this code unit a trail surrogate (U+dc00..U+dfff)?
@@ -322,7 +324,7 @@
* @return TRUE or FALSE
* @stable ICU 2.4
*/
-#define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+#define CBU16_IS_TRAIL(c) (((uint32_t)(c)&0xfffffc00) == 0xdc00)
/**
* Is this code unit a surrogate (U+d800..U+dfff)?
diff --git a/copy.bara.sky b/copy.bara.sky
index b1d8df5..a0c899b 100644
--- a/copy.bara.sky
+++ b/copy.bara.sky
@@ -27,6 +27,8 @@
"base/i18n/uchar.h",
"base/memory/raw_ptr.h",
"base/memory/raw_ptr_exclusion.h",
+ "base/numerics/safe_conversions.h",
+ "base/numerics/safe_conversions_impl.h",
"base/no_destructor.h",
"base/ranges/*.h",
"base/stl_util.h",
diff --git a/url/gurl.h b/url/gurl.h
index 10ba992..97a60ab 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -470,14 +470,16 @@
// Returns the substring of the input identified by the given component.
std::string ComponentString(const url::Component& comp) const {
- if (comp.len <= 0)
+ if (!comp.is_nonempty())
return std::string();
- return std::string(spec_, comp.begin, comp.len);
+ return std::string(spec_, static_cast<size_t>(comp.begin),
+ static_cast<size_t>(comp.len));
}
gurl_base::StringPiece ComponentStringPiece(const url::Component& comp) const {
- if (comp.len <= 0)
+ if (!comp.is_nonempty())
return gurl_base::StringPiece();
- return gurl_base::StringPiece(&spec_[comp.begin], comp.len);
+ return gurl_base::StringPiece(&spec_[static_cast<size_t>(comp.begin)],
+ static_cast<size_t>(comp.len));
}
void ProcessFileSystemURLAfterReplaceComponents();
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index b45cea0..c3ebddd 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc
@@ -101,7 +101,7 @@
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- if (scheme.len <= 0) {
+ if (!scheme.is_nonempty()) {
// Scheme is unspecified or empty, convert to empty by appending a colon.
*out_scheme = Component(output->length(), 0);
output->push_back(':');
@@ -117,12 +117,13 @@
// FindAndCompareScheme, which could cause some security checks on
// schemes to be incorrect.
bool success = true;
- int end = scheme.end();
- for (int i = scheme.begin; i < end; i++) {
+ size_t begin = static_cast<size_t>(scheme.begin);
+ size_t end = static_cast<size_t>(scheme.end());
+ for (size_t i = begin; i < end; i++) {
UCHAR ch = static_cast<UCHAR>(spec[i]);
char replacement = 0;
if (ch < 0x80) {
- if (i == scheme.begin) {
+ if (i == begin) {
// Need to do a special check for the first letter of the scheme.
if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
replacement = kSchemeCanonical[ch];
@@ -179,8 +180,9 @@
out_username->begin = output->length();
if (username.len > 0) {
// This will escape characters not valid for the username.
- AppendStringOfType(&username_spec[username.begin], username.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&username_spec[username.begin],
+ static_cast<size_t>(username.len), CHAR_USERINFO,
+ output);
}
out_username->len = output->length() - out_username->begin;
@@ -189,8 +191,9 @@
if (password.len > 0) {
output->push_back(':');
out_password->begin = output->length();
- AppendStringOfType(&password_spec[password.begin], password.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&password_spec[password.begin],
+ static_cast<size_t>(password.len), CHAR_USERINFO,
+ output);
out_password->len = output->length() - out_password->begin;
} else {
*out_password = Component();
@@ -223,7 +226,8 @@
// what the error was, and mark the URL as invalid by returning false.
output->push_back(':');
out_port->begin = output->length();
- AppendInvalidNarrowString(spec, port.begin, port.end(), output);
+ AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
+ static_cast<size_t>(port.end()), output);
out_port->len = output->length() - out_port->begin;
return false;
}
@@ -285,7 +289,7 @@
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- if (ref.len < 0) {
+ if (!ref.is_valid()) {
// Common case of no ref.
*out_ref = Component();
return;
@@ -297,8 +301,8 @@
out_ref->begin = output->length();
// Now iterate through all the characters, converting to UTF-8 and validating.
- int end = ref.end();
- for (int i = ref.begin; i < end; i++) {
+ size_t end = static_cast<size_t>(ref.end());
+ for (size_t i = static_cast<size_t>(ref.begin); i < end; i++) {
UCHAR current_char = static_cast<UCHAR>(spec[i]);
if (current_char < 0x80) {
if (kShouldEscapeCharInFragment[current_char])
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index 370dd77..edc9d67 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -123,15 +123,15 @@
// |*has_non_ascii| flag.
//
// The return value indicates if the output is a potentially valid host name.
-template<typename INCHAR, typename OUTCHAR>
+template <typename INCHAR, typename OUTCHAR>
bool DoSimpleHost(const INCHAR* host,
- int host_len,
+ size_t host_len,
CanonOutputT<OUTCHAR>* output,
bool* has_non_ascii) {
*has_non_ascii = false;
bool success = true;
- for (int i = 0; i < host_len; ++i) {
+ for (size_t i = 0; i < host_len; ++i) {
unsigned int source = host[i];
if (source == '%') {
// Unescape first, if possible.
@@ -175,7 +175,7 @@
}
// Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const char16_t* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
int original_output_len = output->length(); // So we can rewind below.
// We need to escape URL before doing IDN conversion, since punicode strings
@@ -202,8 +202,8 @@
// unescaping. Although we unescaped everything before this function call, if
// somebody does %00 as fullwidth, ICU will convert this to ASCII.
bool success = DoSimpleHost(wide_output.data(),
- wide_output.length(),
- output, &has_non_ascii);
+ static_cast<size_t>(wide_output.length()), output,
+ &has_non_ascii);
if (has_non_ascii) {
// ICU generated something that DoSimpleHost didn't think looked like
// ASCII. This is quite rare, but ICU might convert some characters to
@@ -220,7 +220,8 @@
// ASCII isn't strictly necessary, but DoSimpleHost handles this case
// anyway so we handle it/
output->set_length(original_output_len);
- AppendInvalidNarrowString(wide_output.data(), 0, wide_output.length(),
+ AppendInvalidNarrowString(wide_output.data(), 0,
+ static_cast<size_t>(wide_output.length()),
output);
return false;
}
@@ -230,8 +231,11 @@
// 8-bit convert host to its ASCII version: this converts the UTF-8 input to
// UTF-16. The has_escaped flag should be set if the input string requires
// unescaping.
-bool DoComplexHost(const char* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char* host,
+ size_t host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
// Save the current position in the output. We may write stuff and rewind it
// below, so we need to know where to rewind to.
int begin_length = output->length();
@@ -239,7 +243,7 @@
// Points to the UTF-8 data we want to convert. This will either be the
// input or the unescaped version written to |*output| if necessary.
const char* utf8_source;
- int utf8_source_len;
+ size_t utf8_source_len;
bool are_all_escaped_valid = true;
if (has_escaped) {
// Unescape before converting to UTF-16 for IDN. We write this into the
@@ -264,7 +268,7 @@
// Save the pointer into the data was just converted (it may be appended to
// other data in the output buffer).
utf8_source = &output->data()[begin_length];
- utf8_source_len = output->length() - begin_length;
+ utf8_source_len = static_cast<size_t>(output->length() - begin_length);
} else {
// We don't need to unescape, use input for IDNization later. (We know the
// input has non-ASCII, or the simple version would have been called
@@ -280,17 +284,18 @@
if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
// In this error case, the input may or may not be the output.
StackBuffer utf8;
- for (int i = 0; i < utf8_source_len; i++)
+ for (size_t i = 0; i < utf8_source_len; i++)
utf8.push_back(utf8_source[i]);
output->set_length(begin_length);
- AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
+ AppendInvalidNarrowString(utf8.data(), 0,
+ static_cast<size_t>(utf8.length()), output);
return false;
}
output->set_length(begin_length);
// This will call DoSimpleHost which will do normal ASCII canonicalization
// and also check for IP addresses in the outpt.
- return DoIDNHost(utf16.data(), utf16.length(), output) &&
+ return DoIDNHost(utf16.data(), static_cast<size_t>(utf16.length()), output) &&
are_all_escaped_valid;
}
@@ -298,7 +303,7 @@
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
bool DoComplexHost(const char16_t* host,
- int host_len,
+ size_t host_len,
bool has_non_ascii,
bool has_escaped,
CanonOutput* output) {
@@ -319,8 +324,8 @@
// Once we convert to UTF-8, we can use the 8-bit version of the complex
// host handling code above.
- return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(utf8.data(), static_cast<size_t>(utf8.length()),
+ has_non_ascii, has_escaped, output);
}
// No unescaping necessary, we can safely pass the input to ICU. This
@@ -334,16 +339,18 @@
bool DoHostSubstring(const CHAR* spec,
const Component& host,
CanonOutput* output) {
+ GURL_DCHECK(host.is_valid());
+
bool has_non_ascii, has_escaped;
ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
if (has_non_ascii || has_escaped) {
- return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
+ has_non_ascii, has_escaped, output);
}
- const bool success =
- DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+ const bool success = DoSimpleHost(
+ &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
GURL_DCHECK(!has_non_ascii);
return success;
}
@@ -353,7 +360,7 @@
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (host.len <= 0) {
+ if (!host.is_nonempty()) {
// Empty hosts don't need anything.
host_info->family = CanonHostInfo::NEUTRAL;
host_info->out_host = Component();
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index bbb27a0..f6b4b03 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -11,17 +11,19 @@
#include <cstdio>
#include <string>
+#include "base/numerics/safe_conversions.h"
#include "base/strings/utf_string_conversion_utils.h"
namespace url {
namespace {
-template<typename CHAR, typename UCHAR>
-void DoAppendStringOfType(const CHAR* source, int length,
+template <typename CHAR, typename UCHAR>
+void DoAppendStringOfType(const CHAR* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
- for (int i = 0; i < length; i++) {
+ for (size_t i = 0; i < length; i++) {
if (static_cast<UCHAR>(source[i]) >= 0x80) {
// ReadChar will fill the code point with kUnicodeReplacementCharacter
// when the input is invalid, which is what we want.
@@ -41,10 +43,12 @@
// This function assumes the input values are all contained in 8-bit,
// although it allows any type. Returns true if input is valid, false if not.
-template<typename CHAR, typename UCHAR>
-void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
+template <typename CHAR, typename UCHAR>
+void DoAppendInvalidNarrowString(const CHAR* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
- for (int i = begin; i < end; i++) {
+ for (size_t i = begin; i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (uch >= 0x80) {
// Handle UTF-8/16 encodings. This call will correctly handle the error
@@ -98,7 +102,8 @@
// Convert to UTF-8.
dest_component->begin = utf8_buffer->length();
success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
- override_component.len, utf8_buffer);
+ static_cast<size_t>(override_component.len),
+ utf8_buffer);
dest_component->len = utf8_buffer->length() - dest_component->begin;
}
}
@@ -235,26 +240,24 @@
const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
void AppendStringOfType(const char16_t* source,
- int length,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
bool ReadUTFChar(const char* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!gurl_base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -264,12 +267,9 @@
}
bool ReadUTFChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
if (!gurl_base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!gurl_base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -278,23 +278,25 @@
return true;
}
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
void AppendInvalidNarrowString(const char16_t* spec,
- int begin,
- int end,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
bool ConvertUTF16ToUTF8(const char16_t* input,
- int input_len,
+ size_t input_len,
CanonOutput* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
+ for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF8Value(code_point, output);
@@ -303,10 +305,10 @@
}
bool ConvertUTF8ToUTF16(const char* input,
- int input_len,
+ size_t input_len,
CanonOutputT<char16_t>* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
+ for (size_t i = 0; i < input_len; i++) {
base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF16Value(code_point, output);
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 807ddc5..a41a771 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -77,11 +77,12 @@
// Appends the given string to the output, escaping characters that do not
// match the given |type| in SharedCharTypes.
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
void AppendStringOfType(const char16_t* source,
- int length,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
@@ -107,8 +108,8 @@
// Indicates if the given character is a dot or dot equivalent, returning the
// number of characters taken by it. This will be one for a literal dot, 3 for
// an escaped dot. If the character is not a dot, this will return 0.
-template<typename CHAR>
-inline int IsDot(const CHAR* spec, int offset, int end) {
+template <typename CHAR>
+inline size_t IsDot(const CHAR* spec, size_t offset, size_t end) {
if (spec[offset] == '.') {
return 1;
} else if (spec[offset] == '%' && offset + 3 <= end &&
@@ -154,8 +155,8 @@
// (for a single-byte ASCII character, it will not be changed).
COMPONENT_EXPORT(URL)
bool ReadUTFChar(const char* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
@@ -231,8 +232,8 @@
// (for a single-16-bit-word character, it will not be changed).
COMPONENT_EXPORT(URL)
bool ReadUTFChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
base_icu::UChar32* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
@@ -268,8 +269,8 @@
// Assumes that ch[begin] is within range in the array, but does not assume
// that any following characters are.
inline bool AppendUTF8EscapedChar(const char16_t* str,
- int* begin,
- int length,
+ size_t* begin,
+ size_t length,
CanonOutput* output) {
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
@@ -281,7 +282,9 @@
}
// Handles UTF-8 input. See the wide version above for usage.
-inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
+inline bool AppendUTF8EscapedChar(const char* str,
+ size_t* begin,
+ size_t length,
CanonOutput* output) {
// ReadUTF8Char will handle invalid characters for us and give us the
// kUnicodeReplacementCharacter, so we don't have to do special checking
@@ -308,8 +311,10 @@
return c <= 255;
}
-template<typename CHAR>
-inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
+template <typename CHAR>
+inline bool DecodeEscaped(const CHAR* spec,
+ size_t* begin,
+ size_t end,
unsigned char* unescaped_value) {
if (*begin + 3 > end ||
!Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
@@ -338,11 +343,13 @@
// This is used in error cases to append invalid output so that it looks
// approximately correct. Non-error cases should not call this function since
// the escaping rules are not guaranteed!
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output);
void AppendInvalidNarrowString(const char16_t* spec,
- int begin,
- int end,
+ size_t begin,
+ size_t end,
CanonOutput* output);
// Misc canonicalization helpers ----------------------------------------------
@@ -357,11 +364,11 @@
// normal.
COMPONENT_EXPORT(URL)
bool ConvertUTF16ToUTF8(const char16_t* input,
- int input_len,
+ size_t input_len,
CanonOutput* output);
COMPONENT_EXPORT(URL)
bool ConvertUTF8ToUTF16(const char* input,
- int input_len,
+ size_t input_len,
CanonOutputT<char16_t>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index f4fe2b4..ff62bea 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc
@@ -57,8 +57,8 @@
// Copy the path using path URL's more lax escaping rules.
// We convert to UTF-8 and escape non-ASCII, but leave most
// ASCII characters alone.
- int end = parsed.path.end();
- for (int i = parsed.path.begin; i < end; ++i) {
+ size_t end = static_cast<size_t>(parsed.path.end());
+ for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
UCHAR uch = static_cast<UCHAR>(source.path[i]);
if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
success &= AppendUTF8EscapedChar(source.path, &i, end, output);
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index e043043..32cb5f3 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -101,9 +101,11 @@
// If the input is "../foo", |after_dot| = 1, |end| = 6, and
// at the end, |*consumed_len| = 2 for the "./" this function consumed. The
// original dot length should be handled by the caller.
-template<typename CHAR>
-DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot,
- int end, int* consumed_len) {
+template <typename CHAR>
+DotDisposition ClassifyAfterDot(const CHAR* spec,
+ size_t after_dot,
+ size_t end,
+ size_t* consumed_len) {
if (after_dot == end) {
// Single dot at the end.
*consumed_len = 0;
@@ -115,9 +117,9 @@
return DIRECTORY_CUR;
}
- int second_dot_len = IsDot(spec, after_dot, end);
+ size_t second_dot_len = IsDot(spec, after_dot, end);
if (second_dot_len) {
- int after_second_dot = after_dot + second_dot_len;
+ size_t after_second_dot = after_dot + second_dot_len;
if (after_second_dot == end) {
// Double dot at the end.
*consumed_len = second_dot_len;
@@ -193,10 +195,10 @@
// ends with a '%' followed by one or two characters, and the '%' is the one
// pointed to by |last_invalid_percent_index|. The last character in the string
// was just unescaped.
-template<typename CHAR>
+template <typename CHAR>
void CheckForNestedEscapes(const CHAR* spec,
- int next_input_index,
- int input_len,
+ size_t next_input_index,
+ size_t input_len,
int last_invalid_percent_index,
CanonOutput* output) {
const int length = output->length();
@@ -218,9 +220,10 @@
}
// Now output ends like "%cc". Try to unescape this.
- int begin = last_invalid_percent_index;
+ size_t begin = static_cast<size_t>(last_invalid_percent_index);
unsigned char temp;
- if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
+ if (DecodeEscaped(output->data(), &begin,
+ static_cast<size_t>(output->length()), &temp)) {
// New escape sequence found. Overwrite the characters following the '%'
// with "25", and push_back() the one or two characters that were following
// the '%' when we were called.
@@ -252,7 +255,10 @@
const Component& path,
int path_begin_in_output,
CanonOutput* output) {
- int end = path.end();
+ if (!path.is_nonempty())
+ return true;
+
+ size_t end = static_cast<size_t>(path.end());
// We use this variable to minimize the amount of work done when unescaping --
// we'll only call CheckForNestedEscapes() when this points at one of the last
@@ -260,7 +266,7 @@
int last_invalid_percent_index = INT_MIN;
bool success = true;
- for (int i = path.begin; i < end; i++) {
+ for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
GURL_DCHECK_LT(last_invalid_percent_index, output->length());
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
@@ -276,7 +282,7 @@
unsigned char flags = kPathCharLookup[out_ch];
if (flags & SPECIAL) {
// Needs special handling of some sort.
- int dotlen;
+ size_t dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
// See if this dot was preceded by a slash in the output.
//
@@ -287,7 +293,7 @@
if (output->length() > path_begin_in_output &&
output->at(output->length() - 1) == '/') {
// Slash followed by a dot, check to see if this is means relative
- int consumed_len;
+ size_t consumed_len;
switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
&consumed_len)) {
case NOT_A_DIRECTORY:
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index e726cfb..d8d65f3 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc
@@ -32,8 +32,8 @@
// https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
new_component->begin = output->length();
- int end = component.end();
- for (int i = component.begin; i < end; i++) {
+ size_t end = static_cast<size_t>(component.end());
+ for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(source[i]);
if (uch < 0x20 || uch > 0x7E)
AppendUTF8EscapedChar(source, &i, end, output);
diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc
index b3a1118..53699c5 100644
--- a/url/url_canon_query.cc
+++ b/url/url_canon_query.cc
@@ -72,10 +72,12 @@
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
+ GURL_DCHECK(query.is_valid());
// This function will replace any misencoded values with the invalid
// character. This is what we want so we don't have to check for error.
RawCanonOutputW<1024> utf16;
- ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16);
+ ConvertUTF8ToUTF16(&spec[query.begin], static_cast<size_t>(query.len),
+ &utf16);
converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
}
@@ -86,7 +88,9 @@
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
+ GURL_DCHECK(query.is_valid());
+ converter->ConvertFromUTF16(&spec[query.begin],
+ static_cast<size_t>(query.len), output);
}
template<typename CHAR, typename UCHAR>
@@ -109,7 +113,8 @@
} else {
// No converter, do our own UTF-8 conversion.
- AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output);
+ AppendStringOfType(&spec[query.begin], static_cast<size_t>(query.len),
+ CHAR_QUERY, output);
}
}
}
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 0170e00..4fa31ec 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -173,9 +173,9 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- int input_len = static_cast<int>(strlen(utf_cases[i].input8));
+ size_t input_len = strlen(utf_cases[i].input8);
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
&output);
}
@@ -189,9 +189,9 @@
std::u16string input_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
- int input_len = static_cast<int>(input_str.length());
+ size_t input_len = input_str.length();
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
&output);
}
diff --git a/url/url_util.cc b/url/url_util.cc
index f56323e..a6d0901 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -811,11 +811,15 @@
int length,
DecodeURLMode mode,
CanonOutputW* output) {
+ if (length <= 0)
+ return;
+
STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
- for (int i = 0; i < length; i++) {
+ size_t length_size_t = static_cast<size_t>(length);
+ for (size_t i = 0; i < length_size_t; i++) {
if (input[i] == '%') {
unsigned char ch;
- if (DecodeEscaped(input, &i, length, &ch)) {
+ if (DecodeEscaped(input, &i, length_size_t, &ch)) {
unescaped_chars.push_back(ch);
} else {
// Invalid escape sequence, copy the percent literal.
@@ -830,18 +834,20 @@
int output_initial_length = output->length();
// Convert that 8-bit to UTF-16. It's not clear IE does this at all to
// JavaScript URLs, but Firefox and Safari do.
- for (int i = 0; i < unescaped_chars.length(); i++) {
- unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+ size_t unescaped_length = static_cast<size_t>(unescaped_chars.length());
+ for (size_t i = 0; i < unescaped_length; i++) {
+ unsigned char uch =
+ static_cast<unsigned char>(unescaped_chars.at(static_cast<int>(i)));
if (uch < 0x80) {
// Non-UTF-8, just append directly
output->push_back(uch);
} else {
// next_ch will point to the last character of the decoded
// character.
- int next_character = i;
+ size_t next_character = i;
base_icu::UChar32 code_point;
- if (ReadUTFChar(unescaped_chars.data(), &next_character,
- unescaped_chars.length(), &code_point)) {
+ if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
+ &code_point)) {
// Valid UTF-8 character, convert to UTF-16.
AppendUTF16Value(code_point, output);
i = next_character;