// blob: 8980dbbc40cef3d050f4f8aaf7de42f782dea07d [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_URL_CANON_IP_H_
#define URL_URL_CANON_IP_H_
#include "polyfills/base/component_export.h"
#include "base/strings/string_piece_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
namespace url {
// Writes the IPv4 address held in the four bytes of |address| to |output|.
COMPONENT_EXPORT(URL)
void AppendIPv4Address(const unsigned char address[4],
                       CanonOutput* output);
// Writes the IPv6 address held in the sixteen bytes of |address| to |output|.
COMPONENT_EXPORT(URL)
void AppendIPv6Address(const unsigned char address[16],
                       CanonOutput* output);
// Scans the host name for the pieces of an IPv4 address.
//
// Returns true on success, having stored each piece in |components|. Returns
// false when the host cannot be split up as an IPv4 address, or when it
// contains non-7-bit characters or any other character that cannot appear in
// an IP address. (Rejecting those characters matters: it lets us fail as early
// as possible for common non-IP hostnames.)
//
// Fewer than four components may be present. With only 3 components, for
// example, the last entry is given a length of -1 or 0 to mark it as absent.
//
// A note on spaces: on many platforms inet_addr will, in certain
// circumstances, ignore everything after a space when the text before the
// space looks like an IP address. IE6 behaves this way as well. That case is
// deliberately NOT handled here. In many cases the browser's canonicalization
// runs before this code and either converts spaces to %20 (IE7) or rejects
// them (Mozilla), so this code path is never hit. Our own host
// canonicalization notices such spaces and escapes them, which makes IP
// address finding fail. That seems like better behavior than stripping
// whatever follows a space.
COMPONENT_EXPORT(URL)
bool FindIPv4Components(const char* spec,
                        const Component& host,
                        Component components[4]);
// Overload of FindIPv4Components taking |spec| as char16_t characters. The
// comment on the const char* overload declared above applies here as well.
COMPONENT_EXPORT(URL)
bool FindIPv4Components(const char16_t* spec,
                        const Component& host,
                        Component components[4]);
// Turns an IPv4 address into a 32-bit number in network byte order.
//
// The return value is one of:
//   IPV4    - The IPv4 address was parsed successfully.
//   BROKEN  - The input was formatted like an IPv4 address, but an overflow
//             occurred while parsing it.
//   NEUTRAL - The input cannot possibly be interpreted as an IPv4 address.
//             It may be an IPv6 address, or a hostname.
//
// When parsing succeeds, |num_ipv4_components| is filled in with the number
// of components that made up the IPv4 address.
COMPONENT_EXPORT(URL)
CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
                                          const Component& host,
                                          unsigned char address[4],
                                          int* num_ipv4_components);
// Overload of IPv4AddressToNumber taking |spec| as char16_t characters. The
// comment on the const char* overload declared above applies here as well.
COMPONENT_EXPORT(URL)
CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
                                          const Component& host,
                                          unsigned char address[4],
                                          int* num_ipv4_components);
// Turns an IPv6 address into a 128-bit number in network byte order.
//
// Returns true on success; a false return means the input was not a valid
// IPv6 address.
//
// NOTE: |host| is expected to include the surrounding square brackets,
// i.e. "[::1]" rather than "::1".
COMPONENT_EXPORT(URL)
bool IPv6AddressToNumber(const char* spec,
                         const Component& host,
                         unsigned char address[16]);
// Overload of IPv6AddressToNumber taking |spec| as char16_t characters. The
// comment on the const char* overload declared above applies here as well.
COMPONENT_EXPORT(URL)
bool IPv6AddressToNumber(const char16_t* spec,
                         const Component& host,
                         unsigned char address[16]);
// Temporary classification of hostname validity, collected in histograms at
// both the DNS and the URL level, for potentially updating the URL spec.
//
// Because these values are recorded in histograms, existing values must not
// be reused, and any new value must be appended at the bottom.
//
// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
enum class HostSafetyStatus {
  // Catch-all for any canonical hostname that fits none of the other classes:
  // IPv4 hostnames, hostnames that don't have numeric eTLDs, and so on.
  // Broken hostnames are counted as OK as well.
  kOk = 0,

  // The top level domain looks numeric. That basically means it either parses
  // as a number per the URL spec, or is entirely numeric ("09" doesn't
  // currently parse as a number, because the leading "0" indicates an octal
  // value).
  kTopLevelDomainIsNumeric = 1,

  // The top level domain and the next level domain both look like numbers,
  // by the definition given above. This is the case that is actually
  // concerning: the eTLD+1 of such a domain is purely numeric, so using it as
  // the hostname of a URL will potentially result in an IPv4 hostname.
  // Logically this is a subset of kTopLevelDomainIsNumeric, but when both
  // apply, this label is the one returned.
  kTwoHighestLevelDomainsAreNumeric = 2,

  // Alias for the largest value above.
  kMaxValue = kTwoHighestLevelDomainsAreNumeric,
};
// Computes the HostSafetyStatus for a hostname, which should already have
// been canonicalized. This function is meant to be temporary: it exists only
// to inform decisions about tightening up which hostnames the URL parser
// considers valid.
//
// TODO(https://crbug.com/1149194): Remove this once the bug is fixed.
COMPONENT_EXPORT(URL)
HostSafetyStatus CheckHostnameSafety(const char* hostname,
                                     const Component& host);
// Overload of CheckHostnameSafety taking |hostname| as char16_t characters.
// The comment on the const char* overload declared above applies here as well.
COMPONENT_EXPORT(URL)
HostSafetyStatus CheckHostnameSafety(const char16_t* hostname,
                                     const Component& host);
} // namespace url
#endif // URL_URL_CANON_IP_H_