blob: 19ac83e61cc0a57b3564acfb4da896c468a3d8b9 [file] [log] [blame]
// Copyright (c) 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "quiche/common/platform/api/quiche_hostname_utils.h"
#include <string>
#include "absl/strings/string_view.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
#include "quiche/common/platform/api/quiche_logging.h"
namespace quiche {
// TODO(vasilvv): the functions below are forked from Chromium's
// net/base/url_util.h; those should be moved to googleurl.
namespace {
// Runs |host| through url::CanonicalizeHostVerbose and returns the resulting
// canonical host string. |host_info| is handed to the canonicalizer, which
// fills it in; it must not be null because it is dereferenced below.
//
// Returns an empty string when the canonicalized host is empty or when the
// canonicalizer reports the host family as BROKEN.
std::string CanonicalizeHost(absl::string_view host,
                             url::CanonHostInfo* host_info) {
  std::string result;
  url::StdStringCanonOutput output(&result);

  // The whole of |host| is treated as the host component.
  const int input_length = static_cast<int>(host.length());
  const url::Component whole_input(0, input_length);
  url::CanonicalizeHostVerbose(host.data(), whole_input, &output, host_info);

  const bool failed = !host_info->out_host.is_nonempty() ||
                      host_info->family == url::CanonHostInfo::BROKEN;
  if (failed) {
    // Empty host, or canonicalization failed: hand back an empty string.
    result.clear();
    return result;
  }

  // Success. Finish the output and check that nothing beyond the reported
  // host component was written.
  output.Complete();
  QUICHE_DCHECK_EQ(host_info->out_host.len,
                   static_cast<int>(result.length()));
  return result;
}
// Returns true if |c| is an ASCII lowercase letter ('a'-'z') or an ASCII digit
// ('0'-'9'). Uppercase letters are deliberately not accepted: callers pass
// canonicalized hosts, in which uppercase has already been normalized away.
bool IsHostCharAlphanumeric(char c) {
  const bool is_lowercase_letter = ('a' <= c) && (c <= 'z');
  const bool is_digit = ('0' <= c) && (c <= '9');
  return is_lowercase_letter || is_digit;
}
// Checks an already-canonicalized |host| against these rules:
//   * the host is not empty;
//   * it is made of dot-separated components, each containing only lowercase
//     letters, digits, '-' or '_';
//   * no component is empty, except that a single trailing dot is tolerated;
//   * the last non-empty component begins with a letter or digit.
// Returns true only if every rule holds.
bool IsCanonicalizedHostCompliant(const std::string& host) {
  if (host.empty()) {
    return false;
  }

  const auto is_dash_or_underscore = [](char ch) {
    return ch == '-' || ch == '_';
  };

  // True while the next character to be read would open a new component.
  bool at_component_start = true;
  // Whether the component opened most recently began with a letter or digit.
  bool last_component_starts_alphanumeric = false;

  for (const char ch : host) {
    if (at_component_start) {
      last_component_starts_alphanumeric = IsHostCharAlphanumeric(ch);
      if (!last_component_starts_alphanumeric && !is_dash_or_underscore(ch)) {
        // Covers an empty component too: a '.' here is rejected.
        return false;
      }
      at_component_start = false;
      continue;
    }

    if (ch == '.') {
      at_component_start = true;
      continue;
    }

    if (!IsHostCharAlphanumeric(ch) && !is_dash_or_underscore(ch)) {
      return false;
    }
  }

  return last_component_starts_alphanumeric;
}
} // namespace
// static
bool QuicheHostnameUtils::IsValidSNI(absl::string_view sni) {
  // TODO(rtenneti): Support RFC2396 hostname.
  // NOTE: Microsoft does NOT enforce this spec, so if we throw away hostnames
  // based on the above spec, we may be losing some hostnames that windows
  // would consider valid. By far the most common hostname character NOT
  // accepted by the above spec is '_'.
  //
  // An SNI is accepted when, after canonicalization, it is not an IP address
  // and its canonical form passes IsCanonicalizedHostCompliant().
  url::CanonHostInfo info;
  const std::string canonical = CanonicalizeHost(sni, &info);
  if (info.IsIPAddress()) {
    return false;
  }
  return IsCanonicalizedHostCompliant(canonical);
}
// static
std::string QuicheHostnameUtils::NormalizeHostname(absl::string_view hostname) {
  // Canonicalize first; CanonicalizeHost() yields an empty string when the
  // host is empty or cannot be canonicalized, in which case the loop below is
  // a no-op and the empty string is returned.
  url::CanonHostInfo info;
  std::string normalized = CanonicalizeHost(hostname, &info);

  // Strip every trailing dot, stopping at the last character that is not one.
  while (!normalized.empty() && normalized.back() == '.') {
    normalized.pop_back();
  }
  return normalized;
}
} // namespace quiche