blob: 02c963ce20a213c387b0d1dd509f351d6e603b88 [file] [log] [blame]
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "net/third_party/quiche/src/quic/core/http/spdy_server_push_utils.h"
6
7#include "url/gurl.h"
8
9using spdy::SpdyHeaderBlock;
10
11namespace quic {
12
13// static
14std::string SpdyServerPushUtils::GetPromisedUrlFromHeaders(
15 const SpdyHeaderBlock& headers) {
16 // RFC 7540, Section 8.1.2.3: All HTTP/2 requests MUST include exactly
17 // one valid value for the ":method", ":scheme", and ":path" pseudo-header
18 // fields, unless it is a CONNECT request.
19
20 // RFC 7540, Section 8.2.1: The header fields in PUSH_PROMISE and any
21 // subsequent CONTINUATION frames MUST be a valid and complete set of request
22 // header fields (Section 8.1.2.3). The server MUST include a method in the
23 // ":method" pseudo-header field that is safe and cacheable.
24 //
25 // RFC 7231, Section 4.2.1: Of the request methods defined by this
26 // specification, the GET, HEAD, OPTIONS, and TRACE methods are defined to be
27 // safe.
28 //
29 // RFC 7231, Section 4.2.1: ... this specification defines GET, HEAD, and
30 // POST as cacheable, ...
31 //
32 // So the only methods allowed in a PUSH_PROMISE are GET and HEAD.
33 SpdyHeaderBlock::const_iterator it = headers.find(":method");
34 if (it == headers.end() || (it->second != "GET" && it->second != "HEAD")) {
35 return std::string();
36 }
37
38 it = headers.find(":scheme");
39 if (it == headers.end() || it->second.empty()) {
40 return std::string();
41 }
42 QuicStringPiece scheme = it->second;
43
44 // RFC 7540, Section 8.2: The server MUST include a value in the
45 // ":authority" pseudo-header field for which the server is authoritative
46 // (see Section 10.1).
47 it = headers.find(":authority");
48 if (it == headers.end() || it->second.empty()) {
49 return std::string();
50 }
51 QuicStringPiece authority = it->second;
52
53 // RFC 7540, Section 8.1.2.3 requires that the ":path" pseudo-header MUST
54 // NOT be empty for "http" or "https" URIs;
55 //
56 // However, to ensure the scheme is consistently canonicalized, that check
57 // is deferred to implementations in QuicUrlUtils::GetPushPromiseUrl().
58 it = headers.find(":path");
59 if (it == headers.end()) {
60 return std::string();
61 }
62 QuicStringPiece path = it->second;
63
64 return GetPushPromiseUrl(scheme, authority, path);
65}
66
67// static
68std::string SpdyServerPushUtils::GetPromisedHostNameFromHeaders(
69 const SpdyHeaderBlock& headers) {
70 // TODO(fayang): Consider just checking out the value of the ":authority" key
71 // in headers.
72 return GURL(GetPromisedUrlFromHeaders(headers)).host();
73}
74
75// static
76bool SpdyServerPushUtils::PromisedUrlIsValid(const SpdyHeaderBlock& headers) {
77 std::string url(GetPromisedUrlFromHeaders(headers));
78 return !url.empty() && GURL(url).is_valid();
79}
80
81// static
82std::string SpdyServerPushUtils::GetPushPromiseUrl(QuicStringPiece scheme,
83 QuicStringPiece authority,
84 QuicStringPiece path) {
85 // RFC 7540, Section 8.1.2.3: The ":path" pseudo-header field includes the
86 // path and query parts of the target URI (the "path-absolute" production
87 // and optionally a '?' character followed by the "query" production (see
88 // Sections 3.3 and 3.4 of RFC3986). A request in asterisk form includes the
89 // value '*' for the ":path" pseudo-header field.
90 //
91 // This pseudo-header field MUST NOT be empty for "http" or "https" URIs;
92 // "http" or "https" URIs that do not contain a path MUST include a value of
93 // '/'. The exception to this rule is an OPTIONS request for an "http" or
94 // "https" URI that does not include a path component; these MUST include a
95 // ":path" pseudo-header with a value of '*' (see RFC7230, Section 5.3.4).
96 //
97 // In addition to the above restriction from RFC 7540, note that RFC3986
98 // defines the "path-absolute" construction as starting with "/" but not "//".
99 //
100 // RFC 7540, Section 8.2.1: The header fields in PUSH_PROMISE and any
101 // subsequent CONTINUATION frames MUST be a valid and complete set of request
102 // header fields (Section 8.1.2.3). The server MUST include a method in the
103 // ":method" pseudo-header field that is safe and cacheable.
104 //
105 // RFC 7231, Section 4.2.1:
106 // ... this specification defines GET, HEAD, and POST as cacheable, ...
107 //
108 // Since the OPTIONS method is not cacheable, it cannot be the method of a
109 // PUSH_PROMISE. Therefore, the exception mentioned in RFC 7540, Section
110 // 8.1.2.3 about OPTIONS requests does not apply here (i.e. ":path" cannot be
111 // "*").
112 if (path.empty() || path[0] != '/' || (path.size() >= 2 && path[1] == '/')) {
113 return std::string();
114 }
115
116 // Validate the scheme; this is to ensure a scheme of "foo://bar" is not
117 // parsed as a URL of "foo://bar://baz" when combined with a host of "baz".
118 std::string canonical_scheme;
119 url::StdStringCanonOutput canon_scheme_output(&canonical_scheme);
120 url::Component canon_component;
121 url::Component scheme_component(0, scheme.size());
122
123 if (!url::CanonicalizeScheme(scheme.data(), scheme_component,
124 &canon_scheme_output, &canon_component) ||
125 !canon_component.is_nonempty() || canon_component.begin != 0) {
126 return std::string();
127 }
128 canonical_scheme.resize(canon_component.len + 1);
129
130 // Validate the authority; this is to ensure an authority such as
131 // "host/path" is not accepted, as when combined with a scheme like
132 // "http://", could result in a URL of "http://host/path".
133 url::Component auth_component(0, authority.size());
134 url::Component username_component;
135 url::Component password_component;
136 url::Component host_component;
137 url::Component port_component;
138
139 url::ParseAuthority(authority.data(), auth_component, &username_component,
140 &password_component, &host_component, &port_component);
141
142 // RFC 7540, Section 8.1.2.3: The authority MUST NOT include the deprecated
143 // "userinfo" subcomponent for "http" or "https" schemed URIs.
144 //
145 // Note: Although |canonical_scheme| has not yet been checked for that, as
146 // it is performed later in processing, only "http" and "https" schemed
147 // URIs are supported for PUSH.
148 if (username_component.is_valid() || password_component.is_valid()) {
149 return std::string();
150 }
151
152 // Failed parsing or no host present. ParseAuthority() will ensure that
153 // host_component + port_component cover the entire string, if
154 // username_component and password_component are not present.
155 if (!host_component.is_nonempty()) {
156 return std::string();
157 }
158
159 // Validate the port (if present; it's optional).
160 int parsed_port_number = url::PORT_INVALID;
161 if (port_component.is_nonempty()) {
162 parsed_port_number = url::ParsePort(authority.data(), port_component);
163 if (parsed_port_number < 0 && parsed_port_number != url::PORT_UNSPECIFIED) {
164 return std::string();
165 }
166 }
167
168 // Validate the host by attempting to canonicalize it. Invalid characters
169 // will result in a canonicalization failure (e.g. '/')
170 std::string canon_host;
171 url::StdStringCanonOutput canon_host_output(&canon_host);
172 canon_component.reset();
173 if (!url::CanonicalizeHost(authority.data(), host_component,
174 &canon_host_output, &canon_component) ||
175 !canon_component.is_nonempty() || canon_component.begin != 0) {
176 return std::string();
177 }
178
179 // At this point, "authority" has been validated to either be of the form
180 // 'host:port' or 'host', with 'host' being a valid domain or IP address,
181 // and 'port' (if present), being a valid port. Attempt to construct a
182 // URL of just the (scheme, host, port), which should be safe and will not
183 // result in ambiguous parsing.
184 //
185 // This also enforces that all PUSHed URLs are either HTTP or HTTPS-schemed
186 // URIs, consistent with the other restrictions enforced above.
187 //
188 // Note: url::CanonicalizeScheme() will have added the ':' to
189 // |canonical_scheme|.
190 GURL origin_url(canonical_scheme + "//" + std::string(authority));
191 if (!origin_url.is_valid() || !origin_url.SchemeIsHTTPOrHTTPS() ||
192 // The following checks are merely defense in depth.
193 origin_url.has_username() || origin_url.has_password() ||
194 (origin_url.has_path() && origin_url.path_piece() != "/") ||
195 origin_url.has_query() || origin_url.has_ref()) {
196 return std::string();
197 }
198
199 // Attempt to parse the path.
200 std::string spec = origin_url.GetWithEmptyPath().spec();
201 spec.pop_back(); // Remove the '/', as ":path" must contain it.
202 spec.append(std::string(path));
203
204 // Attempt to parse the full URL, with the path as well. Ensure there is no
205 // fragment to the query.
206 GURL full_url(spec);
207 if (!full_url.is_valid() || full_url.has_ref()) {
208 return std::string();
209 }
210
211 return full_url.spec();
212}
213
214} // namespace quic