Enable send and receive of ECN bits on UDP sockets.
QuicUdpPacketInfo passes ECN information to and from the socket for both ingress and egress.
Protected by FLAGS_quic_restart_flag_quic_quiche_ecn_sockets.
PiperOrigin-RevId: 500789567
diff --git a/quiche/quic/core/quic_flags_list.h b/quiche/quic/core/quic_flags_list.h
index 3287bae..b205e3e 100644
--- a/quiche/quic/core/quic_flags_list.h
+++ b/quiche/quic/core/quic_flags_list.h
@@ -85,6 +85,8 @@
QUIC_FLAG(quic_reloadable_flag_quic_send_placeholder_ticket_when_encrypt_ticket_fails, true)
// When true, defaults to BBR congestion control instead of Cubic.
QUIC_FLAG(quic_reloadable_flag_quic_default_to_bbr, false)
+// When true, quiche UDP sockets report Explicit Congestion Notification (ECN) [RFC3168, RFC9330] results.
+QUIC_FLAG(quic_restart_flag_quic_quiche_ecn_sockets, false)
// When true, support draft-ietf-quic-v2-08
QUIC_FLAG(quic_reloadable_flag_quic_enable_version_2_draft_08, false)
// When true, the BB2U copt causes BBR2 to wait two rounds with out draining the queue before exiting PROBE_UP and BB2S has the same effect in STARTUP.
diff --git a/quiche/quic/core/quic_types.h b/quiche/quic/core/quic_types.h
index d2f55b9..f4a2394 100644
--- a/quiche/quic/core/quic_types.h
+++ b/quiche/quic/core/quic_types.h
@@ -706,10 +706,10 @@
// Indicates the fate of a serialized packet in WritePacket().
enum SerializedPacketFate : uint8_t {
- DISCARD, // Discard the packet.
- COALESCE, // Try to coalesce packet.
- BUFFER, // Buffer packet in buffered_packets_.
- SEND_TO_WRITER, // Send packet to writer.
+ DISCARD, // Discard the packet.
+ COALESCE, // Try to coalesce packet.
+ BUFFER, // Buffer packet in buffered_packets_.
+ SEND_TO_WRITER, // Send packet to writer.
};
QUIC_EXPORT_PRIVATE std::string SerializedPacketFateToString(
@@ -861,6 +861,23 @@
QUIC_EXPORT_PRIVATE std::ostream& operator<<(
std::ostream& os, const ParsedClientHello& parsed_chlo);
+// The two ECN bits in the IP header (RFC 3168, section 5). Enumerator values
+// equal the on-the-wire bit pattern so they map directly onto the TOS byte.
+enum QuicEcnCodepoint {
+  // The NOT-ECT codepoint (0b00), indicating the packet sender is not using
+  // (or the network has disabled) ECN.
+  ECN_NOT_ECT = 0,
+  // The ECT(1) codepoint (0b01), indicating the packet sender is using Low
+  // Latency, Low Loss, Scalable Throughput (L4S) ECN (RFC9330).
+  ECN_ECT1 = 1,
+  // The ECT(0) codepoint (0b10), indicating the packet sender is using classic
+  // ECN (RFC3168).
+  ECN_ECT0 = 2,
+  // The CE ("Congestion Experienced") codepoint (0b11), indicating the packet
+  // sender is using ECN, and a router is experiencing congestion.
+  ECN_CE = 3,
+};
+
} // namespace quic
#endif // QUICHE_QUIC_CORE_QUIC_TYPES_H_
diff --git a/quiche/quic/core/quic_udp_socket.h b/quiche/quic/core/quic_udp_socket.h
index 2ae722e..08a2595 100644
--- a/quiche/quic/core/quic_udp_socket.h
+++ b/quiche/quic/core/quic_udp_socket.h
@@ -33,6 +33,7 @@
PEER_ADDRESS, // Read & Write
RECV_TIMESTAMP, // Read
TTL, // Read & Write
+ ECN, // Read & Write
GOOGLE_PACKET_HEADER, // Read
NUM_BITS,
IS_GRO, // Read
@@ -150,6 +151,13 @@
bitmask_.Set(QuicUdpPacketInfoBit::GOOGLE_PACKET_HEADER);
}
+ QuicEcnCodepoint ecn_codepoint() const { return ecn_codepoint_; }
+ // Stores |ecn_codepoint| and marks the ECN bit as present in the bitmask.
+ void SetEcnCodepoint(const QuicEcnCodepoint ecn_codepoint) {
+ ecn_codepoint_ = ecn_codepoint;
+ bitmask_.Set(QuicUdpPacketInfoBit::ECN);
+ }
+
private:
BitMask64 bitmask_;
QuicPacketCount dropped_packets_;
@@ -160,6 +168,7 @@
int ttl_;
BufferSpan google_packet_headers_;
size_t gso_size_ = 0;
+ QuicEcnCodepoint ecn_codepoint_ = ECN_NOT_ECT;
};
// QuicUdpSocketApi provides a minimal set of apis for sending and receiving
diff --git a/quiche/quic/core/quic_udp_socket_posix.cc b/quiche/quic/core/quic_udp_socket_posix.cc
index 5335723..3f331e8 100644
--- a/quiche/quic/core/quic_udp_socket_posix.cc
+++ b/quiche/quic/core/quic_udp_socket_posix.cc
@@ -19,6 +19,7 @@
#include "quiche/quic/core/io/socket.h"
#include "quiche/quic/core/quic_udp_socket.h"
#include "quiche/quic/platform/api/quic_bug_tracker.h"
+#include "quiche/quic/platform/api/quic_flag_utils.h"
#include "quiche/quic/platform/api/quic_ip_address_family.h"
#include "quiche/quic/platform/api/quic_udp_socket_platform_api.h"
@@ -39,6 +40,9 @@
namespace quic {
namespace {
+// Explicit Congestion Notification is the low two bits of the TOS/TCLASS byte.
+constexpr uint8_t kEcnMask = 0x03;
+
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 21)
#define QUIC_UDP_SOCKET_SUPPORT_LINUX_TIMESTAMPING 1
// This is the structure that SO_TIMESTAMPING fills into the cmsg header.
@@ -159,6 +163,14 @@
return;
}
+ // ECN from TOS/TCLASS cmsg. NOTE(review): 1-byte read; TCLASS may be an int.
+ if ((cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_TOS) ||
+ (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_TCLASS)) {
+ if (packet_info_interested.IsSet(QuicUdpPacketInfoBit::ECN)) {
+ packet_info->SetEcnCodepoint(QuicEcnCodepoint(
+ *(reinterpret_cast<uint8_t*>(CMSG_DATA(cmsg))) & kEcnMask));
+ }
+ }
if (packet_info_interested.IsSet(
QuicUdpPacketInfoBit::GOOGLE_PACKET_HEADER)) {
BufferSpan google_packet_headers;
@@ -250,6 +262,23 @@
return false;
}
+  if (GetQuicRestartFlag(quic_quiche_ecn_sockets)) {
+    QUIC_RESTART_FLAG_COUNT(quic_quiche_ecn_sockets);
+    // A dual-stack AF_INET6 socket also carries IPv4 traffic, so it needs the
+    // IPv4 option as well as the IPv6 one to see marks from both families.
+    const bool want_v6 = address_family == AF_INET6;
+    const bool want_v4 = address_family == AF_INET || (want_v6 && !ipv6_only);
+    unsigned int set = 1;
+    if ((want_v4 &&
+         setsockopt(fd, IPPROTO_IP, IP_RECVTOS, &set, sizeof(set)) != 0) ||
+        (want_v6 && setsockopt(fd, IPPROTO_IPV6, IPV6_RECVTCLASS, &set,
+                               sizeof(set)) != 0)) {
+      QUIC_LOG_FIRST_N(ERROR, 100) << "Failed to request to receive ECN on "
+                                   << "socket";
+      return false;
+    }
+  }
+
if (!(address_family == AF_INET6 && ipv6_only)) {
if (!EnableReceiveSelfIpAddressForV4(fd)) {
QUIC_LOG_FIRST_N(ERROR, 100)
@@ -440,6 +469,24 @@
packet_info->SetPeerAddress(QuicSocketAddress(raw_peer_address));
}
+  if (packet_info_interested.IsSet(QuicUdpPacketInfoBit::ECN)) {
+    // Start from "no mark" so that the ECN bit is always populated when the
+    // caller asked for it, even if the kernel attaches no TOS / traffic-class
+    // control message to this datagram (for example because requesting them
+    // is gated behind a flag during socket setup).
+    //
+    // The codepoint a datagram actually arrived with is only available per
+    // packet, as an IP_TOS / IPV6_TCLASS control message. The control-message
+    // handling below calls SetEcnCodepoint() again when it sees one, which
+    // replaces this default.
+    //
+    // Deliberately no getsockopt(IP_TOS / IPV6_TCLASS) here: that returns the
+    // value this socket stamps on *outgoing* packets, not what was received,
+    // so it would echo our own marking back as if the network had delivered
+    // it, and it costs an extra syscall on every read.
+    packet_info->SetEcnCodepoint(ECN_NOT_ECT);
+  }
+
if (hdr.msg_controllen > 0) {
for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&hdr); cmsg != nullptr;
cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
@@ -635,6 +682,20 @@
}
#endif
+  if (packet_info.HasValue(QuicUdpPacketInfoBit::ECN)) {
+    // Peer address family selects IP_TOS (v4) or IPV6_TCLASS (otherwise).
+    const bool peer_is_v4 = packet_info.peer_address().host().IsIPv4();
+    const int ecn_level = peer_is_v4 ? IPPROTO_IP : IPPROTO_IPV6;
+    const int ecn_type = peer_is_v4 ? IP_TOS : IPV6_TCLASS;
+    if (!NextCmsg(&hdr, control_buffer, sizeof(control_buffer), ecn_level,
+                  ecn_type, sizeof(int), &cmsg)) {
+      QUIC_LOG_FIRST_N(ERROR, 100) << "Not enough buffer to set ECN.";
+      return WriteResult(WRITE_STATUS_ERROR, EINVAL);
+    }
+    *reinterpret_cast<int*>(CMSG_DATA(cmsg)) =
+        static_cast<int>(packet_info.ecn_codepoint());
+  }
+
int rc;
do {
rc = sendmsg(fd, &hdr, 0);