Enable send and receive of ECN bits on UDP sockets. QuicUdpPacketInfo passes ECN information to and from the socket for both ingress and egress. Protected by FLAGS_quic_restart_flag_quic_quiche_ecn_sockets. PiperOrigin-RevId: 500789567
diff --git a/quiche/quic/core/quic_flags_list.h b/quiche/quic/core/quic_flags_list.h index 3287bae..b205e3e 100644 --- a/quiche/quic/core/quic_flags_list.h +++ b/quiche/quic/core/quic_flags_list.h
@@ -85,6 +85,8 @@ QUIC_FLAG(quic_reloadable_flag_quic_send_placeholder_ticket_when_encrypt_ticket_fails, true) // When true, defaults to BBR congestion control instead of Cubic. QUIC_FLAG(quic_reloadable_flag_quic_default_to_bbr, false) +// When true, quiche UDP sockets report Explicit Congestion Notification (ECN) [RFC3168, RFC9330] results. +QUIC_FLAG(quic_restart_flag_quic_quiche_ecn_sockets, false) // When true, support draft-ietf-quic-v2-08 QUIC_FLAG(quic_reloadable_flag_quic_enable_version_2_draft_08, false) // When true, the BB2U copt causes BBR2 to wait two rounds with out draining the queue before exiting PROBE_UP and BB2S has the same effect in STARTUP.
diff --git a/quiche/quic/core/quic_types.h b/quiche/quic/core/quic_types.h index d2f55b9..f4a2394 100644 --- a/quiche/quic/core/quic_types.h +++ b/quiche/quic/core/quic_types.h
@@ -706,10 +706,10 @@ // Indicates the fate of a serialized packet in WritePacket(). enum SerializedPacketFate : uint8_t { - DISCARD, // Discard the packet. - COALESCE, // Try to coalesce packet. - BUFFER, // Buffer packet in buffered_packets_. - SEND_TO_WRITER, // Send packet to writer. + DISCARD, // Discard the packet. + COALESCE, // Try to coalesce packet. + BUFFER, // Buffer packet in buffered_packets_. + SEND_TO_WRITER, // Send packet to writer. }; QUIC_EXPORT_PRIVATE std::string SerializedPacketFateToString( @@ -861,6 +861,23 @@ QUIC_EXPORT_PRIVATE std::ostream& operator<<( std::ostream& os, const ParsedClientHello& parsed_chlo); +// The two bits in the IP header for Explicit Congestion Notification can take +// one of four values. Enumerator values match the on-the-wire bit patterns. +enum QuicEcnCodepoint { + // The NOT-ECT codepoint, indicating the packet sender is not using (or the + // network has disabled) ECN. + ECN_NOT_ECT = 0, + // The ECT(0) codepoint, indicating the packet sender is using classic ECN + // (RFC3168). Encoded as binary 10. + ECN_ECT0 = 2, + // The ECT(1) codepoint, indicating the packet sender is using Low Latency, + // Low Loss, Scalable Throughput (L4S) ECN (RFC9330). Encoded as binary 01. + ECN_ECT1 = 1, + // The CE ("Congestion Experienced") codepoint, indicating the packet sender + // is using ECN, and a router is experiencing congestion. + ECN_CE = 3, +}; + } // namespace quic #endif // QUICHE_QUIC_CORE_QUIC_TYPES_H_
diff --git a/quiche/quic/core/quic_udp_socket.h b/quiche/quic/core/quic_udp_socket.h index 2ae722e..08a2595 100644 --- a/quiche/quic/core/quic_udp_socket.h +++ b/quiche/quic/core/quic_udp_socket.h
@@ -33,6 +33,7 @@ PEER_ADDRESS, // Read & Write RECV_TIMESTAMP, // Read TTL, // Read & Write + ECN, // Read & Write GOOGLE_PACKET_HEADER, // Read NUM_BITS, IS_GRO, // Read @@ -150,6 +151,13 @@ bitmask_.Set(QuicUdpPacketInfoBit::GOOGLE_PACKET_HEADER); } + QuicEcnCodepoint ecn_codepoint() const { return ecn_codepoint_; } + + void SetEcnCodepoint(const QuicEcnCodepoint ecn_codepoint) { + ecn_codepoint_ = ecn_codepoint; + bitmask_.Set(QuicUdpPacketInfoBit::ECN); + } + private: BitMask64 bitmask_; QuicPacketCount dropped_packets_; @@ -160,6 +168,7 @@ int ttl_; BufferSpan google_packet_headers_; size_t gso_size_ = 0; + QuicEcnCodepoint ecn_codepoint_ = ECN_NOT_ECT; }; // QuicUdpSocketApi provides a minimal set of apis for sending and receiving
diff --git a/quiche/quic/core/quic_udp_socket_posix.cc b/quiche/quic/core/quic_udp_socket_posix.cc index 5335723..3f331e8 100644 --- a/quiche/quic/core/quic_udp_socket_posix.cc +++ b/quiche/quic/core/quic_udp_socket_posix.cc
@@ -19,6 +19,7 @@ #include "quiche/quic/core/io/socket.h" #include "quiche/quic/core/quic_udp_socket.h" #include "quiche/quic/platform/api/quic_bug_tracker.h" +#include "quiche/quic/platform/api/quic_flag_utils.h" #include "quiche/quic/platform/api/quic_ip_address_family.h" #include "quiche/quic/platform/api/quic_udp_socket_platform_api.h" @@ -39,6 +40,9 @@ namespace quic { namespace { +// Explicit Congestion Notification is the last two bits of the TOS byte. +constexpr uint8_t kEcnMask = 0x03; + #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 21) #define QUIC_UDP_SOCKET_SUPPORT_LINUX_TIMESTAMPING 1 // This is the structure that SO_TIMESTAMPING fills into the cmsg header. @@ -159,6 +163,14 @@ return; } + if ((cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_TOS) || + (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_TCLASS)) { + if (packet_info_interested.IsSet(QuicUdpPacketInfoBit::ECN)) { + packet_info->SetEcnCodepoint(QuicEcnCodepoint( + *(reinterpret_cast<uint8_t*>(CMSG_DATA(cmsg))) & kEcnMask)); + } + } + if (packet_info_interested.IsSet( QuicUdpPacketInfoBit::GOOGLE_PACKET_HEADER)) { BufferSpan google_packet_headers; @@ -250,6 +262,23 @@ return false; } + if (GetQuicRestartFlag(quic_quiche_ecn_sockets)) { + QUIC_RESTART_FLAG_COUNT(quic_quiche_ecn_sockets); + unsigned int set = 1; + if (address_family == AF_INET && + setsockopt(fd, IPPROTO_IP, IP_RECVTOS, &set, sizeof(set)) != 0) { + QUIC_LOG_FIRST_N(ERROR, 100) << "Failed to request to receive ECN on " + << "socket"; + return false; + } + if (address_family == AF_INET6 && + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVTCLASS, &set, sizeof(set)) != 0) { + QUIC_LOG_FIRST_N(ERROR, 100) << "Failed to request to receive ECN on " + << "socket"; + return false; + } + } + if (!(address_family == AF_INET6 && ipv6_only)) { if (!EnableReceiveSelfIpAddressForV4(fd)) { QUIC_LOG_FIRST_N(ERROR, 100) @@ -440,6 +469,24 @@ packet_info->SetPeerAddress(QuicSocketAddress(raw_peer_address)); } 
+ if (packet_info_interested.IsSet(QuicUdpPacketInfoBit::ECN)) { + int ecn; + socklen_t optlen = sizeof(ecn); + if (raw_peer_address.ss_family == AF_INET && + getsockopt(fd, IPPROTO_IP, IP_TOS, (void*)&ecn, &optlen) == 0) { + packet_info->SetEcnCodepoint( + QuicEcnCodepoint(static_cast<uint8_t>(ecn) & kEcnMask)); + } else if (raw_peer_address.ss_family == AF_INET6 && + getsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ecn, + &optlen) == 0) { + packet_info->SetEcnCodepoint( + QuicEcnCodepoint(static_cast<uint8_t>(ecn) & kEcnMask)); + } else { + // Fail back to not reporting ECN marks. + packet_info->SetEcnCodepoint(ECN_NOT_ECT); + } + } + if (hdr.msg_controllen > 0) { for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&hdr); cmsg != nullptr; cmsg = CMSG_NXTHDR(&hdr, cmsg)) { @@ -635,6 +682,20 @@ } #endif + if (packet_info.HasValue(QuicUdpPacketInfoBit::ECN)) { + int cmsg_level = + packet_info.peer_address().host().IsIPv4() ? IPPROTO_IP : IPPROTO_IPV6; + int cmsg_type = + packet_info.peer_address().host().IsIPv4() ? IP_TOS : IPV6_TCLASS; + if (!NextCmsg(&hdr, control_buffer, sizeof(control_buffer), cmsg_level, + cmsg_type, sizeof(int), &cmsg)) { + QUIC_LOG_FIRST_N(ERROR, 100) << "Not enough buffer to set ECN."; + return WriteResult(WRITE_STATUS_ERROR, EINVAL); + } + *reinterpret_cast<int*>(CMSG_DATA(cmsg)) = + static_cast<int>(packet_info.ecn_codepoint()); + } + int rc; do { rc = sendmsg(fd, &hdr, 0);