blob: b40ec380bbfaac3eb2c7d4054a62a696fade627a [file] [log] [blame]
// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "quiche/quic/qbone/platform/netlink.h"
#include <linux/fib_rules.h>
#include <utility>
#include "absl/base/attributes.h"
#include "absl/strings/str_cat.h"
#include "quiche/quic/core/crypto/quic_random.h"
#include "quiche/quic/platform/api/quic_ip_address.h"
#include "quiche/quic/platform/api/quic_logging.h"
#include "quiche/quic/qbone/platform/rtnetlink_message.h"
#include "quiche/quic/qbone/qbone_constants.h"
namespace quic {
Netlink::Netlink(KernelInterface* kernel) : kernel_(kernel) {
seq_ = QuicRandom::GetInstance()->RandUint64();
}
Netlink::~Netlink() { CloseSocket(); }
void Netlink::ResetRecvBuf(size_t size) {
if (size != 0) {
recvbuf_ = std::make_unique<char[]>(size);
} else {
recvbuf_ = nullptr;
}
recvbuf_length_ = size;
}
bool Netlink::OpenSocket() {
if (socket_fd_ >= 0) {
return true;
}
socket_fd_ = kernel_->socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (socket_fd_ < 0) {
QUIC_PLOG(ERROR) << "can't open netlink socket";
return false;
}
QUIC_LOG(INFO) << "Opened a new netlink socket fd = " << socket_fd_;
// bind a local address to the socket
sockaddr_nl myaddr;
memset(&myaddr, 0, sizeof(myaddr));
myaddr.nl_family = AF_NETLINK;
if (kernel_->bind(socket_fd_, reinterpret_cast<struct sockaddr*>(&myaddr),
sizeof(myaddr)) < 0) {
QUIC_LOG(INFO) << "can't bind address to socket";
CloseSocket();
return false;
}
return true;
}
void Netlink::CloseSocket() {
if (socket_fd_ >= 0) {
QUIC_LOG(INFO) << "Closing netlink socket fd = " << socket_fd_;
kernel_->close(socket_fd_);
}
ResetRecvBuf(0);
socket_fd_ = -1;
}
namespace {
class LinkInfoParser : public NetlinkParserInterface {
public:
LinkInfoParser(std::string interface_name, Netlink::LinkInfo* link_info)
: interface_name_(std::move(interface_name)), link_info_(link_info) {}
void Run(struct nlmsghdr* netlink_message) override {
if (netlink_message->nlmsg_type != RTM_NEWLINK) {
QUIC_LOG(INFO) << absl::StrCat(
"Unexpected nlmsg_type: ", netlink_message->nlmsg_type,
" expected: ", RTM_NEWLINK);
return;
}
struct ifinfomsg* interface_info =
reinterpret_cast<struct ifinfomsg*>(NLMSG_DATA(netlink_message));
// make sure interface_info is what we asked for.
if (interface_info->ifi_family != AF_UNSPEC) {
QUIC_LOG(INFO) << absl::StrCat(
"Unexpected ifi_family: ", interface_info->ifi_family,
" expected: ", AF_UNSPEC);
return;
}
char hardware_address[kHwAddrSize];
size_t hardware_address_length = 0;
char broadcast_address[kHwAddrSize];
size_t broadcast_address_length = 0;
std::string name;
// loop through the attributes
struct rtattr* rta;
int payload_length = IFLA_PAYLOAD(netlink_message);
for (rta = IFLA_RTA(interface_info); RTA_OK(rta, payload_length);
rta = RTA_NEXT(rta, payload_length)) {
int attribute_length;
switch (rta->rta_type) {
case IFLA_ADDRESS: {
attribute_length = RTA_PAYLOAD(rta);
if (attribute_length > kHwAddrSize) {
QUIC_VLOG(2) << "IFLA_ADDRESS too long: " << attribute_length;
break;
}
memmove(hardware_address, RTA_DATA(rta), attribute_length);
hardware_address_length = attribute_length;
break;
}
case IFLA_BROADCAST: {
attribute_length = RTA_PAYLOAD(rta);
if (attribute_length > kHwAddrSize) {
QUIC_VLOG(2) << "IFLA_BROADCAST too long: " << attribute_length;
break;
}
memmove(broadcast_address, RTA_DATA(rta), attribute_length);
broadcast_address_length = attribute_length;
break;
}
case IFLA_IFNAME: {
name = std::string(reinterpret_cast<char*>(RTA_DATA(rta)),
RTA_PAYLOAD(rta));
// The name maybe a 0 terminated c string.
name = name.substr(0, name.find('\0'));
break;
}
}
}
QUIC_VLOG(2) << "interface name: " << name
<< ", index: " << interface_info->ifi_index;
if (name == interface_name_) {
link_info_->index = interface_info->ifi_index;
link_info_->type = interface_info->ifi_type;
link_info_->hardware_address_length = hardware_address_length;
if (hardware_address_length > 0) {
memmove(&link_info_->hardware_address, hardware_address,
hardware_address_length);
}
link_info_->broadcast_address_length = broadcast_address_length;
if (broadcast_address_length > 0) {
memmove(&link_info_->broadcast_address, broadcast_address,
broadcast_address_length);
}
found_link_ = true;
}
}
bool found_link() { return found_link_; }
private:
const std::string interface_name_;
Netlink::LinkInfo* const link_info_;
bool found_link_ = false;
};
} // namespace
bool Netlink::GetLinkInfo(const std::string& interface_name,
LinkInfo* link_info) {
auto message = LinkMessage::New(RtnetlinkMessage::Operation::GET,
NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST,
seq_, getpid(), nullptr);
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed.";
return false;
}
// Pass the parser to the receive routine. It may be called multiple times
// since there may be multiple reply packets each with multiple reply
// messages.
LinkInfoParser parser(interface_name, link_info);
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "recv failed.";
return false;
}
return parser.found_link();
}
namespace {
class LocalAddressParser : public NetlinkParserInterface {
public:
LocalAddressParser(int interface_index, uint8_t unwanted_flags,
std::vector<Netlink::AddressInfo>* local_addresses,
int* num_ipv6_nodad_dadfailed_addresses)
: interface_index_(interface_index),
unwanted_flags_(unwanted_flags),
local_addresses_(local_addresses),
num_ipv6_nodad_dadfailed_addresses_(
num_ipv6_nodad_dadfailed_addresses) {}
void Run(struct nlmsghdr* netlink_message) override {
// each nlmsg contains a header and multiple address attributes.
if (netlink_message->nlmsg_type != RTM_NEWADDR) {
QUIC_LOG(INFO) << "Unexpected nlmsg_type: " << netlink_message->nlmsg_type
<< " expected: " << RTM_NEWADDR;
return;
}
struct ifaddrmsg* interface_address =
reinterpret_cast<struct ifaddrmsg*>(NLMSG_DATA(netlink_message));
// Make sure this is for an address family we're interested in.
if (interface_address->ifa_family != AF_INET &&
interface_address->ifa_family != AF_INET6) {
QUIC_VLOG(2) << absl::StrCat("uninteresting ifa family: ",
interface_address->ifa_family);
return;
}
// Keep track of addresses with both 'nodad' and 'dadfailed', this really
// should't be possible and is likely a kernel bug.
if (num_ipv6_nodad_dadfailed_addresses_ != nullptr &&
(interface_address->ifa_flags & IFA_F_NODAD) &&
(interface_address->ifa_flags & IFA_F_DADFAILED)) {
++(*num_ipv6_nodad_dadfailed_addresses_);
}
uint8_t unwanted_flags = interface_address->ifa_flags & unwanted_flags_;
if (unwanted_flags != 0) {
QUIC_VLOG(2) << absl::StrCat("unwanted ifa flags: ", unwanted_flags);
return;
}
// loop through the attributes
struct rtattr* rta;
int payload_length = IFA_PAYLOAD(netlink_message);
Netlink::AddressInfo address_info;
for (rta = IFA_RTA(interface_address); RTA_OK(rta, payload_length);
rta = RTA_NEXT(rta, payload_length)) {
// There's quite a lot of confusion in Linux over the use of IFA_LOCAL and
// IFA_ADDRESS (source and destination address). For broadcast links, such
// as Ethernet, they are identical (see <linux/if_addr.h>), but the kernel
// sometimes uses only one or the other. We'll return both so that the
// caller can decide which to use.
if (rta->rta_type != IFA_LOCAL && rta->rta_type != IFA_ADDRESS) {
QUIC_VLOG(2) << "Ignoring uninteresting rta_type: " << rta->rta_type;
continue;
}
switch (interface_address->ifa_family) {
case AF_INET:
ABSL_FALLTHROUGH_INTENDED;
case AF_INET6:
// QuicIpAddress knows how to parse ip from raw bytes as long as they
// are in network byte order.
if (RTA_PAYLOAD(rta) == sizeof(struct in_addr) ||
RTA_PAYLOAD(rta) == sizeof(struct in6_addr)) {
auto* raw_ip = reinterpret_cast<char*>(RTA_DATA(rta));
if (rta->rta_type == IFA_LOCAL) {
address_info.local_address.FromPackedString(raw_ip,
RTA_PAYLOAD(rta));
} else {
address_info.interface_address.FromPackedString(raw_ip,
RTA_PAYLOAD(rta));
}
}
break;
default:
QUIC_LOG(ERROR) << absl::StrCat("Unknown address family: ",
interface_address->ifa_family);
}
}
QUIC_VLOG(2) << "local_address: " << address_info.local_address.ToString()
<< " interface_address: "
<< address_info.interface_address.ToString()
<< " index: " << interface_address->ifa_index;
if (interface_address->ifa_index != interface_index_) {
return;
}
address_info.prefix_length = interface_address->ifa_prefixlen;
address_info.scope = interface_address->ifa_scope;
if (address_info.local_address.IsInitialized() ||
address_info.interface_address.IsInitialized()) {
local_addresses_->push_back(address_info);
}
}
private:
const int interface_index_;
const uint8_t unwanted_flags_;
std::vector<Netlink::AddressInfo>* const local_addresses_;
int* const num_ipv6_nodad_dadfailed_addresses_;
};
} // namespace
bool Netlink::GetAddresses(int interface_index, uint8_t unwanted_flags,
std::vector<AddressInfo>* addresses,
int* num_ipv6_nodad_dadfailed_addresses) {
// the message doesn't contain the index, we'll have to do the filtering while
// parsing the reply. This is because NLM_F_MATCH, which only returns entries
// that matches the request criteria, is not yet implemented (see man 3
// netlink).
auto message = AddressMessage::New(RtnetlinkMessage::Operation::GET,
NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST,
seq_, getpid(), nullptr);
// the send routine returns the socket to listen on.
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed.";
return false;
}
addresses->clear();
if (num_ipv6_nodad_dadfailed_addresses != nullptr) {
*num_ipv6_nodad_dadfailed_addresses = 0;
}
LocalAddressParser parser(interface_index, unwanted_flags, addresses,
num_ipv6_nodad_dadfailed_addresses);
// Pass the parser to the receive routine. It may be called multiple times
// since there may be multiple reply packets each with multiple reply
// messages.
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "recv failed";
return false;
}
return true;
}
namespace {
class UnknownParser : public NetlinkParserInterface {
public:
void Run(struct nlmsghdr* netlink_message) override {
QUIC_LOG(INFO) << "nlmsg reply type: " << netlink_message->nlmsg_type;
}
};
} // namespace
bool Netlink::ChangeLocalAddress(
uint32_t interface_index, Verb verb, const QuicIpAddress& address,
uint8_t prefix_length, uint8_t ifa_flags, uint8_t ifa_scope,
const std::vector<struct rtattr*>& additional_attributes) {
if (verb == Verb::kReplace) {
return false;
}
auto operation = verb == Verb::kAdd ? RtnetlinkMessage::Operation::NEW
: RtnetlinkMessage::Operation::DEL;
uint8_t address_family;
if (address.address_family() == IpAddressFamily::IP_V4) {
address_family = AF_INET;
} else if (address.address_family() == IpAddressFamily::IP_V6) {
address_family = AF_INET6;
} else {
return false;
}
struct ifaddrmsg address_header = {address_family, prefix_length, ifa_flags,
ifa_scope, interface_index};
auto message = AddressMessage::New(operation, NLM_F_REQUEST | NLM_F_ACK, seq_,
getpid(), &address_header);
for (const auto& attribute : additional_attributes) {
if (attribute->rta_type == IFA_LOCAL) {
continue;
}
message.AppendAttribute(attribute->rta_type, RTA_DATA(attribute),
RTA_PAYLOAD(attribute));
}
message.AppendAttribute(IFA_LOCAL, address.ToPackedString().c_str(),
address.ToPackedString().size());
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed";
return false;
}
UnknownParser parser;
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "receive failed.";
return false;
}
return true;
}
namespace {
class RoutingRuleParser : public NetlinkParserInterface {
public:
explicit RoutingRuleParser(std::vector<Netlink::RoutingRule>* routing_rules)
: routing_rules_(routing_rules) {}
void Run(struct nlmsghdr* netlink_message) override {
if (netlink_message->nlmsg_type != RTM_NEWROUTE) {
QUIC_LOG(WARNING) << absl::StrCat(
"Unexpected nlmsg_type: ", netlink_message->nlmsg_type,
" expected: ", RTM_NEWROUTE);
return;
}
auto* route = reinterpret_cast<struct rtmsg*>(NLMSG_DATA(netlink_message));
int payload_length = RTM_PAYLOAD(netlink_message);
if (route->rtm_family != AF_INET && route->rtm_family != AF_INET6) {
QUIC_VLOG(2) << absl::StrCat("Uninteresting family: ", route->rtm_family);
return;
}
Netlink::RoutingRule rule;
rule.scope = route->rtm_scope;
rule.table = route->rtm_table;
struct rtattr* rta;
for (rta = RTM_RTA(route); RTA_OK(rta, payload_length);
rta = RTA_NEXT(rta, payload_length)) {
switch (rta->rta_type) {
case RTA_TABLE: {
rule.table = *reinterpret_cast<uint32_t*>(RTA_DATA(rta));
break;
}
case RTA_DST: {
QuicIpAddress destination;
destination.FromPackedString(reinterpret_cast<char*> RTA_DATA(rta),
RTA_PAYLOAD(rta));
rule.destination_subnet = IpRange(destination, route->rtm_dst_len);
break;
}
case RTA_PREFSRC: {
QuicIpAddress preferred_source;
rule.preferred_source.FromPackedString(
reinterpret_cast<char*> RTA_DATA(rta), RTA_PAYLOAD(rta));
break;
}
case RTA_OIF: {
rule.out_interface = *reinterpret_cast<int*>(RTA_DATA(rta));
break;
}
default: {
QUIC_VLOG(2) << absl::StrCat("Uninteresting attribute: ",
rta->rta_type);
}
}
}
routing_rules_->push_back(rule);
}
private:
std::vector<Netlink::RoutingRule>* routing_rules_;
};
} // namespace
bool Netlink::GetRouteInfo(std::vector<Netlink::RoutingRule>* routing_rules) {
rtmsg route_message{};
// Only manipulate main routing table.
route_message.rtm_table = RT_TABLE_MAIN;
auto message = RouteMessage::New(RtnetlinkMessage::Operation::GET,
NLM_F_REQUEST | NLM_F_ROOT | NLM_F_MATCH,
seq_, getpid(), &route_message);
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed";
return false;
}
RoutingRuleParser parser(routing_rules);
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "recv failed";
return false;
}
return true;
}
bool Netlink::ChangeRoute(Netlink::Verb verb, uint32_t table,
const IpRange& destination_subnet, uint8_t scope,
QuicIpAddress preferred_source,
int32_t interface_index) {
if (!destination_subnet.prefix().IsInitialized()) {
return false;
}
if (destination_subnet.address_family() != IpAddressFamily::IP_V4 &&
destination_subnet.address_family() != IpAddressFamily::IP_V6) {
return false;
}
if (preferred_source.IsInitialized() &&
preferred_source.address_family() !=
destination_subnet.address_family()) {
return false;
}
RtnetlinkMessage::Operation operation;
uint16_t flags = NLM_F_REQUEST | NLM_F_ACK;
switch (verb) {
case Verb::kAdd:
operation = RtnetlinkMessage::Operation::NEW;
// Setting NLM_F_EXCL so that an existing entry for this subnet will fail
// the request. NLM_F_CREATE is necessary to indicate this is trying to
// create a new entry - simply having RTM_NEWROUTE is not enough even the
// name suggests so.
flags |= NLM_F_EXCL | NLM_F_CREATE;
break;
case Verb::kRemove:
operation = RtnetlinkMessage::Operation::DEL;
break;
case Verb::kReplace:
operation = RtnetlinkMessage::Operation::NEW;
// Setting NLM_F_REPLACE to tell the kernel that existing entry for this
// subnet should be replaced.
flags |= NLM_F_REPLACE | NLM_F_CREATE;
break;
}
struct rtmsg route_message;
memset(&route_message, 0, sizeof(route_message));
route_message.rtm_family =
destination_subnet.address_family() == IpAddressFamily::IP_V4 ? AF_INET
: AF_INET6;
// rtm_dst_len and rtm_src_len are actually the subnet prefix lengths. Poor
// naming.
route_message.rtm_dst_len = destination_subnet.prefix_length();
// 0 means no source subnet for this rule.
route_message.rtm_src_len = 0;
// Only program the main table. Other tables are intended for the kernel to
// manage.
route_message.rtm_table = RT_TABLE_MAIN;
// Use RTPROT_UNSPEC to match all the different protocol. Rules added by
// kernel have RTPROT_KERNEL. Rules added by the root user have RTPROT_STATIC
// instead.
route_message.rtm_protocol =
verb == Verb::kRemove ? RTPROT_UNSPEC : RTPROT_STATIC;
route_message.rtm_scope = scope;
// Only add unicast routing rule.
route_message.rtm_type = RTN_UNICAST;
auto message =
RouteMessage::New(operation, flags, seq_, getpid(), &route_message);
message.AppendAttribute(RTA_TABLE, &table, sizeof(table));
// RTA_OIF is the target interface for this rule.
message.AppendAttribute(RTA_OIF, &interface_index, sizeof(interface_index));
// The actual destination subnet must be truncated of all the tailing zeros.
message.AppendAttribute(
RTA_DST,
reinterpret_cast<const void*>(
destination_subnet.prefix().ToPackedString().c_str()),
destination_subnet.prefix().ToPackedString().size());
// This is the source address to use in the IP packet should this routing rule
// is used.
if (preferred_source.IsInitialized()) {
auto src_str = preferred_source.ToPackedString();
message.AppendAttribute(RTA_PREFSRC,
reinterpret_cast<const void*>(src_str.c_str()),
src_str.size());
}
if (verb != Verb::kRemove) {
auto gateway_str = QboneConstants::GatewayAddress()->ToPackedString();
message.AppendAttribute(RTA_GATEWAY,
reinterpret_cast<const void*>(gateway_str.c_str()),
gateway_str.size());
}
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed";
return false;
}
UnknownParser parser;
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "receive failed.";
return false;
}
return true;
}
namespace {
class IpRuleParser : public NetlinkParserInterface {
public:
explicit IpRuleParser(std::vector<Netlink::IpRule>* ip_rules)
: ip_rules_(ip_rules) {}
void Run(struct nlmsghdr* netlink_message) override {
if (netlink_message->nlmsg_type != RTM_NEWRULE) {
QUIC_LOG(WARNING) << absl::StrCat(
"Unexpected nlmsg_type: ", netlink_message->nlmsg_type,
" expected: ", RTM_NEWRULE);
return;
}
auto* rule = reinterpret_cast<rtmsg*>(NLMSG_DATA(netlink_message));
int payload_length = RTM_PAYLOAD(netlink_message);
if (rule->rtm_family != AF_INET6) {
QUIC_LOG(ERROR) << absl::StrCat("Unexpected family: ", rule->rtm_family);
return;
}
Netlink::IpRule ip_rule;
ip_rule.table = rule->rtm_table;
struct rtattr* rta;
for (rta = RTM_RTA(rule); RTA_OK(rta, payload_length);
rta = RTA_NEXT(rta, payload_length)) {
switch (rta->rta_type) {
case RTA_TABLE: {
ip_rule.table = *reinterpret_cast<uint32_t*>(RTA_DATA(rta));
break;
}
case RTA_SRC: {
QuicIpAddress src_addr;
src_addr.FromPackedString(reinterpret_cast<char*>(RTA_DATA(rta)),
RTA_PAYLOAD(rta));
IpRange src_range(src_addr, rule->rtm_src_len);
ip_rule.source_range = src_range;
break;
}
default: {
QUIC_VLOG(2) << absl::StrCat("Uninteresting attribute: ",
rta->rta_type);
}
}
}
ip_rules_->emplace_back(ip_rule);
}
private:
std::vector<Netlink::IpRule>* ip_rules_;
};
} // namespace
bool Netlink::GetRuleInfo(std::vector<Netlink::IpRule>* ip_rules) {
rtmsg rule_message{};
rule_message.rtm_family = AF_INET6;
auto message = RuleMessage::New(RtnetlinkMessage::Operation::GET,
NLM_F_REQUEST | NLM_F_DUMP, seq_, getpid(),
&rule_message);
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed";
return false;
}
IpRuleParser parser(ip_rules);
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "receive failed.";
return false;
}
return true;
}
bool Netlink::ChangeRule(Verb verb, uint32_t table, IpRange source_range) {
RtnetlinkMessage::Operation operation;
uint16_t flags = NLM_F_REQUEST | NLM_F_ACK;
rtmsg rule_message{};
rule_message.rtm_family = AF_INET6;
rule_message.rtm_protocol = RTPROT_STATIC;
rule_message.rtm_scope = RT_SCOPE_UNIVERSE;
rule_message.rtm_table = RT_TABLE_UNSPEC;
rule_message.rtm_flags |= FIB_RULE_FIND_SADDR;
switch (verb) {
case Verb::kAdd:
if (!source_range.IsInitialized()) {
QUIC_LOG(ERROR) << "Source range must be initialized.";
return false;
}
operation = RtnetlinkMessage::Operation::NEW;
flags |= NLM_F_EXCL | NLM_F_CREATE;
rule_message.rtm_type = FRA_DST;
rule_message.rtm_src_len = source_range.prefix_length();
break;
case Verb::kRemove:
operation = RtnetlinkMessage::Operation::DEL;
break;
case Verb::kReplace:
QUIC_LOG(ERROR) << "Unsupported verb: kReplace";
return false;
}
auto message =
RuleMessage::New(operation, flags, seq_, getpid(), &rule_message);
message.AppendAttribute(RTA_TABLE, &table, sizeof(table));
if (source_range.IsInitialized()) {
std::string packed_src = source_range.prefix().ToPackedString();
message.AppendAttribute(RTA_SRC,
reinterpret_cast<const void*>(packed_src.c_str()),
packed_src.size());
}
if (!Send(message.BuildIoVec().get(), message.IoVecSize())) {
QUIC_LOG(ERROR) << "send failed";
return false;
}
UnknownParser parser;
if (!Recv(seq_++, &parser)) {
QUIC_LOG(ERROR) << "receive failed.";
return false;
}
return true;
}
bool Netlink::Send(struct iovec* iov, size_t iovlen) {
if (!OpenSocket()) {
QUIC_LOG(ERROR) << "can't open socket";
return false;
}
// an address for communicating with the kernel netlink code
sockaddr_nl netlink_address;
memset(&netlink_address, 0, sizeof(netlink_address));
netlink_address.nl_family = AF_NETLINK;
netlink_address.nl_pid = 0; // destination is kernel
netlink_address.nl_groups = 0; // no multicast
struct msghdr msg = {
&netlink_address, sizeof(netlink_address), iov, iovlen, nullptr, 0, 0};
if (kernel_->sendmsg(socket_fd_, &msg, 0) < 0) {
QUIC_LOG(ERROR) << "sendmsg failed";
CloseSocket();
return false;
}
return true;
}
bool Netlink::Recv(uint32_t seq, NetlinkParserInterface* parser) {
sockaddr_nl netlink_address;
// replies can span multiple packets
for (;;) {
socklen_t address_length = sizeof(netlink_address);
// First, call recvfrom with buffer size of 0 and MSG_PEEK | MSG_TRUNC set
// so that we know the size of the incoming packet before actually receiving
// it.
int next_packet_size = kernel_->recvfrom(
socket_fd_, recvbuf_.get(), /* len = */ 0, MSG_PEEK | MSG_TRUNC,
reinterpret_cast<struct sockaddr*>(&netlink_address), &address_length);
if (next_packet_size < 0) {
QUIC_LOG(ERROR)
<< "error recvfrom with MSG_PEEK | MSG_TRUNC to get packet length.";
CloseSocket();
return false;
}
QUIC_VLOG(3) << "netlink packet size: " << next_packet_size;
if (next_packet_size > recvbuf_length_) {
QUIC_VLOG(2) << "resizing recvbuf to " << next_packet_size;
ResetRecvBuf(next_packet_size);
}
// Get the packet for real.
memset(recvbuf_.get(), 0, recvbuf_length_);
int len = kernel_->recvfrom(
socket_fd_, recvbuf_.get(), recvbuf_length_, /* flags = */ 0,
reinterpret_cast<struct sockaddr*>(&netlink_address), &address_length);
QUIC_VLOG(3) << "recvfrom returned: " << len;
if (len < 0) {
QUIC_LOG(INFO) << "can't receive netlink packet";
CloseSocket();
return false;
}
// there may be multiple nlmsg's in each reply packet
struct nlmsghdr* netlink_message;
for (netlink_message = reinterpret_cast<struct nlmsghdr*>(recvbuf_.get());
NLMSG_OK(netlink_message, len);
netlink_message = NLMSG_NEXT(netlink_message, len)) {
QUIC_VLOG(3) << "netlink_message->nlmsg_type = "
<< netlink_message->nlmsg_type;
// make sure this is to us
if (netlink_message->nlmsg_seq != seq) {
QUIC_LOG(INFO) << "netlink_message not meant for us."
<< " seq: " << seq
<< " nlmsg_seq: " << netlink_message->nlmsg_seq;
continue;
}
// done with this whole reply (not just this particular packet)
if (netlink_message->nlmsg_type == NLMSG_DONE) {
return true;
}
if (netlink_message->nlmsg_type == NLMSG_ERROR) {
struct nlmsgerr* err =
reinterpret_cast<struct nlmsgerr*>(NLMSG_DATA(netlink_message));
if (netlink_message->nlmsg_len <
NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
QUIC_LOG(INFO) << "netlink_message ERROR truncated";
} else {
// an ACK
if (err->error == 0) {
QUIC_VLOG(3) << "Netlink sent an ACK";
return true;
}
QUIC_LOG(INFO) << "netlink_message ERROR: " << err->error;
}
return false;
}
parser->Run(netlink_message);
}
}
}
} // namespace quic