| // Copyright (c) 2019 The Chromium Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | #include "quic/qbone/bonnet/tun_device_packet_exchanger.h" | 
 |  | 
 | #include <netinet/icmp6.h> | 
 | #include <netinet/ip6.h> | 
 |  | 
 | #include <utility> | 
 |  | 
 | #include "absl/strings/str_cat.h" | 
 | #include "quic/qbone/platform/icmp_packet.h" | 
 | #include "quic/qbone/platform/netlink_interface.h" | 
 | #include "quic/qbone/qbone_constants.h" | 
 |  | 
 | namespace quic { | 
 |  | 
 | TunDevicePacketExchanger::TunDevicePacketExchanger( | 
 |     size_t mtu, KernelInterface* kernel, NetlinkInterface* netlink, | 
 |     QbonePacketExchanger::Visitor* visitor, size_t max_pending_packets, | 
 |     bool is_tap, StatsInterface* stats, absl::string_view ifname) | 
 |     : QbonePacketExchanger(visitor, max_pending_packets), | 
 |       mtu_(mtu), | 
 |       kernel_(kernel), | 
 |       netlink_(netlink), | 
 |       ifname_(ifname), | 
 |       is_tap_(is_tap), | 
 |       stats_(stats) { | 
 |   if (is_tap_) { | 
 |     mtu_ += ETH_HLEN; | 
 |   } | 
 | } | 
 |  | 
 | bool TunDevicePacketExchanger::WritePacket(const char* packet, size_t size, | 
 |                                            bool* blocked, std::string* error) { | 
 |   *blocked = false; | 
 |   if (fd_ < 0) { | 
 |     *error = absl::StrCat("Invalid file descriptor of the TUN device: ", fd_); | 
 |     stats_->OnWriteError(error); | 
 |     return false; | 
 |   } | 
 |  | 
 |   auto buffer = std::make_unique<QuicData>(packet, size); | 
 |   if (is_tap_) { | 
 |     buffer = ApplyL2Headers(*buffer); | 
 |   } | 
 |   int result = kernel_->write(fd_, buffer->data(), buffer->length()); | 
 |   if (result == -1) { | 
 |     if (errno == EWOULDBLOCK || errno == EAGAIN) { | 
 |       // The tunnel is blocked. Note that this does not mean the receive buffer | 
 |       // of a TCP connection is filled. This simply means the TUN device itself | 
 |       // is blocked on handing packets to the rest part of the kernel. | 
 |       *error = absl::StrCat("Write to the TUN device was blocked: ", errno); | 
 |       *blocked = true; | 
 |       stats_->OnWriteError(error); | 
 |     } | 
 |     return false; | 
 |   } | 
 |   stats_->OnPacketWritten(result); | 
 |  | 
 |   return true; | 
 | } | 
 |  | 
 | std::unique_ptr<QuicData> TunDevicePacketExchanger::ReadPacket( | 
 |     bool* blocked, std::string* error) { | 
 |   *blocked = false; | 
 |   if (fd_ < 0) { | 
 |     *error = absl::StrCat("Invalid file descriptor of the TUN device: ", fd_); | 
 |     stats_->OnReadError(error); | 
 |     return nullptr; | 
 |   } | 
 |   // Reading on a TUN device returns a packet at a time. If the packet is longer | 
 |   // than the buffer, it's truncated. | 
 |   auto read_buffer = std::make_unique<char[]>(mtu_); | 
 |   int result = kernel_->read(fd_, read_buffer.get(), mtu_); | 
 |   // Note that 0 means end of file, but we're talking about a TUN device - there | 
 |   // is no end of file. Therefore 0 also indicates error. | 
 |   if (result <= 0) { | 
 |     if (errno == EAGAIN || errno == EWOULDBLOCK) { | 
 |       *error = absl::StrCat("Read from the TUN device was blocked: ", errno); | 
 |       *blocked = true; | 
 |       stats_->OnReadError(error); | 
 |     } | 
 |     return nullptr; | 
 |   } | 
 |  | 
 |   auto buffer = std::make_unique<QuicData>(read_buffer.release(), result, true); | 
 |   if (is_tap_) { | 
 |     buffer = ConsumeL2Headers(*buffer); | 
 |   } | 
 |   if (buffer) { | 
 |     stats_->OnPacketRead(buffer->length()); | 
 |   } | 
 |   return buffer; | 
 | } | 
 |  | 
 | void TunDevicePacketExchanger::set_file_descriptor(int fd) { fd_ = fd; } | 
 |  | 
 | const TunDevicePacketExchanger::StatsInterface* | 
 | TunDevicePacketExchanger::stats_interface() const { | 
 |   return stats_; | 
 | } | 
 |  | 
 | std::unique_ptr<QuicData> TunDevicePacketExchanger::ApplyL2Headers( | 
 |     const QuicData& l3_packet) { | 
 |   if (is_tap_ && !mac_initialized_) { | 
 |     NetlinkInterface::LinkInfo link_info{}; | 
 |     if (netlink_->GetLinkInfo(ifname_, &link_info)) { | 
 |       memcpy(tap_mac_, link_info.hardware_address, ETH_ALEN); | 
 |       mac_initialized_ = true; | 
 |     } else { | 
 |       QUIC_LOG_EVERY_N_SEC(ERROR, 30) | 
 |           << "Unable to get link info for: " << ifname_; | 
 |     } | 
 |   } | 
 |  | 
 |   const auto l2_packet_size = l3_packet.length() + ETH_HLEN; | 
 |   auto l2_buffer = std::make_unique<char[]>(l2_packet_size); | 
 |  | 
 |   // Populate the Ethernet header | 
 |   auto* hdr = reinterpret_cast<ethhdr*>(l2_buffer.get()); | 
 |   // Set src & dst to my own address | 
 |   memcpy(hdr->h_dest, tap_mac_, ETH_ALEN); | 
 |   memcpy(hdr->h_source, tap_mac_, ETH_ALEN); | 
 |   // Assume ipv6 for now | 
 |   // TODO(b/195113643): Support additional protocols. | 
 |   hdr->h_proto = absl::ghtons(ETH_P_IPV6); | 
 |  | 
 |   // Copy the l3 packet into buffer, just after the ethernet header. | 
 |   memcpy(l2_buffer.get() + ETH_HLEN, l3_packet.data(), l3_packet.length()); | 
 |  | 
 |   return std::make_unique<QuicData>(l2_buffer.release(), l2_packet_size, true); | 
 | } | 
 |  | 
 | std::unique_ptr<QuicData> TunDevicePacketExchanger::ConsumeL2Headers( | 
 |     const QuicData& l2_packet) { | 
 |   if (l2_packet.length() < ETH_HLEN) { | 
 |     // Packet is too short for ethernet headers. Drop it. | 
 |     return nullptr; | 
 |   } | 
 |   auto* hdr = reinterpret_cast<const ethhdr*>(l2_packet.data()); | 
 |   if (hdr->h_proto != absl::ghtons(ETH_P_IPV6)) { | 
 |     return nullptr; | 
 |   } | 
 |   constexpr auto kIp6PrefixLen = ETH_HLEN + sizeof(ip6_hdr); | 
 |   constexpr auto kIcmp6PrefixLen = kIp6PrefixLen + sizeof(icmp6_hdr); | 
 |   if (l2_packet.length() < kIp6PrefixLen) { | 
 |     // Packet is too short to be ipv6. Drop it. | 
 |     return nullptr; | 
 |   } | 
 |   auto* ip_hdr = reinterpret_cast<const ip6_hdr*>(l2_packet.data() + ETH_HLEN); | 
 |   const bool is_icmp = ip_hdr->ip6_ctlun.ip6_un1.ip6_un1_nxt == IPPROTO_ICMPV6; | 
 |  | 
 |   bool is_neighbor_solicit = false; | 
 |   if (is_icmp) { | 
 |     if (l2_packet.length() < kIcmp6PrefixLen) { | 
 |       // Packet is too short to be icmp6. Drop it. | 
 |       return nullptr; | 
 |     } | 
 |     is_neighbor_solicit = | 
 |         reinterpret_cast<const icmp6_hdr*>(l2_packet.data() + kIp6PrefixLen) | 
 |             ->icmp6_type == ND_NEIGHBOR_SOLICIT; | 
 |   } | 
 |  | 
 |   if (is_neighbor_solicit) { | 
 |     // If we've received a neighbor solicitation, craft an advertisement to | 
 |     // respond with and write it back to the local interface. | 
 |     auto* icmp6_payload = l2_packet.data() + kIcmp6PrefixLen; | 
 |  | 
 |     QuicIpAddress target_address( | 
 |         *reinterpret_cast<const in6_addr*>(icmp6_payload)); | 
 |     if (target_address != *QboneConstants::GatewayAddress()) { | 
 |       // Only respond to solicitations for our gateway address | 
 |       return nullptr; | 
 |     } | 
 |  | 
 |     // Neighbor Advertisement crafted per: | 
 |     // https://datatracker.ietf.org/doc/html/rfc4861#section-4.4 | 
 |     // | 
 |     // Using the Target link-layer address option defined at: | 
 |     // https://datatracker.ietf.org/doc/html/rfc4861#section-4.6.1 | 
 |     constexpr size_t kIcmpv6OptionSize = 8; | 
 |     const int payload_size = sizeof(in6_addr) + kIcmpv6OptionSize; | 
 |     auto payload = std::make_unique<char[]>(payload_size); | 
 |     // Place the solicited IPv6 address at the beginning of the response payload | 
 |     memcpy(payload.get(), icmp6_payload, sizeof(in6_addr)); | 
 |     // Setup the Target link-layer address option: | 
 |     //      0                   1                   2                   3 | 
 |     //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 
 |     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 
 |     // |     Type      |    Length     |    Link-Layer Address ... | 
 |     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 
 |     int pos = sizeof(in6_addr); | 
 |     payload[pos++] = ND_OPT_TARGET_LINKADDR;    // Type | 
 |     payload[pos++] = 1;                         // Length in units of 8 octets | 
 |     memcpy(&payload[pos], tap_mac_, ETH_ALEN);  // This interfaces' MAC address | 
 |  | 
 |     // Populate the ICMPv6 header | 
 |     icmp6_hdr response_hdr{}; | 
 |     response_hdr.icmp6_type = ND_NEIGHBOR_ADVERT; | 
 |     // Set the solicited bit to true | 
 |     response_hdr.icmp6_dataun.icmp6_un_data8[0] = 64; | 
 |     // Craft the full ICMPv6 packet and then ship it off to WritePacket | 
 |     // to have it frame it with L2 headers and send it back to the requesting | 
 |     // neighbor. | 
 |     CreateIcmpPacket(ip_hdr->ip6_src, ip_hdr->ip6_src, response_hdr, | 
 |                      absl::string_view(payload.get(), payload_size), | 
 |                      [this](absl::string_view packet) { | 
 |                        bool blocked; | 
 |                        std::string error; | 
 |                        WritePacket(packet.data(), packet.size(), &blocked, | 
 |                                    &error); | 
 |                      }); | 
 |     // Do not forward the neighbor solicitation through the tunnel since it's | 
 |     // link-local. | 
 |     return nullptr; | 
 |   } | 
 |  | 
 |   // If this isn't a Neighbor Solicitation, remove the L2 headers and forward | 
 |   // it as though it were an L3 packet. | 
 |   const auto l3_packet_size = l2_packet.length() - ETH_HLEN; | 
 |   auto shift_buffer = std::make_unique<char[]>(l3_packet_size); | 
 |   memcpy(shift_buffer.get(), l2_packet.data() + ETH_HLEN, l3_packet_size); | 
 |  | 
 |   return std::make_unique<QuicData>(shift_buffer.release(), l3_packet_size, | 
 |                                     true); | 
 | } | 
 |  | 
 | }  // namespace quic |