Update four-pass algorithm to reduce copying and conform to draft-ietf-quic-load-balancers-19.

Code is not in production.

Performance (ns, Unencrypted / 3-pass / 1-pass)
Tip-of-tree: 20.9/102/26.3
This CL: 21.2/96.7/26.4
PiperOrigin-RevId: 604412894
diff --git a/quiche/quic/load_balancer/load_balancer_config.cc b/quiche/quic/load_balancer/load_balancer_config.cc
index 6dfb30c..70c2590 100644
--- a/quiche/quic/load_balancer/load_balancer_config.cc
+++ b/quiche/quic/load_balancer/load_balancer_config.cc
@@ -89,6 +89,7 @@
              : std::optional<LoadBalancerConfig>();
 }
 
+// Note that |ciphertext| does not include the first byte of the connection ID.
 bool LoadBalancerConfig::FourPassDecrypt(
     absl::Span<const uint8_t> ciphertext,
     LoadBalancerServerId& server_id) const {
@@ -103,7 +104,7 @@
   // Do 3 or 4 passes. Only 3 are necessary if the server_id is short enough
   // to fit in the first half of the connection ID (the decoder doesn't need
   // to extract the nonce).
-  uint8_t left[kLoadBalancerBlockSize];
+  uint8_t* left = server_id.mutable_data();
   uint8_t right[kLoadBalancerBlockSize];
   uint8_t half_len;  // half the length of the plaintext, rounded up
   bool is_length_odd =
@@ -117,19 +118,18 @@
   // Consolidate left and right into a server ID with minimum copying.
   if (server_id_len_ < half_len ||
       (server_id_len_ == half_len && !is_length_odd)) {
-    // There is no half-byte to handle
-    memcpy(server_id.mutable_data(), &left[2], server_id_len_);
+    // There is no half-byte to handle. Server ID is already written into
+    // server_id.
     return true;
   }
   if (is_length_odd) {
-    right[2] |= left[half_len-- + 1];  // Combine the halves of the odd byte.
+    right[0] |= *(left + --half_len);  // Combine the halves of the odd byte.
   }
-  memcpy(server_id.mutable_data(), &left[2], half_len);
-  memcpy(server_id.mutable_data() + half_len, &right[2],
-         server_id_len_ - half_len);
+  memcpy(server_id.mutable_data() + half_len, right, server_id_len_ - half_len);
   return true;
 }
 
+// Note that |plaintext| includes the first byte of the connection ID.
 QuicConnectionId LoadBalancerConfig::FourPassEncrypt(
     absl::Span<uint8_t> plaintext) const {
   if (plaintext.size() < total_len()) {
@@ -151,14 +151,10 @@
   // Consolidate left and right into a server ID with minimum copying.
   if (is_length_odd) {
     // Combine the halves of the odd byte.
-    left[half_len + 1] |= right[2];
+    right[0] |= left[--half_len];
   }
-  memcpy(plaintext.data() + 1, &left[2], half_len);
-  if (is_length_odd) {
-    memcpy(plaintext.data() + 1 + half_len, &right[3], half_len - 1);
-  } else {
-    memcpy(plaintext.data() + 1 + half_len, &right[2], half_len);
-  }
+  memcpy(plaintext.data() + 1, left, half_len);
+  memcpy(plaintext.data() + half_len + 1, right, plaintext_len() - half_len);
   return QuicConnectionId(reinterpret_cast<char*>(plaintext.data()),
                           total_len());
 }
@@ -195,6 +191,7 @@
                              ? BuildKey(key, /* encrypt = */ false)
                              : std::optional<AES_KEY>()) {}
 
+// Note that |input| does not include the first byte of the connection ID.
 bool LoadBalancerConfig::InitializeFourPass(const uint8_t* input, uint8_t* left,
                                             uint8_t* right,
                                             uint8_t* half_len) const {
@@ -210,17 +207,17 @@
   memset(right, 0, kLoadBalancerBlockSize);
   // The first byte is the plaintext/ciphertext length, the second byte will be
   // the index of the pass. Half the plaintext or ciphertext follows.
-  left[0] = plaintext_len();
-  right[0] = plaintext_len();
-  // Leave left_[1], right_[1] as zero. It will be set for each pass.
-  memcpy(&left[2], input, *half_len);
+  left[kLoadBalancerBlockSize - 2] = plaintext_len();
+  right[kLoadBalancerBlockSize - 2] = plaintext_len();
+  // Leave left[15], right[15] as zero. It will be set for each pass.
+  memcpy(left, input, *half_len);
   // If is_length_odd, then both left and right will have part of the middle
   // byte. Then that middle byte will be split in half via the bitmask in the
   // next step.
-  memcpy(&right[2], input + (plaintext_len() / 2), *half_len);
+  memcpy(right, input + (plaintext_len() / 2), *half_len);
   if (is_length_odd) {
-    left[*half_len + 1] &= 0xf0;
-    right[2] &= 0x0f;
+    left[*half_len - 1] &= 0xf0;
+    right[0] &= 0x0f;
   }
   return is_length_odd;
 }
@@ -230,26 +227,26 @@
                                         uint8_t* right) const {
   uint8_t ciphertext[kLoadBalancerBlockSize];
   if (index % 2 == 0) {  // Go right to left.
-    right[1] = index;
+    right[kLoadBalancerBlockSize - 1] = index;
     AES_encrypt(right, ciphertext, &*key_);
     for (int i = 0; i < half_len; ++i) {
       // Skip over the first two bytes, which have the plaintext_len and the
       // index. The CID bits are in [2, half_len - 1].
-      left[2 + i] ^= ciphertext[i];
+      left[i] ^= ciphertext[i];
     }
     if (is_length_odd) {
-      left[half_len + 1] &= 0xf0;
+      left[half_len - 1] &= 0xf0;
     }
     return;
   }
   // Go left to right.
-  left[1] = index;
+  left[kLoadBalancerBlockSize - 1] = index;
   AES_encrypt(left, ciphertext, &*key_);
   for (int i = 0; i < half_len; ++i) {
-    right[2 + i] ^= ciphertext[i];
+    right[i] ^= ciphertext[i];
   }
   if (is_length_odd) {
-    right[2] &= 0x0f;
+    right[0] &= 0x0f;
   }
 }
 
diff --git a/quiche/quic/load_balancer/load_balancer_config_test.cc b/quiche/quic/load_balancer/load_balancer_config_test.cc
index dd34fb1..20dcd55 100644
--- a/quiche/quic/load_balancer/load_balancer_config_test.cc
+++ b/quiche/quic/load_balancer/load_balancer_config_test.cc
@@ -104,7 +104,7 @@
 }
 
 // Compare EncryptionPass() results to the example in
-// draft-ietf-quic-load-balancers-15, Section 4.3.2.
+// draft-ietf-quic-load-balancers-19, Section 4.3.2.
 TEST_F(LoadBalancerConfigTest, TestEncryptionPassExample) {
   auto config =
       LoadBalancerConfig::Create(0, 3, 4, absl::string_view(raw_key, 16));
@@ -120,30 +120,30 @@
   std::array<std::array<uint8_t, kLoadBalancerBlockSize>,
              kNumLoadBalancerCryptoPasses + 1>
       expected_left = {{
-          {0x07, 0x00, 0x31, 0x44, 0x1a, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x01, 0x31, 0x44, 0x1a, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x01, 0x02, 0x8e, 0x1b, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x03, 0x02, 0x8e, 0x1b, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x03, 0x8e, 0x9a, 0x91, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
+          {0x31, 0x44, 0x1a, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x00},
+          {0x31, 0x44, 0x1a, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x01},
+          {0xd4, 0xa0, 0x48, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x01},
+          {0xd4, 0xa0, 0x48, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x03},
+          {0x67, 0x94, 0x7d, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x03},
       }};
   std::array<std::array<uint8_t, kLoadBalancerBlockSize>,
              kNumLoadBalancerCryptoPasses + 1>
       expected_right = {{
-          {0x07, 0x00, 0x0c, 0x69, 0xc2, 0x75, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x00, 0x0f, 0x1a, 0x5b, 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x02, 0x0f, 0x1a, 0x5b, 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x02, 0x04, 0x94, 0x97, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
-          {0x07, 0x04, 0x04, 0x94, 0x97, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00,
-           0x00, 0x00, 0x00, 0x00, 0x00},
+          {0x0c, 0x69, 0xc2, 0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x00},
+          {0x0e, 0x3c, 0x1f, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x00},
+          {0x0e, 0x3c, 0x1f, 0xf9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x02},
+          {0x09, 0xbe, 0x05, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x02},
+          {0x09, 0xbe, 0x05, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+           0x00, 0x00, 0x00, 0x07, 0x04},
       }};
 
   EXPECT_EQ(left, expected_left[0]);
@@ -163,12 +163,12 @@
   auto config =
       LoadBalancerConfig::Create(0, 3, 4, absl::string_view(raw_key, 16));
   std::array<uint8_t, kLoadBalancerBlockSize> start_left = {
-      0x07, 0x00, 0x31, 0x44, 0x1a, 0x90, 0x00, 0x00,
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x31, 0x44, 0x1a, 0x90, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
   };
   std::array<uint8_t, kLoadBalancerBlockSize> start_right = {
-      0x07, 0x00, 0x0c, 0x69, 0xc2, 0x75, 0x00, 0x00,
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x0c, 0x69, 0xc2, 0x75, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
   };
   std::array<uint8_t, kLoadBalancerBlockSize> left = start_left,
                                               right = start_right;
@@ -183,8 +183,8 @@
                                          right.data());
   // Since index is manually written into the second byte only on input, it is
   // not reversible.
-  left[1] = 0;
-  right[1] = 0;
+  left[15] = 0;
+  right[15] = 0;
   EXPECT_EQ(left, start_left);
   EXPECT_EQ(right, start_right);
 }
@@ -216,7 +216,7 @@
 }
 
 // Block decrypt test from the Test Vector in
-// draft-ietf-quic-load-balancers-15, Appendix B.
+// draft-ietf-quic-load-balancers-19, Appendix B.
 TEST_F(LoadBalancerConfigTest, BlockEncryptionExample) {
   const uint8_t ptext[] = {0xed, 0x79, 0x3a, 0x51, 0xd4, 0x9b, 0x8f, 0x5f,
                            0xee, 0x08, 0x0d, 0xbf, 0x48, 0xc0, 0xd1, 0xe5};
diff --git a/quiche/quic/load_balancer/load_balancer_decoder_test.cc b/quiche/quic/load_balancer/load_balancer_decoder_test.cc
index 6e8a399..890557f 100644
--- a/quiche/quic/load_balancer/load_balancer_decoder_test.cc
+++ b/quiche/quic/load_balancer/load_balancer_decoder_test.cc
@@ -63,7 +63,7 @@
   }
 }
 
-// Compare test vectors from Appendix B of draft-ietf-quic-load-balancers-15.
+// Compare test vectors from Appendix B of draft-ietf-quic-load-balancers-19.
 TEST_F(LoadBalancerDecoderTest, DecoderTestVectors) {
   // Try (1) the "standard" CID length of 8
   // (2) server_id_len > nonce_len, so there is a fourth decryption pass
@@ -72,13 +72,13 @@
   const struct LoadBalancerDecoderTestCase test_vectors[4] = {
       {
           *LoadBalancerConfig::Create(0, 3, 4, kKey),
-          QuicConnectionId({0x07, 0x41, 0x26, 0xee, 0x38, 0xbf, 0x54, 0x54}),
+          QuicConnectionId({0x07, 0x20, 0xb1, 0xd0, 0x7b, 0x35, 0x9d, 0x3c}),
           MakeServerId(kServerId, 3),
       },
       {
           *LoadBalancerConfig::Create(1, 10, 5, kKey),
-          QuicConnectionId({0x2f, 0xcd, 0x3f, 0x57, 0x2d, 0x4e, 0xef, 0xb0,
-                            0x46, 0xfd, 0xb5, 0x1d, 0x16, 0x4e, 0xfc, 0xcc}),
+          QuicConnectionId({0x2f, 0xcc, 0x38, 0x1b, 0xc7, 0x4c, 0xb4, 0xfb,
+                            0xad, 0x28, 0x23, 0xa3, 0xd1, 0xf8, 0xfe, 0xd2}),
           MakeServerId(kServerId, 10),
       },
       {
@@ -89,10 +89,10 @@
           MakeServerId(kServerId, 8),
       },
       {
-          *LoadBalancerConfig::Create(3, 9, 9, kKey),
-          QuicConnectionId({0x72, 0x12, 0x4d, 0x1e, 0xb8, 0xfb, 0xb2, 0x1e,
-                            0x4a, 0x49, 0x0c, 0xa5, 0x3c, 0xfe, 0x21, 0xd0,
-                            0x4a, 0xe6, 0x3a}),
+          *LoadBalancerConfig::Create(0, 9, 9, kKey),
+          QuicConnectionId({0x12, 0x57, 0x79, 0xc9, 0xcc, 0x86, 0xbe, 0xb3,
+                            0xa3, 0xa4, 0xa3, 0xca, 0x96, 0xfc, 0xe4, 0xbf,
+                            0xe0, 0xcd, 0xbc}),
           MakeServerId(kServerId, 9),
       },
   };
diff --git a/quiche/quic/load_balancer/load_balancer_encoder_test.cc b/quiche/quic/load_balancer/load_balancer_encoder_test.cc
index c48de0d..f776eac 100644
--- a/quiche/quic/load_balancer/load_balancer_encoder_test.cc
+++ b/quiche/quic/load_balancer/load_balancer_encoder_test.cc
@@ -194,7 +194,7 @@
   }
 }
 
-// Follow example in draft-ietf-quic-load-balancers-15.
+// Follow example in draft-ietf-quic-load-balancers-19.
 TEST_F(LoadBalancerEncoderTest, FollowSpecExample) {
   const uint8_t config_id = 0, server_id_len = 3, nonce_len = 4;
   const uint8_t raw_server_id[] = {
@@ -215,14 +215,14 @@
   EXPECT_TRUE(
       encoder->UpdateConfig(*config, LoadBalancerServerId(raw_server_id)));
   EXPECT_TRUE(encoder->IsEncoding());
-  const char raw_connection_id[] = {0x07, 0x8e, 0x9a, 0x91,
-                                    0xf4, 0x94, 0x97, 0x62};
+  const char raw_connection_id[] = {0x07, 0x67, 0x94, 0x7d,
+                                    0x29, 0xbe, 0x05, 0x4a};
   auto expected =
       QuicConnectionId(raw_connection_id, 1 + server_id_len + nonce_len);
   EXPECT_EQ(encoder->GenerateConnectionId(), expected);
 }
 
-// Compare test vectors from Appendix B of draft-ietf-quic-load-balancers-15.
+// Compare test vectors from Appendix B of draft-ietf-quic-load-balancers-19.
 TEST_F(LoadBalancerEncoderTest, EncoderTestVectors) {
   // Try (1) the "standard" ConnectionId length of 8
   // (2) server_id_len > nonce_len, so there is a fourth decryption pass
@@ -231,13 +231,13 @@
   const LoadBalancerEncoderTestCase test_vectors[4] = {
       {
           *LoadBalancerConfig::Create(0, 3, 4, kKey),
-          QuicConnectionId({0x07, 0x41, 0x26, 0xee, 0x38, 0xbf, 0x54, 0x54}),
+          QuicConnectionId({0x07, 0x20, 0xb1, 0xd0, 0x7b, 0x35, 0x9d, 0x3c}),
           MakeServerId(kServerId, 3),
       },
       {
           *LoadBalancerConfig::Create(1, 10, 5, kKey),
-          QuicConnectionId({0x2f, 0xcd, 0x3f, 0x57, 0x2d, 0x4e, 0xef, 0xb0,
-                            0x46, 0xfd, 0xb5, 0x1d, 0x16, 0x4e, 0xfc, 0xcc}),
+          QuicConnectionId({0x2f, 0xcc, 0x38, 0x1b, 0xc7, 0x4c, 0xb4, 0xfb,
+                            0xad, 0x28, 0x23, 0xa3, 0xd1, 0xf8, 0xfe, 0xd2}),
           MakeServerId(kServerId, 10),
       },
       {
@@ -249,9 +249,9 @@
       },
       {
           *LoadBalancerConfig::Create(0, 9, 9, kKey),
-          QuicConnectionId({0x12, 0x12, 0x4d, 0x1e, 0xb8, 0xfb, 0xb2, 0x1e,
-                            0x4a, 0x49, 0x0c, 0xa5, 0x3c, 0xfe, 0x21, 0xd0,
-                            0x4a, 0xe6, 0x3a}),
+          QuicConnectionId({0x12, 0x57, 0x79, 0xc9, 0xcc, 0x86, 0xbe, 0xb3,
+                            0xa3, 0xa4, 0xa3, 0xca, 0x96, 0xfc, 0xe4, 0xbf,
+                            0xe0, 0xcd, 0xbc}),
           MakeServerId(kServerId, 9),
       },
   };
@@ -277,7 +277,7 @@
   LoadBalancerEncoderPeer::SetNumNoncesLeft(*encoder, 2);
   EXPECT_EQ(encoder->num_nonces_left(), 2);
   EXPECT_EQ(encoder->GenerateConnectionId(),
-            QuicConnectionId({0x07, 0x1d, 0x4a, 0xb8, 0xc6, 0x1d, 0xd6, 0x5d}));
+            QuicConnectionId({0x07, 0x29, 0xd8, 0xc2, 0x17, 0xce, 0x2d, 0x92}));
   EXPECT_EQ(encoder->num_nonces_left(), 1);
   encoder->GenerateConnectionId();
   EXPECT_EQ(encoder->IsEncoding(), false);