Adds a BB2U copt that causes QUIC BBR2 to wait two rounds without draining the queue before exiting PROBE_UP; BB2S has the same effect in STARTUP.

BB2U achieves 18% of the full bandwidth in PROBE_UP with 100x bw increase.

Protected by quic_reloadable_flag_quic_bbr2_probe_two_rounds.

PiperOrigin-RevId: 492320582
diff --git a/quiche/quic/core/congestion_control/bbr2_misc.cc b/quiche/quic/core/congestion_control/bbr2_misc.cc
index d234f5b..55e63e9 100644
--- a/quiche/quic/core/congestion_control/bbr2_misc.cc
+++ b/quiche/quic/core/congestion_control/bbr2_misc.cc
@@ -379,6 +379,7 @@
 void Bbr2NetworkModel::RestartRoundEarly() {
   OnNewRound();
   round_trip_counter_.RestartRound();
+  rounds_with_queueing_ = 0;
 }
 
 void Bbr2NetworkModel::OnNewRound() {
@@ -437,26 +438,23 @@
   return false;
 }
 
-bool Bbr2NetworkModel::CheckPersistentQueue(
-    const Bbr2CongestionEvent& congestion_event, float bdp_gain) {
+void Bbr2NetworkModel::CheckPersistentQueue(
+    const Bbr2CongestionEvent& congestion_event, float target_gain) {
   QUICHE_DCHECK(congestion_event.end_of_round_trip);
   QUICHE_DCHECK_NE(min_bytes_in_flight_in_round_,
                    std::numeric_limits<uint64_t>::max());
-  QuicByteCount target = bdp_gain * BDP();
-  if (bdp_gain >= 2) {
-    // Use a more conservative threshold for STARTUP because CWND gain is 2.
-    if (target <= QueueingThresholdExtraBytes()) {
-      return false;
-    }
-    target -= QueueingThresholdExtraBytes();
-  } else {
-    target += QueueingThresholdExtraBytes();
+  QUICHE_DCHECK_GE(target_gain, Params().full_bw_threshold);
+  QuicByteCount target =
+      std::max(static_cast<QuicByteCount>(target_gain * BDP()),
+               BDP() + QueueingThresholdExtraBytes());
+  if (min_bytes_in_flight_in_round_ < target) {
+    rounds_with_queueing_ = 0;
+    return;
   }
-  if (min_bytes_in_flight_in_round_ > target) {
+  rounds_with_queueing_++;
+  if (rounds_with_queueing_ >= Params().max_startup_queue_rounds) {
     full_bandwidth_reached_ = true;
-    return true;
   }
-  return false;
 }
 
 }  // namespace quic
diff --git a/quiche/quic/core/congestion_control/bbr2_misc.h b/quiche/quic/core/congestion_control/bbr2_misc.h
index e817e74..aac7f80 100644
--- a/quiche/quic/core/congestion_control/bbr2_misc.h
+++ b/quiche/quic/core/congestion_control/bbr2_misc.h
@@ -86,6 +86,11 @@
 
   QuicRoundTripCount startup_full_bw_rounds = 3;
 
+  // Number of rounds to stay in STARTUP when there's a sufficient queue that
+  // bytes_in_flight never drops below the target (1.75 * BDP).  0 indicates the
+  // feature is disabled and we never exit due to queueing.
+  QuicRoundTripCount max_startup_queue_rounds = 0;
+
   // The minimum number of loss marking events to exit STARTUP.
   int64_t startup_full_loss_count =
       GetQuicFlag(quic_bbr2_default_startup_full_loss_count);
@@ -98,9 +103,6 @@
   // acked when bandwidth increases.
   bool startup_include_extra_acked = false;
 
-  // If true, exit STARTUP if bytes in flight has not gone below 2 * BDP at
-  // any point in the last round.
-  bool exit_startup_on_persistent_queue = false;
 
   /*
    * DRAIN parameters.
@@ -151,10 +153,14 @@
    * PROBE_UP parameters.
    */
   bool probe_up_includes_acks_after_cwnd_limited = false;
-  bool probe_up_dont_exit_if_no_queue_ = false;
   bool probe_up_ignore_inflight_hi = true;
   bool probe_up_simplify_inflight_hi = false;
 
+  // Number of rounds to stay in PROBE_UP when there's a sufficient queue that
+  // bytes_in_flight never drops below the target.  0 indicates the feature is
+  // disabled and we never exit due to queueing.
+  QuicRoundTripCount max_probe_up_queue_rounds = 0;
+
   /*
    * PROBE_RTT parameters.
    */
@@ -455,10 +461,10 @@
   // |full_bandwidth_reached_| to true.
   bool HasBandwidthGrowth(const Bbr2CongestionEvent& congestion_event);
 
-  // Returns true if the minimum bytes in flight during the round is greater
-  // than the BDP * |bdp_gain|.
-  bool CheckPersistentQueue(const Bbr2CongestionEvent& congestion_event,
-                            float bdp_gain);
+  // Increments rounds_with_queueing_ if the round's minimum bytes in flight is
+  // at least max(BDP * |target_gain|, BDP + extra bytes); else resets it to 0.
+  void CheckPersistentQueue(const Bbr2CongestionEvent& congestion_event,
+                            float target_gain);
 
   QuicPacketNumber last_sent_packet() const {
     return round_trip_counter_.last_sent_packet();
@@ -532,6 +538,9 @@
   QuicRoundTripCount rounds_without_bandwidth_growth() const {
     return rounds_without_bandwidth_growth_;
   }
+  QuicRoundTripCount rounds_with_queueing() const {
+    return rounds_with_queueing_;
+  }
 
  private:
   // Called when a new round trip starts.
@@ -592,6 +601,9 @@
   bool full_bandwidth_reached_ = false;
   QuicBandwidth full_bandwidth_baseline_ = QuicBandwidth::Zero();
   QuicRoundTripCount rounds_without_bandwidth_growth_ = 0;
+
+  // Used by STARTUP and PROBE_UP to decide when to exit.
+  QuicRoundTripCount rounds_with_queueing_ = 0;
 };
 
 enum class Bbr2Mode : uint8_t {
diff --git a/quiche/quic/core/congestion_control/bbr2_probe_bw.cc b/quiche/quic/core/congestion_control/bbr2_probe_bw.cc
index 1e35b97..5dd509f 100644
--- a/quiche/quic/core/congestion_control/bbr2_probe_bw.cc
+++ b/quiche/quic/core/congestion_control/bbr2_probe_bw.cc
@@ -10,7 +10,6 @@
 #include "quiche/quic/core/quic_time.h"
 #include "quiche/quic/core/quic_types.h"
 #include "quiche/quic/platform/api/quic_flag_utils.h"
-#include "quiche/quic/platform/api/quic_flags.h"
 #include "quiche/quic/platform/api/quic_logging.h"
 
 namespace quic {
@@ -198,11 +197,7 @@
     if (cycle_.is_sample_from_probing) {
       cycle_.is_sample_from_probing = false;
       if (!send_state.is_app_limited ||
-          Params().probe_up_dont_exit_if_no_queue_) {
-        if (send_state.is_app_limited) {
-          // If there's excess loss or a queue is building, exit even if the
-          // last sample was app limited.
-        }
+          Params().max_probe_up_queue_rounds > 0) {
         const QuicByteCount inflight_target =
             sender_->GetTargetBytesInflight() * (1.0 - Params().beta);
         if (inflight_at_send >= inflight_target) {
@@ -497,10 +492,16 @@
     // TCP uses min_rtt instead of a full round:
     //   HasPhaseLasted(model_->MinRtt(), congestion_event)
   } else if (cycle_.rounds_in_phase > 0) {
-    if (Params().probe_up_dont_exit_if_no_queue_) {
-      is_queuing = congestion_event.end_of_round_trip &&
-                   model_->CheckPersistentQueue(congestion_event,
-                                                Params().full_bw_threshold);
+    if (Params().max_probe_up_queue_rounds > 0) {
+      if (congestion_event.end_of_round_trip) {
+        model_->CheckPersistentQueue(congestion_event,
+                                     Params().full_bw_threshold);
+        if (model_->rounds_with_queueing() >=
+            Params().max_probe_up_queue_rounds) {
+          QUIC_RELOADABLE_FLAG_COUNT_N(quic_bbr2_probe_two_rounds, 3, 3);
+          is_queuing = true;
+        }
+      }
     } else {
       QuicByteCount queuing_threshold_extra_bytes =
           model_->QueueingThresholdExtraBytes();
diff --git a/quiche/quic/core/congestion_control/bbr2_sender.cc b/quiche/quic/core/congestion_control/bbr2_sender.cc
index 86e00ed..00c136a 100644
--- a/quiche/quic/core/congestion_control/bbr2_sender.cc
+++ b/quiche/quic/core/congestion_control/bbr2_sender.cc
@@ -173,7 +173,7 @@
     params_.probe_bw_check_cwnd_limited_before_aggregation_epoch = true;
   }
   if (ContainsQuicTag(connection_options, kB202)) {
-    params_.probe_up_dont_exit_if_no_queue_ = true;
+    params_.max_probe_up_queue_rounds = 1;
   }
   if (ContainsQuicTag(connection_options, kB203)) {
     params_.probe_up_ignore_inflight_hi = false;
@@ -185,7 +185,7 @@
     params_.startup_include_extra_acked = true;
   }
   if (ContainsQuicTag(connection_options, kB207)) {
-    params_.exit_startup_on_persistent_queue = true;
+    params_.max_startup_queue_rounds = 1;
   }
   if (ContainsQuicTag(connection_options, kBBRA)) {
     model_.SetStartNewAggregationEpochAfterFullRound(true);
@@ -211,6 +211,16 @@
     // so ensure we're not ignoring it.
     params_.probe_up_ignore_inflight_hi = false;
   }
+  if (GetQuicReloadableFlag(quic_bbr2_probe_two_rounds) &&
+      ContainsQuicTag(connection_options, kBB2U)) {
+    QUIC_RELOADABLE_FLAG_COUNT_N(quic_bbr2_probe_two_rounds, 1, 3);
+    params_.max_probe_up_queue_rounds = 2;
+  }
+  if (GetQuicReloadableFlag(quic_bbr2_probe_two_rounds) &&
+      ContainsQuicTag(connection_options, kBB2S)) {
+    QUIC_RELOADABLE_FLAG_COUNT_N(quic_bbr2_probe_two_rounds, 2, 3);
+    params_.max_startup_queue_rounds = 2;
+  }
 }
 
 Limits<QuicByteCount> Bbr2Sender::GetCwndLimitsByMode() const {
diff --git a/quiche/quic/core/congestion_control/bbr2_simulator_test.cc b/quiche/quic/core/congestion_control/bbr2_simulator_test.cc
index 08237bb..e3757fe 100644
--- a/quiche/quic/core/congestion_control/bbr2_simulator_test.cc
+++ b/quiche/quic/core/congestion_control/bbr2_simulator_test.cc
@@ -444,6 +444,38 @@
   EXPECT_EQ(0u, sender_connection_stats().packets_lost);
 }
 
+// Exit STARTUP after two consecutive rounds with a persistent queue (BB2S).
+TEST_F(Bbr2DefaultTopologyTest, NormalStartupBB2S) {
+  SetQuicReloadableFlag(quic_bbr2_probe_two_rounds, true);
+  SetConnectionOption(kBB2S);
+  DefaultTopologyParams params;
+  CreateNetwork(params);
+
+  // Run until the full bandwidth is reached and check how many rounds it was.
+  sender_endpoint_.AddBytesToTransfer(12 * 1024 * 1024);
+  QuicRoundTripCount max_bw_round = 0;
+  QuicBandwidth max_bw(QuicBandwidth::Zero());
+  bool simulator_result = simulator_.RunUntilOrTimeout(
+      [this, &max_bw, &max_bw_round]() {
+        if (max_bw < sender_->ExportDebugState().bandwidth_hi) {
+          max_bw = sender_->ExportDebugState().bandwidth_hi;
+          max_bw_round = sender_->ExportDebugState().round_trip_count;
+        }
+        return sender_->ExportDebugState().startup.full_bandwidth_reached;
+      },
+      QuicTime::Delta::FromSeconds(5));
+  ASSERT_TRUE(simulator_result);
+  EXPECT_EQ(Bbr2Mode::DRAIN, sender_->ExportDebugState().mode);
+  // BB2S reduces 3 rounds without bandwidth growth to 2.
+  EXPECT_EQ(2u, sender_->ExportDebugState().round_trip_count - max_bw_round);
+  EXPECT_EQ(
+      2u,
+      sender_->ExportDebugState().startup.round_trips_without_bandwidth_growth);
+  EXPECT_APPROX_EQ(params.BottleneckBandwidth(),
+                   sender_->ExportDebugState().bandwidth_hi, 0.01f);
+  EXPECT_EQ(0u, sender_connection_stats().packets_lost);
+}
+
 // Test a simple long data transfer in the default setup.
 TEST_F(Bbr2DefaultTopologyTest, SimpleTransfer) {
   DefaultTopologyParams params;
@@ -1246,6 +1278,120 @@
                    sender_->ExportDebugState().bandwidth_hi, 0.9f);
 }
 
+// Test Bbr2's reaction to a 100x bandwidth increase during a transfer with BB2U
+TEST_F(Bbr2DefaultTopologyTest, QUIC_SLOW_TEST(BandwidthIncreaseBB2U)) {
+  SetQuicReloadableFlag(quic_bbr2_probe_two_rounds, true);
+  SetConnectionOption(kBB2U);
+  DefaultTopologyParams params;
+  params.local_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(15000);
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(100);
+  CreateNetwork(params);
+
+  sender_endpoint_.AddBytesToTransfer(10 * 1024 * 1024);
+
+  simulator_.RunFor(QuicTime::Delta::FromSeconds(15));
+  EXPECT_TRUE(Bbr2ModeIsOneOf({Bbr2Mode::PROBE_BW, Bbr2Mode::PROBE_RTT}));
+  QUIC_LOG(INFO) << "Bandwidth increasing at time " << SimulatedNow();
+
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_est, 0.1f);
+  EXPECT_LE(sender_loss_rate_in_packets(), 0.25);
+
+  // Now increase the bottleneck bandwidth from 100Kbps to 10Mbps.
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(10000);
+  TestLink()->set_bandwidth(params.test_link.bandwidth);
+
+  bool simulator_result = simulator_.RunUntilOrTimeout(
+      [this]() { return sender_endpoint_.bytes_to_transfer() == 0; },
+      QuicTime::Delta::FromSeconds(50));
+  EXPECT_TRUE(simulator_result);
+  // Ensure the full bandwidth is discovered.
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_hi, 0.1f);
+}
+
+// Test Bbr2's reaction to a 100x bandwidth increase during a transfer with BB2U
+// in the presence of ACK aggregation.
+TEST_F(Bbr2DefaultTopologyTest,
+       QUIC_SLOW_TEST(BandwidthIncreaseBB2UAggregation)) {
+  SetQuicReloadableFlag(quic_bbr2_probe_two_rounds, true);
+  SetConnectionOption(kBB2U);
+  DefaultTopologyParams params;
+  params.local_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(15000);
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(100);
+  CreateNetwork(params);
+
+  // 2 RTTs of aggregation, with a max of 10kb.
+  EnableAggregation(10 * 1024, 2 * params.RTT());
+
+  // Reduce the payload to 5MB because 10MB takes too long.
+  sender_endpoint_.AddBytesToTransfer(5 * 1024 * 1024);
+
+  simulator_.RunFor(QuicTime::Delta::FromSeconds(15));
+  EXPECT_TRUE(Bbr2ModeIsOneOf({Bbr2Mode::PROBE_BW, Bbr2Mode::PROBE_RTT}));
+  QUIC_LOG(INFO) << "Bandwidth increasing at time " << SimulatedNow();
+
+  // This is much farther off when aggregation is present,
+  // Ideally BSAO or another option would fix this.
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_est, 0.45f);
+  EXPECT_LE(sender_loss_rate_in_packets(), 0.30);
+
+  // Now increase the bottleneck bandwidth from 100Kbps to 10Mbps.
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(10000);
+  TestLink()->set_bandwidth(params.test_link.bandwidth);
+
+  bool simulator_result = simulator_.RunUntilOrTimeout(
+      [this]() { return sender_endpoint_.bytes_to_transfer() == 0; },
+      QuicTime::Delta::FromSeconds(50));
+  EXPECT_TRUE(simulator_result);
+  // Ensure at least 18% of the full bandwidth is observed (margin is 0.82).
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_hi, 0.82f);
+}
+
+// Test Bbr2's reaction to a 100x bandwidth increase during a transfer with BB2U
+// and BBHI in the presence of ACK aggregation.
+TEST_F(Bbr2DefaultTopologyTest,
+       QUIC_SLOW_TEST(BandwidthIncreaseBB2UandBBHIAggregation)) {
+  SetQuicReloadableFlag(quic_bbr2_probe_two_rounds, true);
+  SetConnectionOption(kBB2U);
+  SetQuicReloadableFlag(quic_bbr2_simplify_inflight_hi, true);
+  SetConnectionOption(kBBHI);
+  DefaultTopologyParams params;
+  params.local_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(15000);
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(100);
+  CreateNetwork(params);
+
+  // 2 RTTs of aggregation, with a max of 10kb.
+  EnableAggregation(10 * 1024, 2 * params.RTT());
+
+  // Reduce the payload to 5MB because 10MB takes too long.
+  sender_endpoint_.AddBytesToTransfer(5 * 1024 * 1024);
+
+  simulator_.RunFor(QuicTime::Delta::FromSeconds(15));
+  EXPECT_TRUE(Bbr2ModeIsOneOf({Bbr2Mode::PROBE_BW, Bbr2Mode::PROBE_RTT}));
+  QUIC_LOG(INFO) << "Bandwidth increasing at time " << SimulatedNow();
+
+  // This is much farther off when aggregation is present,
+  // Ideally BSAO or another option would fix this.
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_est, 0.45f);
+  EXPECT_LE(sender_loss_rate_in_packets(), 0.30);
+
+  // Now increase the bottleneck bandwidth from 100Kbps to 10Mbps.
+  params.test_link.bandwidth = QuicBandwidth::FromKBitsPerSecond(10000);
+  TestLink()->set_bandwidth(params.test_link.bandwidth);
+
+  bool simulator_result = simulator_.RunUntilOrTimeout(
+      [this]() { return sender_endpoint_.bytes_to_transfer() == 0; },
+      QuicTime::Delta::FromSeconds(50));
+  EXPECT_TRUE(simulator_result);
+  // Ensure at least 18% of the full bandwidth is observed (margin is 0.82).
+  EXPECT_APPROX_EQ(params.test_link.bandwidth,
+                   sender_->ExportDebugState().bandwidth_hi, 0.82f);
+}
+
 // Test the number of losses incurred by the startup phase in a situation when
 // the buffer is less than BDP.
 TEST_F(Bbr2DefaultTopologyTest, PacketLossOnSmallBufferStartup) {
diff --git a/quiche/quic/core/congestion_control/bbr2_startup.cc b/quiche/quic/core/congestion_control/bbr2_startup.cc
index 8f9bd0e..3c84f51 100644
--- a/quiche/quic/core/congestion_control/bbr2_startup.cc
+++ b/quiche/quic/core/congestion_control/bbr2_startup.cc
@@ -53,8 +53,10 @@
     return Bbr2Mode::STARTUP;
   }
   bool has_bandwidth_growth = model_->HasBandwidthGrowth(congestion_event);
-  if (Params().exit_startup_on_persistent_queue && !has_bandwidth_growth) {
-    model_->CheckPersistentQueue(congestion_event, Params().startup_cwnd_gain);
+  if (Params().max_startup_queue_rounds > 0 && !has_bandwidth_growth) {
+    // 1.75 is less than the 2x CWND gain, but substantially more than 1.25x,
+    // the minimum bandwidth increase expected during STARTUP.
+    model_->CheckPersistentQueue(congestion_event, 1.75);
   }
   // TCP BBR always exits upon excessive losses. QUIC BBRv1 does not exit
   // upon excessive losses, if enough bandwidth growth is observed or if the
diff --git a/quiche/quic/core/crypto/crypto_protocol.h b/quiche/quic/core/crypto/crypto_protocol.h
index bc9556d..e0a725c 100644
--- a/quiche/quic/core/crypto/crypto_protocol.h
+++ b/quiche/quic/core/crypto/crypto_protocol.h
@@ -178,6 +178,12 @@
 const QuicTag kB206 = TAG('B', '2', '0', '6');   // Exit STARTUP after 2 losses.
 const QuicTag kB207 = TAG('B', '2', '0', '7');   // Exit STARTUP on persistent
                                                  // queue
+const QuicTag kBB2U = TAG('B', 'B', '2', 'U');   // Exit PROBE_UP on
+                                                 // min_bytes_in_flight for two
+                                                 // rounds in a row.
+const QuicTag kBB2S = TAG('B', 'B', '2', 'S');   // Exit STARTUP on
+                                                 // min_bytes_in_flight for two
+                                                 // rounds in a row.
 const QuicTag kNTLP = TAG('N', 'T', 'L', 'P');   // No tail loss probe
 const QuicTag k1TLP = TAG('1', 'T', 'L', 'P');   // 1 tail loss probe
 const QuicTag k1RTO = TAG('1', 'R', 'T', 'O');   // Send 1 packet upon RTO
diff --git a/quiche/quic/core/quic_flags_list.h b/quiche/quic/core/quic_flags_list.h
index 5fcb551..f3af359 100644
--- a/quiche/quic/core/quic_flags_list.h
+++ b/quiche/quic/core/quic_flags_list.h
@@ -89,6 +89,8 @@
 QUIC_FLAG(quic_reloadable_flag_quic_default_to_bbr, false)
 // When true, support draft-ietf-quic-v2-01
 QUIC_FLAG(quic_reloadable_flag_quic_enable_version_2_draft_01, false)
+// When true, the BB2U copt causes BBR2 to wait two rounds without draining the queue before exiting PROBE_UP and BB2S has the same effect in STARTUP.
+QUIC_FLAG(quic_reloadable_flag_quic_bbr2_probe_two_rounds, true)
 // When true, the BBHI copt causes QUIC BBRv2 to use a simpler algorithm for raising inflight_hi in PROBE_UP.
 QUIC_FLAG(quic_reloadable_flag_quic_bbr2_simplify_inflight_hi, true)
 // When true, the BBR4 copt sets the extra_acked window to 20 RTTs and BBR5 sets it to 40 RTTs.