summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2025-10-13 14:59:26 +0000
committerJakub Kicinski <kuba@kernel.org>2025-10-15 08:56:30 -0700
commit1c51450f1afff1e7419797720df3fbd9ccbf610c (patch)
tree0d4870a48bf8659f4cc741f42b47624fc49e7b5e /net/ipv4/tcp.c
parent6378e25ee1ca2ed687eee78eff7bd588d52a4e14 (diff)
tcp: better handle TCP_TX_DELAY on established flows
Some applications use the TCP_TX_DELAY socket option after a TCP flow is established. Some metrics need to be updated, otherwise TCP might take time to adapt to the new (emulated) RTT. This patch adjusts tp->srtt_us, tp->rtt_min, icsk_rto and sk->sk_pacing_rate. This is best effort, and for instance icsk_rto is reset without taking backoff into account. Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://patch.msgid.link/20251013145926.833198-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c31
1 files changed, 27 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8a18aeca7ab0..4d720aa09a4c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3583,9 +3583,12 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
EXPORT_IPV6_MOD(tcp_tx_delay_enabled);
-static void tcp_enable_tx_delay(void)
+static void tcp_enable_tx_delay(struct sock *sk, int val)
{
- if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ s32 delta = (val - tp->tcp_tx_delay) << 3;
+
+ if (val && !static_branch_unlikely(&tcp_tx_delay_enabled)) {
static int __tcp_tx_delay_enabled = 0;
if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
@@ -3593,6 +3596,22 @@ static void tcp_enable_tx_delay(void)
pr_info("TCP_TX_DELAY enabled\n");
}
}
+ /* If we change tcp_tx_delay on a live flow, adjust tp->srtt_us,
+ * tp->rtt_min, icsk_rto and sk->sk_pacing_rate.
+ * This is best effort.
+ */
+ if (delta && sk->sk_state == TCP_ESTABLISHED) {
+ s64 srtt = (s64)tp->srtt_us + delta;
+
+ tp->srtt_us = clamp_t(s64, srtt, 1, ~0U);
+
+ /* Note: does not deal with non zero icsk_backoff */
+ tcp_set_rto(sk);
+
+ minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
+
+ tcp_update_pacing_rate(sk);
+ }
}
/* When set indicates to always queue non-full frames. Later the user clears
@@ -4119,8 +4138,12 @@ ao_parse:
tp->recvmsg_inq = val;
break;
case TCP_TX_DELAY:
- if (val)
- tcp_enable_tx_delay();
+ /* tp->srtt_us is u32, and is shifted by 3 */
+ if (val < 0 || val >= (1U << (31 - 3))) {
+ err = -EINVAL;
+ break;
+ }
+ tcp_enable_tx_delay(sk, val);
WRITE_ONCE(tp->tcp_tx_delay, val);
break;
default: