summaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-10-16 10:53:13 -0700
committerJakub Kicinski <kuba@kernel.org>2025-10-31 06:46:03 -0700
commit1a2352ad82b515035efe563f997ef8f5ca4f8080 (patch)
tree5ab09f8bb295fef3b0ae901438132fbe757e9d95 /net/mptcp/protocol.c
parent0d0eb186421d0886ac466008235f6d9eedaf918e (diff)
parente5763491237ffee22d9b554febc2d00669f81dee (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.18-rc4). No conflicts, adjacent changes: drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ded9813d17d3 ("net: stmmac: Consider Tx VLAN offload tag length for maxSDU") 26ab9830beab ("net: stmmac: replace has_xxxx with core_type") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c83
1 files changed, 53 insertions, 30 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 94a5f6dcc577..d568575cdcb5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -194,17 +194,26 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
* - mptcp does not maintain a msk-level window clamp
* - returns true when the receive buffer is actually updated
*/
-static bool mptcp_rcvbuf_grow(struct sock *sk)
+static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval)
{
struct mptcp_sock *msk = mptcp_sk(sk);
const struct net *net = sock_net(sk);
- int rcvwin, rcvbuf, cap;
+ u32 rcvwin, rcvbuf, cap, oldval;
+ u64 grow;
+ oldval = msk->rcvq_space.space;
+ msk->rcvq_space.space = newval;
if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
return false;
- rcvwin = msk->rcvq_space.space << 1;
+ /* DRS is always one RTT late. */
+ rcvwin = newval << 1;
+
+ /* slow start: allow the sender to double its rate. */
+ grow = (u64)rcvwin * (newval - oldval);
+ do_div(grow, oldval);
+ rcvwin += grow << 1;
if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq;
@@ -334,7 +343,7 @@ end:
skb_set_owner_r(skb, sk);
/* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
if (sk->sk_socket)
- mptcp_rcvbuf_grow(sk);
+ mptcp_rcvbuf_grow(sk, msk->rcvq_space.space);
}
static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset,
@@ -998,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk)
if (WARN_ON_ONCE(!msk->recovery))
break;
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ msk->first_pending = mptcp_send_next(sk);
}
dfrag_clear(sk, dfrag);
@@ -1291,7 +1300,12 @@ alloc_skb:
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
- if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
+ /* No need for zero probe if there are any data pending
+ * either at the msk or ssk level; skb is the current write
+ * queue tail and can be empty at this point.
+ */
+ if (snd_una != msk->snd_nxt || skb->len ||
+ skb != tcp_send_head(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@@ -1342,6 +1356,7 @@ alloc_skb:
mpext->dsn64);
if (zero_window_probe) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
mpext->frozen = 1;
if (READ_ONCE(msk->csum_enabled))
@@ -1544,7 +1559,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
mptcp_update_post_push(msk, dfrag, ret);
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ msk->first_pending = mptcp_send_next(sk);
if (msk->snd_burst <= 0 ||
!sk_stream_memory_free(ssk) ||
@@ -1904,7 +1919,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
get_page(dfrag->page);
list_add_tail(&dfrag->list, &msk->rtx_queue);
if (!msk->first_pending)
- WRITE_ONCE(msk->first_pending, dfrag);
+ msk->first_pending = dfrag;
}
pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
@@ -1937,22 +1952,36 @@ do_error:
static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
-static int __mptcp_recvmsg_mskq(struct sock *sk,
- struct msghdr *msg,
- size_t len, int flags,
+static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags, int copied_total,
struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb, *tmp;
+ int total_data_len = 0;
int copied = 0;
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
- u32 offset = MPTCP_SKB_CB(skb)->offset;
+ u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
u32 data_len = skb->len - offset;
- u32 count = min_t(size_t, len - copied, data_len);
+ u32 count;
int err;
+ if (flags & MSG_PEEK) {
+ /* skip already peeked skbs */
+ if (total_data_len + data_len <= copied_total) {
+ total_data_len += data_len;
+ continue;
+ }
+
+ /* skip the already peeked data in the current skb */
+ delta = copied_total - total_data_len;
+ offset += delta;
+ data_len -= delta;
+ }
+
+ count = min_t(size_t, len - copied, data_len);
if (!(flags & MSG_TRUNC)) {
err = skb_copy_datagram_msg(skb, offset, msg, count);
if (unlikely(err < 0)) {
@@ -1969,16 +1998,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
copied += count;
- if (count < data_len) {
- if (!(flags & MSG_PEEK)) {
+ if (!(flags & MSG_PEEK)) {
+ msk->bytes_consumed += count;
+ if (count < data_len) {
MPTCP_SKB_CB(skb)->offset += count;
MPTCP_SKB_CB(skb)->map_seq += count;
- msk->bytes_consumed += count;
+ break;
}
- break;
- }
- if (!(flags & MSG_PEEK)) {
/* avoid the indirect call, we know the destructor is sock_rfree */
skb->destructor = NULL;
skb->sk = NULL;
@@ -1986,7 +2013,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
sk_mem_uncharge(sk, skb->truesize);
__skb_unlink(skb, &sk->sk_receive_queue);
skb_attempt_defer_free(skb);
- msk->bytes_consumed += count;
}
if (copied >= len)
@@ -2050,9 +2076,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- msk->rcvq_space.space = msk->rcvq_space.copied;
- if (mptcp_rcvbuf_grow(sk)) {
-
+ if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) {
/* Make subflows follow along. If we do not do this, we
* get drops at subflow level if skbs can't be moved to
* the mptcp rx queue fast enough (announced rcv_win can
@@ -2064,8 +2088,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
ssk = mptcp_subflow_tcp_sock(subflow);
slow = lock_sock_fast(ssk);
- tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied;
- tcp_rcvbuf_grow(ssk);
+ /* subflows can be added before tcp_init_transfer() */
+ if (tcp_sk(ssk)->rcvq_space.space)
+ tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied);
unlock_sock_fast(ssk, slow);
}
}
@@ -2184,7 +2209,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int err, bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
+ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
+ copied, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2875,7 +2901,7 @@ static void __mptcp_clear_xmit(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- WRITE_ONCE(msk->first_pending, NULL);
+ msk->first_pending = NULL;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
}
@@ -3415,9 +3441,6 @@ void __mptcp_data_acked(struct sock *sk)
void __mptcp_check_push(struct sock *sk, struct sock *ssk)
{
- if (!mptcp_send_head(sk))
- return;
-
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, false);
else