Skip to content

Commit

Permalink
Merge branch 'mptcp-rx-path-refactor'
Browse files Browse the repository at this point in the history
Matthieu Baerts says:

====================
mptcp: rx path refactor

Paolo worked on this RX path refactor for these two main reasons:

- Currently, the MPTCP RX path introduces quite a bit of 'exceptional'
  accounting/locking processing WRT plain TCP, adding to the
  implementation complexity in a miserable way.

- The performance gap WRT plain TCP for single subflow connections is
  quite measurable.

The present refactor addresses both the above items: most of the
additional complexity is dropped, and single-stream performance
increases measurably, from 55Gbps to 71Gbps in Paolo's loopback test.
As a reference, plain TCP was around 84Gbps on the same host.

The above comes at a price: the patches are invasive, even in subtle ways.

Note: patch 5/7 removes the sk_forward_alloc_get() helper, which caused
some trivial modifications in different places in the net tree: sockets,
IPv4, sched. That's why a few more people have been Cc'd here. Feel free
to only look at this patch 5/7.
====================

Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-0-4a47d90d7998@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 20, 2025
2 parents 9a6c2b2 + e0ca405 commit 22af030
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 289 deletions.
13 changes: 0 additions & 13 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -1285,10 +1285,6 @@ struct proto {
unsigned int inuse_idx;
#endif

#if IS_ENABLED(CONFIG_MPTCP)
int (*forward_alloc_get)(const struct sock *sk);
#endif

bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
Expand Down Expand Up @@ -1349,15 +1345,6 @@ int sock_load_diag_module(int family, int protocol);

INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));

static inline int sk_forward_alloc_get(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (sk->sk_prot->forward_alloc_get)
return sk->sk_prot->forward_alloc_get(sk);
#endif
return READ_ONCE(sk->sk_forward_alloc);
}

static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
Expand Down
2 changes: 1 addition & 1 deletion net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -3882,7 +3882,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc));
WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
WARN_ON_ONCE(sk->sk_wmem_queued);
WARN_ON_ONCE(sk_forward_alloc_get(sk));
WARN_ON_ONCE(sk->sk_forward_alloc);

kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
.idiag_fmem = sk_forward_alloc_get(sk),
.idiag_fmem = READ_ONCE(sk->sk_forward_alloc),
.idiag_tmem = sk_wmem_alloc_get(sk),
};

Expand Down
27 changes: 4 additions & 23 deletions net/mptcp/fastopen.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,40 +40,21 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
tp->copied_seq += skb->len;
subflow->ssn_offset += skb->len;

/* initialize a dummy sequence number, we will update it at MPC
* completion, if needed
*/
/* Only the sequence delta is relevant */
MPTCP_SKB_CB(skb)->map_seq = -skb->len;
MPTCP_SKB_CB(skb)->end_seq = 0;
MPTCP_SKB_CB(skb)->offset = 0;
MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
MPTCP_SKB_CB(skb)->cant_coalesce = 1;

mptcp_data_lock(sk);
DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));

mptcp_set_owner_r(skb, sk);
skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
mptcp_sk(sk)->bytes_received += skb->len;

sk->sk_data_ready(sk);

mptcp_data_unlock(sk);
}

void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;

skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk,
MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
}

pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq);
}
Loading

0 comments on commit 22af030

Please sign in to comment.