Skip to content

Commit

Permalink
mptcp: implement TCP_NOTSENT_LOWAT support
Browse files Browse the repository at this point in the history
Add support for this socket option by storing the user-space provided
value in a new msk field, and use that value to implement the
mptcp_stream_memory_free() helper, similar to the TCP one.

To avoid adding more indirect calls in the fast path, open-code
a variant of sk_stream_memory_free() in mptcp_sendmsg() and add
direct calls to the mptcp stream memory free helper where possible.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/464
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Paolo Abeni authored and David S. Miller committed Mar 4, 2024
1 parent a747626 commit 29b5e5e
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 6 deletions.
39 changes: 34 additions & 5 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1762,6 +1762,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy,
return 0;
}

/* Open-coded variant of sk_stream_memory_free() that also computes how
 * many bytes may still be queued, so the fast path needs no indirect call.
 * The caller holds the msk socket lock, hence most fields are read
 * without ONCE annotations.
 *
 * Returns 0 when nothing more can be queued (send buffer full or not-sent
 * data already at/above the low-water mark), UINT_MAX when the low-water
 * mark is UINT_MAX, otherwise the remaining room below the low-water mark.
 */
static u32 mptcp_send_limit(const struct sock *sk)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);
	u32 lowat, pending;

	/* send buffer already exhausted */
	if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf))
		return 0;

	lowat = mptcp_notsent_lowat(sk);
	if (lowat == UINT_MAX)
		return UINT_MAX;

	/* bytes queued at the MPTCP level but not yet sent */
	pending = msk->write_seq - msk->snd_nxt;

	return pending < lowat ? lowat - pending : 0;
}

static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
Expand Down Expand Up @@ -1806,16 +1830,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct mptcp_data_frag *dfrag;
bool dfrag_collapsed;
size_t psize, offset;
u32 copy_limit;

/* ensure fitting the notsent_lowat() constraint */
copy_limit = mptcp_send_limit(sk);
if (!copy_limit)
goto wait_for_memory;

/* reuse tail pfrag, if possible, or carve a new one from the
* page allocator
*/
dfrag = mptcp_pending_tail(sk);
dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
if (!dfrag_collapsed) {
if (!sk_stream_memory_free(sk))
goto wait_for_memory;

if (!mptcp_page_frag_refill(sk, pfrag))
goto wait_for_memory;

Expand All @@ -1830,6 +1857,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
offset = dfrag->offset + dfrag->data_len;
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
psize = min_t(size_t, psize, copy_limit);
total_ts = psize + frag_truesize;

if (!sk_wmem_schedule(sk, total_ts))
Expand Down Expand Up @@ -3760,6 +3788,7 @@ static struct proto mptcp_prot = {
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
.forward_alloc_get = mptcp_forward_alloc_get,
.stream_memory_free = mptcp_stream_memory_free,
.sockets_allocated = &mptcp_sockets_allocated,

.memory_allocated = &tcp_memory_allocated,
Expand Down Expand Up @@ -3933,12 +3962,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;

if (sk_stream_is_writeable(sk))
if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;

set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */
if (sk_stream_is_writeable(sk))
if (__mptcp_stream_is_writeable(sk, 1))
return EPOLLOUT | EPOLLWRNORM;

return 0;
Expand Down
28 changes: 27 additions & 1 deletion net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ struct mptcp_sock {
in_accept_queue:1,
free_first:1,
rcvspace_init:1;
u32 notsent_lowat;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
Expand Down Expand Up @@ -807,11 +808,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}

/* Effective not-sent low-water mark for @sk: the per-msk notsent_lowat
 * value when it is non-zero, otherwise the per-netns
 * sysctl_tcp_notsent_lowat value.
 */
static inline u32 mptcp_notsent_lowat(const struct sock *sk)
{
	u32 lowat = READ_ONCE(mptcp_sk(sk)->notsent_lowat);

	if (lowat)
		return lowat;

	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_notsent_lowat);
}

/* Tell whether the amount of not-yet-sent data (write_seq - snd_nxt),
 * left-shifted by @wake, is still below the not-sent low-water mark.
 */
static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake)
{
	const struct mptcp_sock *msk = mptcp_sk(sk);
	u32 unsent = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt);

	return (unsent << wake) < mptcp_notsent_lowat(sk);
}

/* The msk is writeable only when both the MPTCP not-sent low-water mark
 * check and the generic stream writeable check pass; the generic check is
 * not evaluated when the first one fails.
 */
static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake)
{
	if (!mptcp_stream_memory_free(sk, wake))
		return false;

	return __sk_stream_is_writeable(sk, wake);
}

/* Signal write space on @sk, but only while the not-sent data is below
 * the low-water mark as reported by mptcp_stream_memory_free(sk, 1).
 *
 * The wake-up must not be issued unconditionally: doing so would bypass
 * the low-water-mark gate and signal write space twice.
 */
static inline void mptcp_write_space(struct sock *sk)
{
	/* pairs with memory barrier in mptcp_poll */
	smp_mb();
	if (mptcp_stream_memory_free(sk, 1))
		sk_stream_write_space(sk);
}

static inline void __mptcp_sync_sndbuf(struct sock *sk)
Expand Down
12 changes: 12 additions & 0 deletions net/mptcp/sockopt.c
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,16 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return 0;
case TCP_ULP:
return -EOPNOTSUPP;
case TCP_NOTSENT_LOWAT:
ret = mptcp_get_int_option(msk, optval, optlen, &val);
if (ret)
return ret;

lock_sock(sk);
WRITE_ONCE(msk->notsent_lowat, val);
mptcp_write_space(sk);
release_sock(sk);
return 0;
case TCP_CONGESTION:
return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
case TCP_CORK:
Expand Down Expand Up @@ -1345,6 +1355,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->cork);
case TCP_NODELAY:
return mptcp_put_int_option(msk, optval, optlen, msk->nodelay);
case TCP_NOTSENT_LOWAT:
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
}
return -EOPNOTSUPP;
}
Expand Down

0 comments on commit 29b5e5e

Please sign in to comment.