Skip to content

Commit

Permalink
mptcp: rethink 'is writable' conditional
Browse files Browse the repository at this point in the history
Currently, when checking for the 'msk is writable' condition, we
look at the individual subflows write space.
That works well while we send data via a single subflow, but will
not as soon as we will enable concurrent xmit on multiple subflows.

With this change msk becomes writable when the following conditions
hold:
- the socket has some free write space
- there is at least a subflow with write free space

Additionally we need to set the NOSPACE bit on all subflows
before blocking.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Paolo Abeni authored and David S. Miller committed Sep 14, 2020
1 parent 26cdb8f commit 63561a4
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 27 deletions.
71 changes: 46 additions & 25 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ void mptcp_data_acked(struct sock *sk)
{
mptcp_reset_timer(sk);

if ((!sk_stream_is_writeable(sk) ||
if ((!test_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags) ||
(inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
Expand Down Expand Up @@ -567,6 +567,20 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
put_page(dfrag->page);
}

static bool mptcp_is_writeable(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;

if (!sk_stream_is_writeable((struct sock *)msk))
return false;

mptcp_for_each_subflow(msk, subflow) {
if (sk_stream_is_writeable(subflow->tcp_sock))
return true;
}
return false;
}

static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
Expand Down Expand Up @@ -609,8 +623,15 @@ static void mptcp_clean_una(struct sock *sk)
sk_mem_reclaim_partial(sk);

/* Only wake up writers if a subflow is ready */
if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
if (mptcp_is_writeable(msk)) {
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(sk)->flags);
smp_mb__after_atomic();

/* set SEND_SPACE before sk_stream_write_space clears
* NOSPACE
*/
sk_stream_write_space(sk);
}
}
}

Expand Down Expand Up @@ -801,34 +822,41 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return ret;
}

static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock)
static void mptcp_nospace(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;

clear_bit(MPTCP_SEND_SPACE, &msk->flags);
smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */

/* enables sk->write_space() callbacks */
set_bit(SOCK_NOSPACE, &sock->flags);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct socket *sock = READ_ONCE(ssk->sk_socket);

/* enables ssk->write_space() callbacks */
if (sock)
set_bit(SOCK_NOSPACE, &sock->flags);
}
}

static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
struct sock *backup = NULL;
bool free;

sock_owned_by_me((const struct sock *)msk);
sock_owned_by_me(sk);

if (!mptcp_ext_cache_refill(msk))
return NULL;

mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

if (!sk_stream_memory_free(ssk)) {
struct socket *sock = ssk->sk_socket;

if (sock)
mptcp_nospace(msk, sock);

free = sk_stream_is_writeable(subflow->tcp_sock);
if (!free) {
mptcp_nospace(msk);
return NULL;
}

Expand All @@ -845,16 +873,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return backup;
}

static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
static void ssk_check_wmem(struct mptcp_sock *msk)
{
struct socket *sock;

if (likely(sk_stream_is_writeable(ssk)))
return;

sock = READ_ONCE(ssk->sk_socket);
if (sock)
mptcp_nospace(msk, sock);
if (unlikely(!mptcp_is_writeable(msk)))
mptcp_nospace(msk);
}

static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
Expand Down Expand Up @@ -907,6 +929,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
mptcp_reset_timer(sk);
}

mptcp_nospace(msk);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
Expand Down Expand Up @@ -945,7 +968,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!sk_stream_memory_free(ssk) ||
!mptcp_page_frag_refill(ssk, pfrag) ||
!mptcp_ext_cache_refill(msk)) {
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
tcp_push(ssk, msg->msg_flags, mss_now,
tcp_sk(ssk)->nonagle, size_goal);
mptcp_set_timeout(sk, ssk);
Expand Down Expand Up @@ -993,9 +1015,9 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
mptcp_reset_timer(sk);
}

ssk_check_wmem(msk, ssk);
release_sock(ssk);
out:
ssk_check_wmem(msk);
release_sock(sk);
return copied ? : ret;
}
Expand Down Expand Up @@ -2291,8 +2313,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,

if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
if (sk_stream_is_writeable(sk) &&
test_bit(MPTCP_SEND_SPACE, &msk->flags))
if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
mask |= EPOLLOUT | EPOLLWRNORM;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
Expand Down
6 changes: 4 additions & 2 deletions net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -996,8 +996,10 @@ static void subflow_write_space(struct sock *sk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;

sk_stream_write_space(sk);
if (sk_stream_is_writeable(sk)) {
if (!sk_stream_is_writeable(sk))
return;

if (sk_stream_is_writeable(parent)) {
set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
smp_mb__after_atomic();
/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
Expand Down

0 comments on commit 63561a4

Please sign in to comment.