Skip to content

Commit

Permalink
Merge branch 'mptcp-fastclose'
Browse files Browse the repository at this point in the history
Mat Martineau says:

====================
mptcp: Fastclose edge cases and error handling

MPTCP has existing code to use the MP_FASTCLOSE option header, which
works like a RST for the MPTCP-level connection (regular RSTs only
affect specific subflows in MPTCP). This series has some improvements
for fastclose.

Patch 1 aligns fastclose socket error handling with TCP RST behavior on
TCP sockets.

Patch 2 adds use of MP_FASTCLOSE in some more edge cases, like file
descriptor close, FIN_WAIT timeout, and when the socket has unread data.

Patch 3 updates the fastclose self tests.

Patch 4 does not change any code, just fixes some outdated comments.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 3, 2022
2 parents 7171e8a + d89e3ed commit 197060c
Show file tree
Hide file tree
Showing 3 changed files with 217 additions and 62 deletions.
124 changes: 87 additions & 37 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -662,19 +662,19 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,

skb = skb_peek(&ssk->sk_receive_queue);
if (!skb) {
/* if no data is found, a racing workqueue/recvmsg
* already processed the new data, stop here or we
* can enter an infinite loop
/* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
* a different CPU can have already processed the pending
* data, stop here or we can enter an infinite loop
*/
if (!moved)
done = true;
break;
}

if (__mptcp_check_fallback(msk)) {
/* if we are running under the workqueue, TCP could have
* collapsed skbs between dummy map creation and now
* be sure to adjust the size
/* Under fallback skbs have no MPTCP extension and TCP could
* collapse them between the dummy map creation and the
* current dequeue. Be sure to adjust the map size.
*/
map_remaining = skb->len;
subflow->map_data_len = skb->len;
Expand Down Expand Up @@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
} else if (ret) {
release_sock(ssk);
goto out;
goto do_error;
}
release_sock(ssk);
}
Expand All @@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
ret = sk_stream_wait_connect(sk, &timeo);
if (ret)
goto out;
goto do_error;
}

ret = -EPIPE;
if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)))
goto do_error;

pfrag = sk_page_frag(sk);

while (msg_data_left(msg)) {
Expand All @@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
bool dfrag_collapsed;
size_t psize, offset;

if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
ret = -EPIPE;
goto out;
}

/* reuse tail pfrag, if possible, or carve a new one from the
* page allocator
*/
Expand Down Expand Up @@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
ret = -EFAULT;
goto out;
goto do_error;
}

/* data successfully copied into the write queue */
Expand Down Expand Up @@ -1796,15 +1795,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
__mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
goto do_error;
}

if (copied)
__mptcp_push_pending(sk, msg->msg_flags);

out:
release_sock(sk);
return copied ? : ret;
return copied;

do_error:
if (copied)
goto out;

copied = sk_stream_error(sk, msg->msg_flags, ret);
goto out;
}

static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
Expand Down Expand Up @@ -2307,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,

lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);

if (flags & MPTCP_CF_FASTCLOSE)
if (flags & MPTCP_CF_FASTCLOSE) {
/* be sure to force the tcp_disconnect() path,
* to generate the egress reset
*/
ssk->sk_lingertime = 0;
sock_set_flag(ssk, SOCK_LINGER);
subflow->send_fastclose = 1;
}

need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
Expand Down Expand Up @@ -2441,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
unlock_sock_fast(tcp_sk, slow);
}

/* Mirror the tcp_reset() error propagation */
switch (sk->sk_state) {
case TCP_SYN_SENT:
sk->sk_err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
sk->sk_err = EPIPE;
break;
case TCP_CLOSE:
return;
default:
sk->sk_err = ECONNRESET;
}

inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);

mptcp_close_wake_up(sk);
/* the calling mptcp_worker will properly destroy the socket */
if (sock_flag(sk, SOCK_DEAD))
return;

sk->sk_state_change(sk);
sk_error_report(sk);
}

static void __mptcp_retrans(struct sock *sk)
Expand Down Expand Up @@ -2552,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_reset_timeout(msk, 0);
}

static void mptcp_do_fastclose(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);

mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
}

static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
Expand Down Expand Up @@ -2580,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work)
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD) &&
(mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
inet_sk_state_store(sk, TCP_CLOSE);
__mptcp_destroy_sock(sk);
goto unlock;
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_check_close_timeout(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
goto unlock;
}
}

if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
Expand Down Expand Up @@ -2825,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}

static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
* always show at least one non-empty queue when checked in this order.
*/
if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
skb_queue_empty_lockless(&msk->receive_queue))
return 0;

return EPOLLIN | EPOLLRDNORM;
}

bool __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
Expand All @@ -2838,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}

if (mptcp_close_state(sk))
if (mptcp_check_readable(msk)) {
/* the msk has read data, do the MPTCP equivalent of TCP reset */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}

sk_stream_wait_close(sk, timeout);

Expand Down Expand Up @@ -3656,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
return err;
}

static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
* always show at least one non-empty queue when checked in this order.
*/
if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
skb_queue_empty_lockless(&msk->receive_queue))
return 0;

return EPOLLIN | EPOLLRDNORM;
}

static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
Expand Down Expand Up @@ -3718,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

/* This barrier is coupled with smp_wmb() in tcp_reset() */
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
if (sk->sk_err)
mask |= EPOLLERR;
Expand Down
Loading

0 comments on commit 197060c

Please sign in to comment.