Merge branch 'mptcp-fixes-for-6-3'
Matthieu Baerts says:

====================
mptcp: fixes for 6.3

Patch 1 fixes a possible deadlock in subflow_error_report() reported by
lockdep. The report was in fact a false positive but the modification
makes sense and silences lockdep to allow syzkaller to find real issues.
The regression was introduced in v5.12.

Patch 2 is a refactoring needed to fix the next two issues.
It improves the situation and can be backported up to v6.0.

Patches 3 and 4 fix use-after-free (UaF) issues reported by KASAN. They fix
issues potentially visible since v5.7 and v5.19 but only reproducible
recently (since v6.0). These two patches depend on patch 2 of this series.

Patch 5 fixes the order of the printed values: expected vs. seen.
The regression was introduced recently, in v6.3-rc1.

Patch 6 adds missing ro_after_init annotations (a short illustrative sketch
follows before the diff below). A previous patch added them for other
functions but these two had been missed. That previous patch has been
backported to stable versions (up to v5.12), so it is probably better to do
the same here.

Patch 7 fixes tcp_set_state() being called twice in a row since v5.10.

Patch 8 fixes another lockdep false positive, this time in the MPTCP PM code.
Here too, some modifications have been made to the code to silence this issue
and help find real ones later. This issue can be seen since v6.2.

v1: https://lore.kernel.org/r/20230227-upstream-net-20230227-mptcp-fixes-v1-0-070e30ae4a8e@tessares.net
====================

Link: https://lore.kernel.org/r/20230227-upstream-net-20230227-mptcp-fixes-v2-0-47c2e95eada9@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Jakub Kicinski committed Mar 11, 2023
2 parents 12508b3 + cee4034 commit dee85ac
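
For readers unfamiliar with the annotation Patch 6 adds, here is a minimal
illustrative sketch of the underlying attribute, __ro_after_init: a variable
written during init and write-protected afterwards. The variable and function
names below are made up for illustration and are not from this series.

#include <linux/cache.h>	/* __ro_after_init */
#include <linux/init.h>		/* __init */
#include <linux/types.h>

/* Illustrative only: written once during init, read-only afterwards. */
static u32 demo_override_setting __ro_after_init;

static int __init demo_module_init(void)
{
	demo_override_setting = 42;	/* last legal write */
	return 0;
}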
Showing 5 changed files with 95 additions and 121 deletions.
16 changes: 16 additions & 0 deletions net/mptcp/pm_netlink.c
@@ -997,9 +997,13 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
return ret;
}

static struct lock_class_key mptcp_slock_keys[2];
static struct lock_class_key mptcp_keys[2];

static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
struct mptcp_pm_addr_entry *entry)
{
bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct socket *ssock;
@@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (!newsk)
return -EINVAL;

/* The subflow socket lock is acquired in a nested to the msk one
* in several places, even by the TCP stack, and this msk is a kernel
* socket: lockdep complains. Instead of propagating the _nested
* modifiers in several places, re-init the lock class for the msk
* socket to an mptcp specific one.
*/
sock_lock_init_class_and_name(newsk,
is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
&mptcp_slock_keys[is_ipv6],
is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
&mptcp_keys[is_ipv6]);

lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
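The comment in the hunk above notes that re-initialising the msk socket's lock
class avoids propagating _nested annotations throughout the stack. For
comparison, here is a minimal sketch of what that per-call-site annotation
would look like; the function name is illustrative and this code is not part
of the commit.

#include <linux/lockdep.h>	/* SINGLE_DEPTH_NESTING */
#include <net/sock.h>		/* lock_sock(), lock_sock_nested(), release_sock() */

/* Illustrative only: tell lockdep at the call site that taking the
 * subflow (ssk) lock while the msk lock is held is intentional.
 */
static void demo_nested_locking(struct sock *msk_sk, struct sock *ssk)
{
	lock_sock(msk_sk);
	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);

	/* ... work that needs both sockets locked ... */

	release_sock(ssk);
	release_sock(msk_sk);
}

Re-initialising the lock class once at socket creation, as
sock_lock_init_class_and_name() does in the hunk above, achieves the same
silencing without touching every call site.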
64 changes: 32 additions & 32 deletions net/mptcp/protocol.c
@@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);

mptcp_propagate_sndbuf((struct sock *)msk, ssk);
mptcp_sockopt_sync_locked(msk, ssk);
return true;
}
@@ -2343,23 +2342,32 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}

sock_orphan(ssk);
subflow->disposable = 1;

/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
* the ssk has been already destroyed, we just need to release the
* reference owned by msk;
*/
if (!inet_csk(ssk)->icsk_ulp_ops) {
WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
} else if (msk->in_accept_queue && msk->first == ssk) {
/* if the first subflow moved to a close state, e.g. due to
* incoming reset and we reach here before inet_child_forget()
* the TCP stack could later try to close it via
* inet_csk_listen_stop(), or deliver it to the user space via
* accept().
* We can't delete the subflow - or risk a double free - nor let
* the msk survive - or will be leaked in the non accept scenario:
* fallback and let TCP cope with the subflow cleanup.
*/
WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
mptcp_subflow_drop_ctx(ssk);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
if (ssk->sk_state == TCP_LISTEN)
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}

__tcp_close(ssk, 0);

/* close acquired an extra ref */
@@ -2399,9 +2407,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}

static void __mptcp_close_subflow(struct mptcp_sock *msk)
static void __mptcp_close_subflow(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);

might_sleep();

@@ -2415,7 +2424,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;

mptcp_close_ssk((struct sock *)msk, ssk, subflow);
mptcp_close_ssk(sk, ssk, subflow);
}

/* if the MPC subflow has been closed before the msk is accepted,
* msk will never be accept-ed, close it now
*/
if (!msk->first && msk->in_accept_queue) {
sock_set_flag(sk, SOCK_DEAD);
inet_sk_state_store(sk, TCP_CLOSE);
}
}

@@ -2624,6 +2641,9 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);

if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);

/* There is no point in keeping around an orphaned sk timedout or
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
@@ -2639,9 +2659,6 @@ static void mptcp_worker(struct work_struct *work)
}
}

if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);

if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);

@@ -3079,6 +3096,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
@@ -3096,8 +3114,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
security_inet_csk_clone(nsk, req);
bh_unlock_sock(nsk);

/* keep a single reference */
__sock_put(nsk);
/* note: the newly allocated socket refcount is 2 now */
return nsk;
}

@@ -3153,8 +3170,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
goto out;
}

/* acquire the 2nd reference for the owning socket */
sock_hold(new_mptcp_sock);
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
@@ -3705,25 +3720,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk = newsock->sk;

set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
msk->in_accept_queue = 0;

lock_sock(newsk);

/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
* we can notify it, if needed.
*
* Even if remote has reset the initial subflow by now
* the refcnt is still at least one.
*/
subflow = mptcp_subflow_ctx(msk->first);
list_add(&subflow->node, &msk->conn_list);
sock_hold(msk->first);
if (mptcp_is_fully_established(newsk))
mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);

mptcp_rcv_space_init(msk, msk->first);
mptcp_propagate_sndbuf(newsk, msk->first);

/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
6 changes: 4 additions & 2 deletions net/mptcp/protocol.h
@@ -295,7 +295,8 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
fastopening:1;
fastopening:1,
in_accept_queue:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -628,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
@@ -666,6 +666,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);

bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);

void mptcp_subflow_drop_ctx(struct sock *ssk);

static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{