Skip to content

Commit

Permalink
Merge branch 'mptcp-expose-more-info-and-small-improvements'
Browse files Browse the repository at this point in the history
Matthieu Baerts says:

====================
mptcp: expose more info and small improvements

Patch 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the bytes that have been
transferred. The first patch prepares the work by moving where snd_una
is updated for fallback sockets while the last patch adds some tests to
cover the new code.

Patch 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option allows combining info from the MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. It can be
needed to have all info in one because the path-manager can close and
re-create subflows between getsockopt() calls, fooling the accounting. The
first patch introduces a unique subflow ID to easily detect when
subflows are being re-created with the same 5-tuple while the last patch
adds some tests to cover the new code.

Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that has been there for a while, see [1]. A fix has
recently been sent to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There are no
conflicts between the two patches but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".

Patch 7/9 is similar to commit 47867f0 ("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree but here it adapts the new code that is only in net-next (and it
fixes a merge conflict resolution which didn't have any impact).

Patch 8 and 9/9 are two simple refactorings. One consolidates the
transition to TCP_CLOSE in mptcp_do_fastclose() to avoid duplicated
code. The other one reduces the scope of an argument passed to the
mptcp_pm_alloc_anno_list() function.

Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================

Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Jun 22, 2023
2 parents 5dfbbaa + 528cb5f commit 98e9587
Show file tree
Hide file tree
Showing 10 changed files with 356 additions and 46 deletions.
29 changes: 29 additions & 0 deletions include/uapi/linux/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ struct mptcp_info {
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
__u32 mptcpi_retransmits;
__u64 mptcpi_bytes_retrans;
__u64 mptcpi_bytes_sent;
__u64 mptcpi_bytes_received;
__u64 mptcpi_bytes_acked;
};

/*
Expand Down Expand Up @@ -244,9 +249,33 @@ struct mptcp_subflow_addrs {
};
};

/* Per-subflow record: an identifier paired with the subflow's addresses. */
struct mptcp_subflow_info {
__u32 id; /* NOTE(review): presumably the unique subflow ID assigned by the msk - confirm */
struct mptcp_subflow_addrs addrs;
};

/* Argument block for the MPTCP_FULL_INFO socket option.
 *
 * Fields commented "must be 0, set by kernel" are outputs that userspace
 * zeroes before the call; the remaining size and pointer fields describe
 * the userspace buffers.
 */
struct mptcp_full_info {
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
__u32 size_tcpinfo_user; /* NOTE(review): presumably the per-entry size of the tcp_info buffer - confirm */
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
__u32 size_sfinfo_user; /* NOTE(review): presumably the per-entry size of the subflow_info buffer - confirm */
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
__u32 size_arrays_user; /* max subflows that userspace is interested in;
* the buffers at subflow_info/tcp_info
* are respectively at least:
* size_arrays_user * size_sfinfo_user
* size_arrays_user * size_tcpinfo_user
* bytes wide
*/
__aligned_u64 subflow_info; /* locates the subflow info buffer described above */
__aligned_u64 tcp_info; /* locates the TCP info buffer described above */
struct mptcp_info mptcp_info;
};

/* MPTCP socket options (option names for getsockopt() at level SOL_MPTCP) */
#define MPTCP_INFO 1
#define MPTCP_TCPINFO 2
#define MPTCP_SUBFLOW_ADDRS 3
/* Combines the info of the three options above; see struct mptcp_full_info */
#define MPTCP_FULL_INFO 4

#endif /* _UAPI_MPTCP_H */
14 changes: 13 additions & 1 deletion net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
return cur_seq;
}

/* Move the msk-level snd_una forward to @new_snd_una and credit the
 * distance covered to the msk bytes_acked counter.
 */
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
{
	u64 newly_acked = new_snd_una - msk->snd_una;

	msk->snd_una = new_snd_una;
	msk->bytes_acked += newly_acked;
}

static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
Expand Down Expand Up @@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
__mptcp_check_push(sk, ssk);

if (after64(new_snd_una, old_snd_una)) {
msk->snd_una = new_snd_una;
__mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
mptcp_data_unlock(sk);
Expand Down Expand Up @@ -1119,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_data_lock(subflow->conn);
if (sk_stream_memory_free(sk))
__mptcp_check_push(subflow->conn, sk);

/* on fallback we just need to ignore the msk-level snd_una, as
* this is really plain TCP
*/
__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));

__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
return true;
Expand Down
8 changes: 4 additions & 4 deletions net/mptcp/pm_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,15 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}

bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry)
const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);

lockdep_assert_held(&msk->pm.lock);

add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);

if (add_entry) {
if (mptcp_pm_is_kernel(msk))
Expand All @@ -366,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,

list_add(&add_entry->list, &msk->pm.anno_list);

add_entry->addr = entry->addr;
add_entry->addr = *addr;
add_entry->sock = msk;
add_entry->retrans_times = 0;

Expand Down Expand Up @@ -576,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
return;

if (local) {
if (mptcp_pm_alloc_anno_list(msk, local)) {
if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
Expand Down
2 changes: 1 addition & 1 deletion net/mptcp/pm_userspace.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
lock_sock((struct sock *)msk);
spin_lock_bh(&msk->pm.lock);

if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
Expand Down
31 changes: 19 additions & 12 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
subflow->subflow_id = msk->subflow_id++;

/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
Expand Down Expand Up @@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,

if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
msk->bytes_received += copy_len;
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail && mptcp_try_coalesce(sk, tail, skb))
Expand Down Expand Up @@ -760,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
MPTCP_SKB_CB(skb)->map_seq += delta;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
msk->bytes_received += end_seq - msk->ack_seq;
msk->ack_seq = end_seq;
moved = true;
}
Expand Down Expand Up @@ -845,6 +848,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);

mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
return true;
Expand Down Expand Up @@ -1004,12 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
struct mptcp_data_frag *dtmp, *dfrag;
u64 snd_una;

/* on fallback we just need to ignore snd_una, as this is really
* plain TCP
*/
if (__mptcp_check_fallback(msk))
msk->snd_una = READ_ONCE(msk->snd_nxt);

snd_una = msk->snd_una;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
Expand Down Expand Up @@ -1537,8 +1535,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
* that has been handed to the subflow for transmission
* and skip update in case it was old dfrag.
*/
if (likely(after64(snd_nxt_new, msk->snd_nxt)))
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
msk->snd_nxt = snd_nxt_new;
}
}

void mptcp_check_and_set_pending(struct sock *sk)
Expand Down Expand Up @@ -2596,6 +2596,7 @@ static void __mptcp_retrans(struct sock *sk)
}
if (copied) {
dfrag->already_sent = max(dfrag->already_sent, info.sent);
msk->bytes_retrans += copied;
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
WRITE_ONCE(msk->allow_infinite_fallback, false);
Expand Down Expand Up @@ -2654,6 +2655,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);

inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
Expand Down Expand Up @@ -2691,10 +2693,9 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
if (mptcp_should_close(sk))
mptcp_do_fastclose(sk);
}

if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
goto unlock;
Expand Down Expand Up @@ -2733,6 +2734,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
msk->subflow_id = 1;

mptcp_pm_data_init(msk);

Expand Down Expand Up @@ -2936,7 +2938,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
/* Tear down @sk immediately: flag it SOCK_DEAD, store TCP_CLOSE as its
 * state, run mptcp_do_fastclose() on it and then destroy the socket.
 */
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
/* NOTE(review): mptcp_do_fastclose() as shown in this change also stores
 * TCP_CLOSE, so this explicit store looks redundant - confirm.
 */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
Expand Down Expand Up @@ -2978,7 +2979,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
timeout = 0;
} else if (mptcp_close_state(sk)) {
Expand Down Expand Up @@ -3108,6 +3108,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
msk->bytes_acked = 0;
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;

WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
Expand Down Expand Up @@ -3157,6 +3161,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;

/* passive msk is created after the first/MPC subflow */
msk->subflow_id = 2;

sock_reset_flag(nsk, SOCK_RCU_FREE);
security_inet_csk_clone(nsk, req);

Expand Down
11 changes: 9 additions & 2 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,13 @@ struct mptcp_sock {
u64 local_key;
u64 remote_key;
u64 write_seq;
u64 bytes_sent;
u64 snd_nxt;
u64 bytes_received;
u64 ack_seq;
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
u64 bytes_retrans;
int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
Expand All @@ -274,6 +277,7 @@ struct mptcp_sock {
* recovery related fields are under data_lock
* protection
*/
u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
Expand Down Expand Up @@ -319,7 +323,8 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;

u32 setsockopt_seq;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
struct mptcp_sock *dl_next;
};
Expand Down Expand Up @@ -500,6 +505,8 @@ struct mptcp_subflow_context {
u8 reset_reason:4;
u8 stale_count;

u32 subflow_id;

long delegated_status;
unsigned long fail_tout;

Expand Down Expand Up @@ -810,7 +817,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *rem,
u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry);
const struct mptcp_addr_info *addr);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
Expand Down
Loading

0 comments on commit 98e9587

Please sign in to comment.