Skip to content

Commit

Permalink
Merge branch 'mptcp-another-set-of-miscellaneous-mptcp-fixes'
Browse files Browse the repository at this point in the history
Mat Martineau says:

====================
mptcp: Another set of miscellaneous MPTCP fixes

This is another collection of MPTCP fixes and enhancements that we have
tested in the MPTCP tree:

Patch 1 cleans up cgroup attachment for in-kernel subflow sockets.

Patches 2 and 3 make sure that deletion of advertised addresses by an
MPTCP path manager when flushing all addresses behaves similarly to the
remove-single-address operation, and adds related tests.

Patches 4 and 8 do some minor cleanup.

Patches 5-7 add MPTCP_FASTCLOSE functionality. Note that patch 6 adds MPTCP
option parsing to tcp_reset().

Patch 9 optimizes skb size for outgoing MPTCP packets.
====================

Link: https://lore.kernel.org/r/20201210222506.222251-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Dec 15, 2020
2 parents efc36d3 + 15e6ca9 commit ebf3228
Show file tree
Hide file tree
Showing 9 changed files with 155 additions and 37 deletions.
2 changes: 1 addition & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
/* tcp_input.c */
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);

Expand Down
13 changes: 8 additions & 5 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -4218,10 +4218,13 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
}

/* When we get a reset we do this. */
void tcp_reset(struct sock *sk)
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
trace_tcp_receive_reset(sk);

if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);

/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
Expand Down Expand Up @@ -5604,7 +5607,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
} else if (tcp_reset_check(sk, skb)) {
tcp_reset(sk);
tcp_reset(sk, skb);
}
goto discard;
}
Expand Down Expand Up @@ -5640,7 +5643,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}

if (rst_seq_match)
tcp_reset(sk);
tcp_reset(sk, skb);
else {
/* Disable TFO if RST is out-of-order
* and no data has been received
Expand Down Expand Up @@ -6077,7 +6080,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
*/

if (th->rst) {
tcp_reset(sk);
tcp_reset(sk, skb);
goto discard;
}

Expand Down Expand Up @@ -6519,7 +6522,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
tcp_reset(sk, skb);
return 1;
}
}
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
req->rsk_ops->send_reset(sk, skb);
} else if (fastopen) { /* received a valid RST pkt */
reqsk_fastopen_remove(sk, req, true);
tcp_reset(sk);
tcp_reset(sk, skb);
}
if (!fastopen) {
inet_csk_reqsk_queue_drop(sk, req);
Expand Down
17 changes: 17 additions & 0 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
break;

case MPTCPOPT_MP_FASTCLOSE:
if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
break;

ptr += 2;
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->fastclose = 1;
break;

default:
break;
}
Expand All @@ -299,6 +309,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->mp_join = 0;
mp_opt->add_addr = 0;
mp_opt->ahmac = 0;
mp_opt->fastclose = 0;
mp_opt->port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
Expand Down Expand Up @@ -942,6 +953,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;

if (mp_opt.fastclose &&
msk->local_key == mp_opt.rcvr_key) {
WRITE_ONCE(msk->rcv_fastclose, true);
mptcp_schedule_work((struct sock *)msk);
}

if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
struct mptcp_addr_info addr;

Expand Down
21 changes: 12 additions & 9 deletions net/mptcp/pm_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry, *ret = NULL;

rcu_read_lock();
spin_lock_bh(&msk->join_list_lock);
__mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
Expand All @@ -144,13 +144,11 @@ select_local_address(const struct pm_nl_pernet *pernet,
* pending join
*/
if (entry->addr.family == ((struct sock *)msk)->sk_family &&
!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
!lookup_subflow_by_saddr(&msk->join_list, &entry->addr)) {
!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
ret = entry;
break;
}
}
spin_unlock_bh(&msk->join_list_lock);
rcu_read_unlock();
return ret;
}
Expand Down Expand Up @@ -867,13 +865,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}

static void __flush_addrs(struct pm_nl_pernet *pernet)
static void __flush_addrs(struct net *net, struct list_head *list)
{
while (!list_empty(&pernet->local_addr_list)) {
while (!list_empty(list)) {
struct mptcp_pm_addr_entry *cur;

cur = list_entry(pernet->local_addr_list.next,
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
list_del_rcu(&cur->list);
kfree_rcu(cur, rcu);
}
Expand All @@ -890,11 +889,13 @@ static void __reset_counters(struct pm_nl_pernet *pernet)
static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
{
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
LIST_HEAD(free_list);

spin_lock_bh(&pernet->lock);
__flush_addrs(pernet);
list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet);
spin_unlock_bh(&pernet->lock);
__flush_addrs(sock_net(skb->sk), &free_list);
return 0;
}

Expand Down Expand Up @@ -1156,10 +1157,12 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;

list_for_each_entry(net, net_list, exit_list) {
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);

/* net is removed from namespace list, can't race with
* other modifiers
*/
__flush_addrs(net_generic(net, pm_nl_pernet_id));
__flush_addrs(net, &pernet->local_addr_list);
}
}

Expand Down
47 changes: 42 additions & 5 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1256,6 +1256,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_ext *mpext = NULL;
struct sk_buff *skb, *tail;
bool can_collapse = false;
int size_bias = 0;
int avail_size;
size_t ret = 0;

Expand All @@ -1277,10 +1278,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
can_collapse = (info->size_goal - skb->len > 0) &&
mptcp_skb_can_collapse_to(data_seq, skb, mpext);
if (!can_collapse)
if (!can_collapse) {
TCP_SKB_CB(skb)->eor = 1;
else
} else {
size_bias = skb->len;
avail_size = info->size_goal - skb->len;
}
}

/* Zero window and all data acked? Probe. */
Expand All @@ -1300,8 +1303,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return 0;

ret = info->limit - info->sent;
tail = tcp_build_frag(ssk, avail_size, info->flags, dfrag->page,
dfrag->offset + info->sent, &ret);
tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
dfrag->page, dfrag->offset + info->sent, &ret);
if (!tail) {
tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
return -ENOMEM;
Expand All @@ -1310,8 +1313,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* if the tail skb is still the cached one, collapsing really happened.
*/
if (skb == tail) {
WARN_ON_ONCE(!can_collapse);
TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += ret;
WARN_ON_ONCE(!can_collapse);
WARN_ON_ONCE(zero_window_probe);
goto out;
}
Expand Down Expand Up @@ -2217,6 +2221,36 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
return true;
}

static void mptcp_check_fastclose(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = &msk->sk.icsk_inet.sk;

if (likely(!READ_ONCE(msk->rcv_fastclose)))
return;

mptcp_token_destroy(msk);

list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);

lock_sock(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
release_sock(tcp_sk);
}

inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_DATA_READY, &msk->flags);
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);

mptcp_close_wake_up(sk);
}

static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
Expand All @@ -2233,6 +2267,9 @@ static void mptcp_worker(struct work_struct *work)

mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);

mptcp_check_fastclose(msk);

if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);

Expand Down
6 changes: 5 additions & 1 deletion net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#define OPTION_MPTCP_ADD_ADDR BIT(6)
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
#define OPTION_MPTCP_RM_ADDR BIT(8)
#define OPTION_MPTCP_FASTCLOSE BIT(9)

/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
Expand Down Expand Up @@ -58,6 +59,7 @@
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
#define TCPOLEN_MPTCP_PORT_LEN 4
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
#define TCPOLEN_MPTCP_FASTCLOSE 12

/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
Expand Down Expand Up @@ -110,6 +112,7 @@ struct mptcp_options_received {
u16 data_len;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
dss : 1,
add_addr : 1,
rm_addr : 1,
Expand All @@ -119,7 +122,7 @@ struct mptcp_options_received {
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[20];
u8 hmac[MPTCPOPT_HMAC_LEN];
u8 join_id;
u8 use_map:1,
dsn64:1,
Expand Down Expand Up @@ -237,6 +240,7 @@ struct mptcp_sock {
bool fully_established;
bool rcv_data_fin;
bool snd_data_fin_enable;
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
spinlock_t join_list_lock;
struct sock *ack_hint;
Expand Down
34 changes: 33 additions & 1 deletion net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,17 @@ void mptcp_subflow_reset(struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;

/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);

tcp_set_state(ssk, TCP_CLOSE);
tcp_send_active_reset(ssk, GFP_ATOMIC);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
return; /* worker will put sk for us */

sock_put(sk);
}

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
Expand Down Expand Up @@ -1167,6 +1172,30 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
return err;
}

static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data,
*child_skcd = &child->sk_cgrp_data;

/* only the additional subflows created by kworkers have to be modified */
if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
cgroup_id(sock_cgroup_ptr(child_skcd))) {
#ifdef CONFIG_MEMCG
struct mem_cgroup *memcg = parent->sk_memcg;

mem_cgroup_sk_free(child);
if (memcg && css_tryget(&memcg->css))
child->sk_memcg = memcg;
#endif /* CONFIG_MEMCG */

cgroup_sk_free(child_skcd);
*child_skcd = *parent_skcd;
cgroup_sk_clone(child_skcd);
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
}

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
Expand All @@ -1187,6 +1216,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)

lock_sock(sf->sk);

/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);

/* kernel sockets do not by default acquire net ref, but TCP timer
* needs it.
*/
Expand Down
Loading

0 comments on commit ebf3228

Please sign in to comment.