Skip to content

Commit

Permalink
Merge branch 'mptcp-C-flag-and-fixes'
Browse files Browse the repository at this point in the history
Mat Martineau says:

====================
mptcp: Connection-time 'C' flag and two fixes

Here are six more patches from the MPTCP tree.

Most of them add support for the 'C' flag in the MPTCP connection-time
option headers. This flag affects how the initial address and port are
treated by each peer. Normally one peer may send MP_JOIN requests to the
remote address and port that were used when initiating the MPTCP
connection. The 'C' bit indicates that MP_JOINs should only be sent to
remote addresses that have been advertised with ADD_ADDR.

The other two patches are unrelated improvements.

Patches 1-4: Add the 'C' flag feature, a sysctl to optionally enable it,
and a selftest.

Patch 5: Adjust rp_filter settings in a selftest.

Patch 6: Improve rbuf cleanup for MPTCP sockets.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jun 22, 2021
2 parents a432c77 + fde56ee commit 38f7592
Show file tree
Hide file tree
Showing 11 changed files with 157 additions and 41 deletions.
13 changes: 13 additions & 0 deletions Documentation/networking/mptcp-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,16 @@ checksum_enabled - BOOLEAN
per-namespace sysctl.

Default: 0

allow_join_initial_addr_port - BOOLEAN
Allow peers to send join requests to the IP address and port number used
by the initial subflow if the value is 1. This controls a flag that is
sent to the peer at connection time, and whether such join requests are
accepted or denied.

Joins to addresses advertised with ADD_ADDR are not affected by this
value.

This is a per-namespace sysctl.

Default: 1
3 changes: 2 additions & 1 deletion include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ struct mptcp_out_options {
u8 backup;
u8 reset_reason:4,
reset_transient:1,
csum_reqd:1;
csum_reqd:1,
allow_join_id0:1;
u32 nonce;
u64 thmac;
u32 token;
Expand Down
16 changes: 16 additions & 0 deletions net/mptcp/ctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
unsigned int add_addr_timeout;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
};

static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
Expand All @@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net)
return mptcp_get_pernet(net)->checksum_enabled;
}

int mptcp_allow_join_id0(struct net *net)
{
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
}

static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
}

#ifdef CONFIG_SYSCTL
Expand Down Expand Up @@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{
.procname = "allow_join_initial_addr_port",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{}
};

Expand All @@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;

hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
Expand Down
13 changes: 13 additions & 0 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (flags & MPTCP_CAP_CHECKSUM_REQD)
mp_opt->csum_reqd = 1;

if (flags & MPTCP_CAP_DENY_JOIN_ID0)
mp_opt->deny_join_id0 = 1;

mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
Expand Down Expand Up @@ -360,6 +363,7 @@ void mptcp_get_options(const struct sock *sk,
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
mp_opt->deny_join_id0 = 0;

length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
Expand Down Expand Up @@ -402,6 +406,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
Expand Down Expand Up @@ -490,6 +495,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));

/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
Expand Down Expand Up @@ -827,6 +833,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
opts->csum_reqd = subflow_req->csum_reqd;
opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
Expand Down Expand Up @@ -905,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}

if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);

if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
Expand Down Expand Up @@ -1201,6 +1211,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (opts->csum_reqd)
flag |= MPTCP_CAP_CHECKSUM_REQD;

if (!opts->allow_join_id0)
flag |= MPTCP_CAP_DENY_JOIN_ID0;

*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
flag);
Expand Down
1 change: 1 addition & 0 deletions net/mptcp/pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;

spin_lock_init(&msk->pm.lock);
Expand Down
3 changes: 2 additions & 1 deletion net/mptcp/pm_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)

/* check if should create a new subflow */
if (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows < subflows_max &&
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
struct mptcp_addr_info remote = { 0 };
Expand Down
56 changes: 23 additions & 33 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
}
}

static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
bool slow;
int ret;

slow = lock_sock_fast(ssk);
ret = tcp_can_send_ack(ssk);
if (ret)
if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
return ret;
}

static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
{
const struct inet_connection_sock *icsk = inet_csk(ssk);
bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
const struct tcp_sock *tp = tcp_sk(ssk);

return (ack_pending & ICSK_ACK_SCHED) &&
((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
(rx_empty && ack_pending &
(ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}

static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
bool cleanup;
int space = __mptcp_space(sk);
bool cleanup, rx_empty;

/* this is a simple superset of what tcp_cleanup_rbuf() implements
* so that we don't have to acquire the ssk socket lock most of the time
* to do actually nothing
*/
cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
if (!cleanup)
return;
cleanup = (space > 0) && (space >= (old_space << 1));
rx_empty = !atomic_read(&sk->sk_rmem_alloc);

/* if the hinted ssk is still active, try to use it */
if (likely(ack_hint)) {
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
return;
}
if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
mptcp_subflow_cleanup_rbuf(ssk);
}

/* otherwise pick the first active subflow */
mptcp_for_each_subflow(msk, subflow)
if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
return;
}

static bool mptcp_check_data_fin(struct sock *sk)
Expand Down Expand Up @@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
WRITE_ONCE(msk->ack_hint, ssk);

*bytes += moved;
return done;
Expand Down Expand Up @@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
tcp_cleanup_rbuf(ssk, moved);

if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
Expand All @@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
Expand Down Expand Up @@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;

if (ssk == msk->ack_hint)
msk->ack_hint = NULL;

if (ssk == msk->first)
msk->first = NULL;

Expand Down Expand Up @@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
msk->rmem_released = 0;
msk->tx_pending_data = 0;

msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
Expand Down
12 changes: 8 additions & 4 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,9 @@
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
#define MPTCP_CAP_FLAG_MASK (0x3F)
#define MPTCP_CAP_FLAG_MASK (0x1F)

/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
Expand Down Expand Up @@ -137,7 +138,8 @@ struct mptcp_options_received {
mp_prio : 1,
echo : 1,
csum_reqd : 1,
backup : 1;
backup : 1,
deny_join_id0 : 1;
u32 token;
u32 nonce;
u64 thmac;
Expand Down Expand Up @@ -192,6 +194,7 @@ struct mptcp_pm_data {
bool work_pending;
bool accept_addr;
bool accept_subflow;
bool remote_deny_join_id0;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
Expand Down Expand Up @@ -240,7 +243,6 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
spinlock_t join_list_lock;
struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
Expand Down Expand Up @@ -350,7 +352,8 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
csum_reqd : 1;
csum_reqd : 1,
allow_join_id0 : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
Expand Down Expand Up @@ -540,6 +543,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
int mptcp_is_checksum_enabled(struct net *net);
int mptcp_allow_join_id0(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
Expand Down
3 changes: 3 additions & 0 deletions net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
Expand Down Expand Up @@ -407,6 +408,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)

if (mp_opt.csum_reqd)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
if (mp_opt.deny_join_id0)
WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
Expand Down
Loading

0 comments on commit 38f7592

Please sign in to comment.