Skip to content

Commit

Permalink
Merge branch 'mptcp-socket-options'
Browse files Browse the repository at this point in the history
Mat Martineau says:

====================
mptcp: Improve socket option handling

MPTCP sockets have previously had limited socket option support. The
architecture of MPTCP sockets (one userspace-facing MPTCP socket that
manages one or more in-kernel TCP subflow sockets) adds complexity for
passing options through to lower levels. This patch set adds MPTCP
support for socket options commonly used with TCP.

Patch 1 reverts an interim socket option fix (a socket option blocklist)
that was merged in the net tree for v5.12.

Patch 2 moves the socket option code to a separate file, with no
functional changes.

Patch 3 adds an allowlist for socket options that are known to function
with MPTCP. Later patches in this set add more allowed options.

Patches 4 and 5 add infrastructure for syncing MPTCP-level options with
the TCP subflows.

Patches 6-12 add support for specific socket options.

Patch 13 adds a socket option self test.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 16, 2021
2 parents a1150a0 + dc65fe8 commit c133acf
Show file tree
Hide file tree
Showing 8 changed files with 1,122 additions and 177 deletions.
2 changes: 1 addition & 1 deletion net/mptcp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o

mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
mib.o pm_netlink.o
mib.o pm_netlink.o sockopt.o

obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
Expand Down
219 changes: 45 additions & 174 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,6 @@ static bool mptcp_is_tcpsk(struct sock *sk)
return false;
}

/* Return the socket stored in msk->first when __mptcp_check_fallback()
 * reports that the MPTCP socket is in fallback, and NULL otherwise.
 * sock_owned_by_me() is invoked on the msk socket before the check.
 */
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);

	/* fallback is the uncommon case */
	if (unlikely(__mptcp_check_fallback(msk)))
		return msk->first;

	return NULL;
}

static int __mptcp_socket_create(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
Expand Down Expand Up @@ -740,18 +730,47 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk->sk_data_ready(sk);
}

/* NOTE(review): this span is rendered from a diff without +/- markers, so
 * some lines appear in old/new pairs (the two signatures, the two early
 * returns and the two list_for_each_entry() headers). The first line of each
 * pair looks like the pre-change text - confirm against the real file.
 *
 * mptcp_do_flush_join_list() as shown:
 *  - returns false straight away when msk->join_list is empty;
 *  - otherwise, with join_list_lock held (BH disabled), calls
 *    mptcp_propagate_sndbuf() for every subflow on the join list and
 *    compares each subflow's setsockopt_seq with msk->setsockopt_seq;
 *  - splices the join list onto the tail of msk->conn_list;
 *  - returns true if at least one subflow had a different setsockopt_seq.
 */
void __mptcp_flush_join_list(struct mptcp_sock *msk)
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
bool ret = false;

/* lockless fast path: nothing queued on the join list */
if (likely(list_empty(&msk->join_list)))
return;
return false;

spin_lock_bh(&msk->join_list_lock);
list_for_each_entry(subflow, &msk->join_list, node)
list_for_each_entry(subflow, &msk->join_list, node) {
u32 sseq = READ_ONCE(subflow->setsockopt_seq);

mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
/* a sequence mismatch is reported to the caller through the return value */
if (READ_ONCE(msk->setsockopt_seq) != sseq)
ret = true;
}
list_splice_tail_init(&msk->join_list, &msk->conn_list);
spin_unlock_bh(&msk->join_list_lock);

return ret;
}

void __mptcp_flush_join_list(struct mptcp_sock *msk)
{
if (likely(!mptcp_do_flush_join_list(msk)))
return;

if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
mptcp_schedule_work((struct sock *)msk);
}

/* Variant of the join-list flush that is annotated with might_sleep().
 * Flushes the join list and calls mptcp_sockopt_sync_all() when either the
 * flush returned true or MPTCP_WORK_SYNC_SETSOCKOPT was set on entry.
 */
static void mptcp_flush_join_list(struct mptcp_sock *msk)
{
	bool pending, flushed;

	/* consume a previously recorded sync request before flushing */
	pending = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);

	might_sleep();

	/* the flush always runs, regardless of the pending bit */
	flushed = mptcp_do_flush_join_list(msk);
	if (flushed || pending)
		mptcp_sockopt_sync_all(msk);
}

static bool mptcp_timer_pending(struct sock *sk)
Expand Down Expand Up @@ -1467,7 +1486,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
int ret = 0;

prev_ssk = ssk;
__mptcp_flush_join_list(msk);
mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);

/* try to keep the subflow socket lock across
Expand Down Expand Up @@ -1893,7 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
unsigned int moved = 0;
bool ret, done;

__mptcp_flush_join_list(msk);
mptcp_flush_join_list(msk);
do {
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
bool slowpath;
Expand Down Expand Up @@ -2317,7 +2336,7 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;

mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);
mptcp_flush_join_list(msk);

mptcp_check_fastclose(msk);

Expand Down Expand Up @@ -2380,6 +2399,9 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);

tcp_assign_congestion_control(sk);

return 0;
}

Expand Down Expand Up @@ -2517,7 +2539,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
}
}

__mptcp_flush_join_list(msk);
mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);

Expand Down Expand Up @@ -2573,6 +2595,8 @@ static void __mptcp_destroy_sock(struct sock *sk)
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);

tcp_cleanup_congestion_control(sk);
sk_refcnt_debug_release(sk);
mptcp_dispose_initial_subflow(msk);
sock_put(sk);
Expand Down Expand Up @@ -2654,7 +2678,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);

__mptcp_flush_join_list(msk);
mptcp_do_flush_join_list(msk);

mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

Expand Down Expand Up @@ -2703,6 +2728,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;

if (mp_opt->mp_capable) {
msk->can_ack = true;
Expand Down Expand Up @@ -2811,161 +2837,6 @@ static void mptcp_destroy(struct sock *sk)
sk_sockets_allocated_dec(sk);
}

/* SOL_SOCKET setsockopt handler.
 *
 * SO_REUSEPORT and SO_REUSEADDR are applied, under the msk socket lock, to
 * the socket returned by __mptcp_nmpc_socket(); on success the resulting
 * sk_reuseport / sk_reuse value is copied back to the MPTCP socket.
 * Returns -EINVAL when __mptcp_nmpc_socket() yields no socket.
 *
 * Every other option is passed to sock_setsockopt() on the MPTCP socket.
 */
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int err;

	if (optname != SO_REUSEPORT && optname != SO_REUSEADDR)
		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);

	lock_sock(sk);
	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock) {
		err = -EINVAL;
		goto unlock;
	}

	err = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
	if (err)
		goto unlock;

	/* mirror the value that was just set on the MPTCP-level socket */
	if (optname == SO_REUSEPORT)
		sk->sk_reuseport = ssock->sk->sk_reuseport;
	else
		sk->sk_reuse = ssock->sk->sk_reuse;

unlock:
	release_sock(sk);
	return err;
}

/* SOL_IPV6 setsockopt handler: only IPV6_V6ONLY is handled, every other
 * option returns -EOPNOTSUPP.
 *
 * IPV6_V6ONLY is applied, under the msk socket lock, to the socket returned
 * by __mptcp_nmpc_socket() (-EINVAL if there is none); on success
 * sk_ipv6only is copied back to the MPTCP socket.
 */
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int err;

	if (optname != IPV6_V6ONLY)
		return -EOPNOTSUPP;

	lock_sock(sk);
	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock) {
		release_sock(sk);
		return -EINVAL;
	}

	err = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
	if (!err)
		sk->sk_ipv6only = ssock->sk->sk_ipv6only;
	release_sock(sk);

	return err;
}

/* Return true when (level, optname) is one of the SOL_IP / SOL_IPV6 options
 * listed below (membership, source-filter and anycast options, plus
 * IPV6_ADDRFORM); return false for everything else, including any other
 * level.
 */
static bool mptcp_unsupported(int level, int optname)
{
	switch (level) {
	case SOL_IP:
		switch (optname) {
		case IP_ADD_MEMBERSHIP:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_MSFILTER:
			return true;
		default:
			return false;
		}
	case SOL_IPV6:
		switch (optname) {
		case IPV6_ADDRFORM:
		case IPV6_ADD_MEMBERSHIP:
		case IPV6_DROP_MEMBERSHIP:
		case IPV6_JOIN_ANYCAST:
		case IPV6_LEAVE_ANYCAST:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_MSFILTER:
			return true;
		default:
			return false;
		}
	default:
		return false;
	}
}

/* setsockopt entry point for MPTCP sockets.
 *
 * Dispatch order, as implemented below:
 *  1. options flagged by mptcp_unsupported() fail with -ENOPROTOOPT;
 *  2. SOL_SOCKET options go to mptcp_setsockopt_sol_socket();
 *  3. if __mptcp_tcp_fallback() returns a subflow, the option is passed to
 *     tcp_setsockopt() on that subflow;
 *  4. remaining SOL_IPV6 options go to mptcp_setsockopt_v6();
 *  5. anything else fails with -EOPNOTSUPP.
 */
static int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk;

pr_debug("msk=%p", msk);

if (mptcp_unsupported(level, optname))
return -ENOPROTOOPT;

if (level == SOL_SOCKET)
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);

/* @@ the meaning of setsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
* MPTCP-level socket to configure the subflows until the subflow
* is in TCP fallback, when TCP socket options are passed through
* to the one remaining subflow.
*/
/* the msk lock is held only for the fallback lookup; tcp_setsockopt()
* below is called after it has been released
*/
lock_sock(sk);
ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
if (ssk)
return tcp_setsockopt(ssk, level, optname, optval, optlen);

if (level == SOL_IPV6)
return mptcp_setsockopt_v6(msk, optname, optval, optlen);

return -EOPNOTSUPP;
}

/* getsockopt entry point for MPTCP sockets.
 *
 * If __mptcp_tcp_fallback() returns a subflow, the request is passed to
 * tcp_getsockopt() on that subflow; otherwise -EOPNOTSUPP is returned.
 */
static int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk;

pr_debug("msk=%p", msk);

/* @@ the meaning of getsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
* MPTCP-level socket to configure the subflows until the subflow
* is in TCP fallback, when socket options are passed through
* to the one remaining subflow.
*/
/* the msk lock is held only for the fallback lookup; tcp_getsockopt()
* below is called after it has been released
*/
lock_sock(sk);
ssk = __mptcp_tcp_fallback(msk);
release_sock(sk);
if (ssk)
return tcp_getsockopt(ssk, level, optname, optval, option);

return -EOPNOTSUPP;
}

void __mptcp_data_acked(struct sock *sk)
{
if (!sock_owned_by_user(sk))
Expand Down Expand Up @@ -3375,7 +3246,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
__mptcp_flush_join_list(msk);
mptcp_flush_join_list(msk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

Expand Down
16 changes: 16 additions & 0 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@
#define MPTCP_CLEAN_UNA 7
#define MPTCP_ERROR_REPORT 8
#define MPTCP_RETRANSMIT 9
#define MPTCP_WORK_SYNC_SETSOCKOPT 10

static inline bool before64(__u64 seq1, __u64 seq2)
{
Expand Down Expand Up @@ -255,6 +256,8 @@ struct mptcp_sock {
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;

u32 setsockopt_seq;
};

#define mptcp_lock_sock(___sk, cb) do { \
Expand Down Expand Up @@ -413,6 +416,8 @@ struct mptcp_subflow_context {
long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */

u32 setsockopt_seq;

struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
Expand Down Expand Up @@ -571,6 +576,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option);

void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
Expand Down Expand Up @@ -730,6 +740,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);

int mptcp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);

void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);

static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
{
return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
Expand Down
Loading

0 comments on commit c133acf

Please sign in to comment.