Skip to content

Commit

Permalink
Merge branch 'mptcp-remove-msk-subflow'
Browse files Browse the repository at this point in the history
Matthieu Baerts says:

====================
mptcp: get rid of msk->subflow

The MPTCP protocol maintains an additional struct socket per connection,
mainly to be able to easily use tcp-level struct socket operations.

This leads to several side effects, beyond the quite unfortunate /
confusing 'subflow' field name:

- active and passive sockets behaviour is inconsistent: only active ones
  have a not NULL msk->subflow, leading to different error handling and
  different error code returned to the user-space in several places.

- active sockets uses an unneeded, larger amount of memory

- passive sockets can't successfully go through accept(), disconnect(),
  accept() sequence, see [1] for more details.

The 13 first patches of this series are from Paolo and address all the
above, finally getting rid of the blamed field:

- The first patch is a minor clean-up.

- In the next 11 patches, msk->subflow usage is systematically removed
  from the MPTCP protocol, replacing it with direct msk->first usage,
  eventually introducing new core helpers when needed.

- The 13th patch finally disposes the field, and it's the only patch in
  the series intended to produce functional changes.

The last and 14th patch is from Kuniyuki and it is not linked to the
previous ones: it is a small clean-up to get rid of an unnecessary check
in mptcp_init_sock().

[1] https://github.com/multipath-tcp/mptcp_net-next/issues/290
====================

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
  • Loading branch information
David S. Miller committed Aug 14, 2023
2 parents f614a29 + e263691 commit afb0c19
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 177 deletions.
2 changes: 2 additions & 0 deletions include/net/inet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
int __inet_listen_sk(struct sock *sk, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
/* Don't allocate port at this moment, defer to connect. */
#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0)
/* Grab and release socket lock. */
Expand Down
1 change: 1 addition & 0 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,7 @@ void inet6_cleanup_sock(struct sock *sk);
void inet6_sock_destruct(struct sock *sk);
int inet6_release(struct socket *sock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
Expand Down
46 changes: 28 additions & 18 deletions net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,24 +187,13 @@ static int inet_autobind(struct sock *sk)
return 0;
}

/*
* Move a socket into listening state.
*/
int inet_listen(struct socket *sock, int backlog)
int __inet_listen_sk(struct sock *sk, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
unsigned char old_state = sk->sk_state;
int err, tcp_fastopen;

lock_sock(sk);

err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;

old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
return -EINVAL;

WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
Expand All @@ -227,10 +216,27 @@ int inet_listen(struct socket *sock, int backlog)

err = inet_csk_listen_start(sk);
if (err)
goto out;
return err;

tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
err = 0;
return 0;
}

/*
* Move a socket into listening state.
*/
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
int err = -EINVAL;

lock_sock(sk);

if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;

err = __inet_listen_sk(sk, backlog);

out:
release_sock(sk);
Expand Down Expand Up @@ -431,9 +437,8 @@ int inet_release(struct socket *sock)
}
EXPORT_SYMBOL(inet_release);

int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err;

Expand All @@ -454,6 +459,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

return __inet_bind(sk, uaddr, addr_len, flags);
}

int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
return inet_bind_sk(sock->sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet_bind);

int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
Expand Down
10 changes: 7 additions & 3 deletions net/ipv6/af_inet6.c
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
goto out;
}

/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
const struct proto *prot;
int err = 0;
Expand All @@ -462,6 +460,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

return __inet6_bind(sk, uaddr, addr_len, flags);
}

/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
return inet6_bind_sk(sock->sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet6_bind);

int inet6_release(struct socket *sock)
Expand Down
30 changes: 17 additions & 13 deletions net/mptcp/pm_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/inet.h>
#include <linux/kernel.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
#include <net/genetlink.h>
Expand Down Expand Up @@ -1005,8 +1006,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct socket *ssock;
struct sock *newsk;
struct sock *newsk, *ssk;
int backlog = 1024;
int err;

Expand All @@ -1032,28 +1032,32 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
&mptcp_keys[is_ipv6]);

lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
release_sock(newsk);
if (IS_ERR(ssock))
return PTR_ERR(ssock);
if (IS_ERR(ssk))
return PTR_ERR(ssk);

mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (entry->addr.family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
if (ssk->sk_family == AF_INET)
err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (ssk->sk_family == AF_INET6)
err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
#endif
if (err)
return err;

inet_sk_state_store(newsk, TCP_LISTEN);
err = kernel_listen(ssock, backlog);
if (err)
return err;

mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);

return 0;
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
if (!err)
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
release_sock(ssk);
return err;
}

int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
Expand Down
Loading

0 comments on commit afb0c19

Please sign in to comment.