Skip to content

Commit

Permalink
Merge branch 'kernel_socket_netns'
Browse files Browse the repository at this point in the history
Eric W. Biederman says:

====================
Cleanup the kernel sockets.

Right now the situtation for allocating kernel sockets is a mess.
- sock_create_kern does not take a namespace parameter.
- kernel sockets must not reference count a network namespace and keep
  it alive or else we will have a reference counting loop.
- The way we avoid the reference counting loop with sk_change_net
  and sk_release_kernel are major hacks.

This patchset addresses this mess by fixing sock_create_kern to do
everything necessary to create a kernel socket.  None of the current
users of kernel sockets need the network namespace reference counted.
Either kernel sockets are network namespace aware (and using the current
hacks) or kernel sockets are limited to the initial network namespace
in which case it does not matter.

This patchset starts by addressing tun which should be using normal
userspace sockets like macvtap.

Then sock_create_kern is fixed to take a network namespace.
Then the in kernel status of sockets are passed through to sk_alloc.
Then sk_alloc is fixed to not reference count the network namespace
     of kernel sockets.
Then the callers of sock_create_kern are fixed up to stop using hacks.
Then netlink which uses it's own flavor of sock_create_kern is fixed.

Finally the hacks that are sk_change_net and sk_release_kernel are removed.

When it is all done the code is easier to follow, easier to use, easier
to maintain and shorter by about 70 lines.
====================

Reported-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 11, 2015
2 parents 80ba92f + affb979 commit 0198e09
Show file tree
Hide file tree
Showing 72 changed files with 166 additions and 238 deletions.
4 changes: 2 additions & 2 deletions crypto/af_alg.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
if (!type)
goto unlock;

sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto);
sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
err = -ENOMEM;
if (!sk2)
goto unlock;
Expand Down Expand Up @@ -324,7 +324,7 @@ static int alg_create(struct net *net, struct socket *sock, int protocol,
return -EPROTONOSUPPORT;

err = -ENOMEM;
sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto);
sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto, kern);
if (!sk)
goto out;

Expand Down
4 changes: 2 additions & 2 deletions drivers/block/drbd/drbd_receiver.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

what = "sock_create_kern";
err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (err < 0) {
sock = NULL;
Expand Down Expand Up @@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
memcpy(&my_addr, &connection->my_addr, my_addr_len);

what = "sock_create_kern";
err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &s_listen);
if (err) {
s_listen = NULL;
Expand Down
12 changes: 6 additions & 6 deletions drivers/isdn/mISDN/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,14 @@ static const struct proto_ops data_sock_ops = {
};

static int
data_sock_create(struct net *net, struct socket *sock, int protocol)
data_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
{
struct sock *sk;

if (sock->type != SOCK_DGRAM)
return -ESOCKTNOSUPPORT;

sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto);
sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
if (!sk)
return -ENOMEM;

Expand Down Expand Up @@ -756,14 +756,14 @@ static const struct proto_ops base_sock_ops = {


static int
base_sock_create(struct net *net, struct socket *sock, int protocol)
base_sock_create(struct net *net, struct socket *sock, int protocol, int kern)
{
struct sock *sk;

if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;

sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto);
sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern);
if (!sk)
return -ENOMEM;

Expand All @@ -785,7 +785,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern)

switch (proto) {
case ISDN_P_BASE:
err = base_sock_create(net, sock, proto);
err = base_sock_create(net, sock, proto, kern);
break;
case ISDN_P_TE_S0:
case ISDN_P_NT_S0:
Expand All @@ -799,7 +799,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern)
case ISDN_P_B_L2DTMF:
case ISDN_P_B_L2DSP:
case ISDN_P_B_L2DSPHDLC:
err = data_sock_create(net, sock, proto);
err = data_sock_create(net, sock, proto, kern);
break;
default:
return err;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/macvtap.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ static int macvtap_open(struct inode *inode, struct file *file)

err = -ENOMEM;
q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
&macvtap_proto);
&macvtap_proto, 0);
if (!q)
goto out;

Expand Down
4 changes: 2 additions & 2 deletions drivers/net/ppp/pppoe.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,11 +546,11 @@ static struct proto pppoe_sk_proto __read_mostly = {
* Initialize a new struct sock.
*
**********************************************************************/
static int pppoe_create(struct net *net, struct socket *sock)
static int pppoe_create(struct net *net, struct socket *sock, int kern)
{
struct sock *sk;

sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto);
sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern);
if (!sk)
return -ENOMEM;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ppp/pppox.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol,
!try_module_get(pppox_protos[protocol]->owner))
goto out;

rc = pppox_protos[protocol]->create(net, sock);
rc = pppox_protos[protocol]->create(net, sock, kern);

module_put(pppox_protos[protocol]->owner);
out:
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/ppp/pptp.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,14 +561,14 @@ static void pptp_sock_destruct(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
}

static int pptp_create(struct net *net, struct socket *sock)
static int pptp_create(struct net *net, struct socket *sock, int kern)
{
int error = -ENOMEM;
struct sock *sk;
struct pppox_sock *po;
struct pptp_opt *opt;

sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto);
sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto, kern);
if (!sk)
goto out;

Expand Down
26 changes: 5 additions & 21 deletions drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ struct tun_file {
struct socket socket;
struct socket_wq wq;
struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
/* only used for fasnyc */
unsigned int flags;
Expand Down Expand Up @@ -493,10 +492,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}

BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
&tfile->socket.flags));
sk_release_kernel(&tfile->sk);
sock_put(&tfile->sk);
}
}

Expand Down Expand Up @@ -1492,18 +1488,10 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
return ret;
}

static int tun_release(struct socket *sock)
{
if (sock->sk)
sock_put(sock->sk);
return 0;
}

/* Ops structure to mimic raw sockets with tun */
static const struct proto_ops tun_socket_ops = {
.sendmsg = tun_sendmsg,
.recvmsg = tun_recvmsg,
.release = tun_release,
};

static struct proto tun_proto = {
Expand Down Expand Up @@ -1865,7 +1853,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
if (cmd == TUNSETIFF && !tun) {
ifr.ifr_name[IFNAMSIZ-1] = '\0';

ret = tun_set_iff(tfile->net, file, &ifr);
ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);

if (ret)
goto unlock;
Expand Down Expand Up @@ -2154,16 +2142,16 @@ static int tun_chr_fasync(int fd, struct file *file, int on)

static int tun_chr_open(struct inode *inode, struct file * file)
{
struct net *net = current->nsproxy->net_ns;
struct tun_file *tfile;

DBG1(KERN_INFO, "tunX: tun_chr_open\n");

tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL,
&tun_proto);
tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
&tun_proto, 0);
if (!tfile)
return -ENOMEM;
RCU_INIT_POINTER(tfile->tun, NULL);
tfile->net = get_net(current->nsproxy->net_ns);
tfile->flags = 0;
tfile->ifindex = 0;

Expand All @@ -2174,13 +2162,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->socket.ops = &tun_socket_ops;

sock_init_data(&tfile->socket, &tfile->sk);
sk_change_net(&tfile->sk, tfile->net);

tfile->sk.sk_write_space = tun_sock_write_space;
tfile->sk.sk_sndbuf = INT_MAX;

file->private_data = tfile;
set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
INIT_LIST_HEAD(&tfile->next);

sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
Expand All @@ -2191,10 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
static int tun_chr_close(struct inode *inode, struct file *file)
{
struct tun_file *tfile = file->private_data;
struct net *net = tfile->net;

tun_detach(tfile, true);
put_net(net);

return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion fs/afs/rxrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ int afs_open_socket(void)
return -ENOMEM;
}

ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
if (ret < 0) {
destroy_workqueue(afs_async_calls);
_leave(" = %d [socket]", ret);
Expand Down
16 changes: 8 additions & 8 deletions fs/dlm/lowcomms.c
Original file line number Diff line number Diff line change
Expand Up @@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con)
mutex_unlock(&connections_lock);

memset(&peeraddr, 0, sizeof(peeraddr));
result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
IPPROTO_TCP, &newsock);
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_STREAM, IPPROTO_TCP, &newsock);
if (result < 0)
return -ENOMEM;

Expand Down Expand Up @@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con)
goto out;

/* Create a socket to communicate with */
result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
IPPROTO_TCP, &sock);
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (result < 0)
goto out_err;

Expand Down Expand Up @@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
addr_len = sizeof(struct sockaddr_in6);

/* Create a socket to communicate with */
result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
IPPROTO_TCP, &sock);
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (result < 0) {
log_print("Can't create listening comms socket");
goto create_out;
Expand Down Expand Up @@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void)

log_print("Using SCTP for communications");

result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
IPPROTO_SCTP, &sock);
result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
SOCK_SEQPACKET, IPPROTO_SCTP, &sock);
if (result < 0) {
log_print("Can't create comms socket, check SCTP is loaded");
goto out;
Expand Down
2 changes: 1 addition & 1 deletion include/linux/if_pppox.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po)
struct module;

struct pppox_proto {
int (*create)(struct net *net, struct socket *sock);
int (*create)(struct net *net, struct socket *sock, int kern);
int (*ioctl)(struct socket *sock, unsigned int cmd,
unsigned long arg);
struct module *owner;
Expand Down
3 changes: 1 addition & 2 deletions include/linux/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ struct net;
#define SOCK_NOSPACE 2
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
#define SOCK_EXTERNALLY_ALLOCATED 5

#ifndef ARCH_HAS_SOCKET_TYPES
/**
Expand Down Expand Up @@ -208,7 +207,7 @@ void sock_unregister(int family);
int __sock_create(struct net *net, int family, int type, int proto,
struct socket **res, int kern);
int sock_create(int family, int type, int proto, struct socket **res);
int sock_create_kern(int family, int type, int proto, struct socket **res);
int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
void sock_release(struct socket *sock);
int sock_sendmsg(struct socket *sock, struct msghdr *msg);
Expand Down
2 changes: 1 addition & 1 deletion include/net/af_vsock.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work);
struct sock *__vsock_create(struct net *net,
struct socket *sock,
struct sock *parent,
gfp_t priority, unsigned short type);
gfp_t priority, unsigned short type, int kern);

/**** TRANSPORT ****/

Expand Down
2 changes: 1 addition & 1 deletion include/net/inet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,

static inline void inet_ctl_sock_destroy(struct sock *sk)
{
sk_release_kernel(sk);
sock_release(sk->sk_socket);
}

#endif
2 changes: 1 addition & 1 deletion include/net/llc_conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb)
}

struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot);
struct proto *prot, int kern);
void llc_sk_free(struct sock *sk);

void llc_sk_reset(struct sock *sk);
Expand Down
21 changes: 3 additions & 18 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ struct sock_common {
unsigned char skc_reuse:4;
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
Expand Down Expand Up @@ -323,6 +324,7 @@ struct sock {
#define sk_reuse __sk_common.skc_reuse
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
Expand Down Expand Up @@ -1514,9 +1516,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)


struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot);
struct proto *prot, int kern);
void sk_free(struct sock *sk);
void sk_release_kernel(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);

struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
Expand Down Expand Up @@ -2192,22 +2193,6 @@ void sock_net_set(struct sock *sk, struct net *net)
write_pnet(&sk->sk_net, net);
}

/*
* Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace.
* They should not hold a reference to a namespace in order to allow
* to stop it.
* Sockets after sk_change_net should be released using sk_release_kernel
*/
static inline void sk_change_net(struct sock *sk, struct net *net)
{
struct net *current_net = sock_net(sk);

if (!net_eq(current_net, net)) {
put_net(current_net);
sock_net_set(sk, net);
}
}

static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {
Expand Down
2 changes: 1 addition & 1 deletion net/appletalk/ddp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
goto out;
rc = -ENOMEM;
sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto);
sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
if (!sk)
goto out;
rc = 0;
Expand Down
Loading

0 comments on commit 0198e09

Please sign in to comment.