Skip to content

Commit

Permalink
Merge branch 'faster-soreuseport'
Browse files Browse the repository at this point in the history
Craig Gallek says:

====================
Faster SO_REUSEPORT

This series contains two optimizations for the SO_REUSEPORT feature:
Faster lookup when selecting a socket for an incoming packet and
the ability to select the socket from the group using a BPF program.

This series only includes the UDP path.  I plan to submit a follow-up
including the TCP path if the implementation in this series is
acceptable.

Changes in v4:
- pskb_may_pull is unnecessary with pskb_pull (per Alexei Starovoitov)

Changes in v3:
- skb_pull_inline -> pskb_pull (per Alexei Starovoitov)
- reuseport_attach* -> sk_reuseport_attach* and simple return statement
  syntax change (per Daniel Borkmann)

Changes in v2:
- Fix ARM build; remove unnecessary include.
- Handle case where protocol header is not in linear section (per
  Alexei Starovoitov).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jan 5, 2016
2 parents ebb3cf4 + 3ca8e40 commit 6a5ef90
Show file tree
Hide file tree
Showing 29 changed files with 1,076 additions and 69 deletions.
3 changes: 3 additions & 0 deletions arch/alpha/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/avr32/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _UAPI__ASM_AVR32_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/frv/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,8 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_SOCKET_H */

3 changes: 3 additions & 0 deletions arch/ia64/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_IA64_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/m32r/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_M32R_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/mips/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/mn10300/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/parisc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,7 @@
#define SO_ATTACH_BPF 0x402B
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 0x402C
#define SO_ATTACH_REUSEPORT_EBPF 0x402D

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/powerpc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_POWERPC_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/s390/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/sparc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@
#define SO_ATTACH_BPF 0x0034
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 0x0035
#define SO_ATTACH_REUSEPORT_EBPF 0x0036

/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
Expand Down
3 changes: 3 additions & 0 deletions arch/xtensa/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* _XTENSA_SOCKET_H */
2 changes: 2 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);

int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
int sk_detach_filter(struct sock *sk);
int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
unsigned int len);
Expand Down
3 changes: 2 additions & 1 deletion include/net/addrconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);

Expand Down
2 changes: 2 additions & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ struct cg_proto;
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
* @sk_reuseport_cb: reuseport group container
*/
struct sock {
/*
Expand Down Expand Up @@ -453,6 +454,7 @@ struct sock {
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
};

#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
Expand Down
28 changes: 28 additions & 0 deletions include/net/sock_reuseport.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef _SOCK_REUSEPORT_H
#define _SOCK_REUSEPORT_H

#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/sock.h>

struct sock_reuseport {
struct rcu_head rcu;

u16 max_socks; /* length of socks */
u16 num_socks; /* elements in socks */
struct bpf_prog __rcu *prog; /* optional BPF sock selector */
struct sock *socks[0]; /* array of sock pointers */
};

extern int reuseport_alloc(struct sock *sk);
extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
struct sk_buff *skb,
int hdr_len);
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
struct bpf_prog *prog);

#endif /* _SOCK_REUSEPORT_H */
7 changes: 4 additions & 3 deletions include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
}

int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*)(const struct sock *, const struct sock *),
int (*)(const struct sock *, const struct sock *, bool),
unsigned int hash2_nulladdr);

u32 udp_flow_hashrnd(void);
Expand Down Expand Up @@ -258,15 +258,16 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif,
struct udp_table *tbl);
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif);
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *tbl);
int dif, struct udp_table *tbl,
struct sk_buff *skb);

/*
* SNMP statistics for UDP and UDP-Lite
Expand Down
3 changes: 3 additions & 0 deletions include/uapi/asm-generic/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,7 @@
#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52

#endif /* __ASM_GENERIC_SOCKET_H */
2 changes: 1 addition & 1 deletion net/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o

obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o

obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
Expand Down
121 changes: 99 additions & 22 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>

/**
* sk_filter - run a packet through a socket filter
Expand Down Expand Up @@ -1167,51 +1168,83 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}

/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
struct bpf_prog *old_prog;
int err;

if (bpf_prog_size(prog->len) > sysctl_optmem_max)
return -ENOMEM;

if (sk_unhashed(sk)) {
err = reuseport_alloc(sk);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
/* The socket wasn't bound with SO_REUSEPORT */
return -EINVAL;
}

old_prog = reuseport_attach_prog(sk, prog);
if (old_prog)
bpf_prog_destroy(old_prog);

return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
unsigned int fsize = bpf_classic_proglen(fprog);
unsigned int bpf_fsize = bpf_prog_size(fprog->len);
struct bpf_prog *prog;
int err;

if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
return ERR_PTR(-EPERM);

/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
return -EINVAL;
return ERR_PTR(-EINVAL);

prog = bpf_prog_alloc(bpf_fsize, 0);
if (!prog)
return -ENOMEM;
return ERR_PTR(-ENOMEM);

if (copy_from_user(prog->insns, fprog->filter, fsize)) {
__bpf_prog_free(prog);
return -EFAULT;
return ERR_PTR(-EFAULT);
}

prog->len = fprog->len;

err = bpf_prog_store_orig_filter(prog, fprog);
if (err) {
__bpf_prog_free(prog);
return -ENOMEM;
return ERR_PTR(-ENOMEM);
}

/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
prog = bpf_prepare_filter(prog, NULL);
return bpf_prepare_filter(prog, NULL);
}

/**
* sk_attach_filter - attach a socket filter
* @fprog: the filter program
* @sk: the socket to use
*
* Attach the user's filter code. We first run some sanity checks on
* it to make sure it does not explode on us later. If an error
* occurs or there is insufficient memory for the filter a negative
* errno code is returned. On success the return is zero.
*/
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
int err;

if (IS_ERR(prog))
return PTR_ERR(prog);

Expand All @@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog;
struct bpf_prog *prog = __get_filter(fprog, sk);
int err;

if (IS_ERR(prog))
return PTR_ERR(prog);

err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
__bpf_prog_release(prog);
return err;
}

return 0;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog;

if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
return ERR_PTR(-EPERM);

prog = bpf_prog_get(ufd);
if (IS_ERR(prog))
return PTR_ERR(prog);
return prog;

if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
return -EINVAL;
return ERR_PTR(-EINVAL);
}

return prog;
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog = __get_bpf(ufd, sk);
int err;

if (IS_ERR(prog))
return PTR_ERR(prog);

err = __sk_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
Expand All @@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog = __get_bpf(ufd, sk);
int err;

if (IS_ERR(prog))
return PTR_ERR(prog);

err = __reuseport_attach_prog(prog, sk);
if (err < 0) {
bpf_prog_put(prog);
return err;
}

return 0;
}

#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
#define BPF_LDST_LEN 16U

Expand Down
Loading

0 comments on commit 6a5ef90

Please sign in to comment.