Skip to content

Commit

Permalink
Merge branch 'bpf_fanout'
Browse files Browse the repository at this point in the history
Willem de Bruijn says:

====================
packet: add cBPF and eBPF fanout modes

Allow programmable fanout modes. Support both classical BPF programs
passed directly and extended BPF programs passed by file descriptor.

One use case is packet steering by deep packet inspection, for
instance steering on application-layer header fields.

Separate the configuration of the fanout mode and the configuration
of the program, to allow dynamic updates to the latter at runtime.

Changes
  v1 -> v2:
    - follow SO_LOCK_FILTER semantics on filter updates
    - only accept eBPF programs of type BPF_PROG_TYPE_SOCKET_FILTER
    - rename PACKET_FANOUT_BPF to PACKET_FANOUT_CBPF to match
      man 2 bpf usage: "classic" vs. "extended" BPF.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 17, 2015
2 parents a1c234f + 30da679 commit 90eb7fa
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 14 deletions.
3 changes: 3 additions & 0 deletions include/uapi/linux/if_packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,16 @@ struct sockaddr_ll {
#define PACKET_TX_HAS_OFF 19
#define PACKET_QDISC_BYPASS 20
#define PACKET_ROLLOVER_STATS 21
#define PACKET_FANOUT_DATA 22 /* setsockopt: pass mode-specific data (a BPF program) to the socket's fanout group */

#define PACKET_FANOUT_HASH 0
#define PACKET_FANOUT_LB 1
#define PACKET_FANOUT_CPU 2
#define PACKET_FANOUT_ROLLOVER 3
#define PACKET_FANOUT_RND 4
#define PACKET_FANOUT_QM 5 /* steer by the skb's queue mapping */
#define PACKET_FANOUT_CBPF 6 /* steer by a classic BPF program passed directly */
#define PACKET_FANOUT_EBPF 7 /* steer by an extended BPF program passed by file descriptor */
#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000
#define PACKET_FANOUT_FLAG_DEFRAG 0x8000

Expand Down
130 changes: 129 additions & 1 deletion net/packet/af_packet.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>

#include "internal.h"

Expand Down Expand Up @@ -1410,6 +1411,22 @@ static unsigned int fanout_demux_qm(struct packet_fanout *f,
return skb_get_queue_mapping(skb) % num;
}

static unsigned int fanout_demux_bpf(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
struct bpf_prog *prog;
unsigned int ret = 0;

rcu_read_lock();
prog = rcu_dereference(f->bpf_prog);
if (prog)
ret = BPF_PROG_RUN(prog, skb) % num;
rcu_read_unlock();

return ret;
}

static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
{
return f->flags & (flag >> 8);
Expand Down Expand Up @@ -1454,6 +1471,10 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
case PACKET_FANOUT_ROLLOVER:
idx = fanout_demux_rollover(f, skb, 0, false, num);
break;
case PACKET_FANOUT_CBPF:
case PACKET_FANOUT_EBPF:
idx = fanout_demux_bpf(f, skb, num);
break;
}

if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
Expand Down Expand Up @@ -1502,6 +1523,103 @@ static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
return false;
}

static void fanout_init_data(struct packet_fanout *f)
{
switch (f->type) {
case PACKET_FANOUT_LB:
atomic_set(&f->rr_cur, 0);
break;
case PACKET_FANOUT_CBPF:
case PACKET_FANOUT_EBPF:
RCU_INIT_POINTER(f->bpf_prog, NULL);
break;
}
}

/*
 * Publish @new as the fanout group's BPF program (NULL detaches) and
 * dispose of the program that was installed before, if any.
 *
 * The pointer swap happens under f->lock. The previous program is
 * destroyed only after synchronize_net() has returned -- presumably so
 * that no RCU reader is still executing it when it is freed.
 */
static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
{
	struct bpf_prog *prev;

	spin_lock(&f->lock);
	prev = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
	rcu_assign_pointer(f->bpf_prog, new);
	spin_unlock(&f->lock);

	/* Nothing was attached before: nothing to wait for or free. */
	if (!prev)
		return;

	synchronize_net();
	bpf_prog_destroy(prev);
}

/*
 * Attach a classic BPF program, described by a struct sock_fprog in user
 * memory, to the fanout group of @po.
 *
 * Returns 0 on success, -EPERM if the socket has SOCK_FILTER_LOCKED set,
 * -EINVAL if @len is not sizeof(struct sock_fprog), -EFAULT if the
 * descriptor cannot be copied in, or the error reported by
 * bpf_prog_create_from_user().
 */
static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
				unsigned int len)
{
	struct sock_fprog uprog;
	struct bpf_prog *prog;
	int err;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	if (len != sizeof(uprog))
		return -EINVAL;

	if (copy_from_user(&uprog, data, len))
		return -EFAULT;

	err = bpf_prog_create_from_user(&prog, &uprog, NULL);
	if (!err)
		__fanout_set_data_bpf(po->fanout, prog);

	return err;
}

/*
 * Attach an extended BPF program, named by a file descriptor read from
 * user memory, to the fanout group of @po.
 *
 * Only programs of type BPF_PROG_TYPE_SOCKET_FILTER are accepted; for any
 * other type the reference taken by bpf_prog_get() is dropped again.
 *
 * Returns 0 on success, -EPERM if the socket has SOCK_FILTER_LOCKED set,
 * -EINVAL on a wrong @len or program type, -EFAULT if the descriptor
 * cannot be copied in, or the error encoded by bpf_prog_get().
 */
static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
				unsigned int len)
{
	struct bpf_prog *prog;
	u32 prog_fd;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	if (len != sizeof(prog_fd))
		return -EINVAL;

	if (copy_from_user(&prog_fd, data, len))
		return -EFAULT;

	prog = bpf_prog_get(prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		__fanout_set_data_bpf(po->fanout, prog);
		return 0;
	}

	bpf_prog_put(prog);
	return -EINVAL;
}

/*
 * Dispatch PACKET_FANOUT_DATA to the handler for the group's fanout mode.
 *
 * The caller must have checked that po->fanout is non-NULL; it is
 * dereferenced here without a test.
 *
 * Returns the handler's result for the CBPF and EBPF modes, and -EINVAL
 * for every mode that takes no data.
 */
static int fanout_set_data(struct packet_sock *po, char __user *data,
			   unsigned int len)
{
	switch (po->fanout->type) {
	case PACKET_FANOUT_CBPF:
		return fanout_set_data_cbpf(po, data, len);
	case PACKET_FANOUT_EBPF:
		return fanout_set_data_ebpf(po, data, len);
	default:
		return -EINVAL;
	}
}

/*
 * Release the mode-specific state of a fanout group.
 *
 * For the two BPF modes the attached program is detached by installing
 * NULL through __fanout_set_data_bpf(), which also disposes of the
 * program that was attached. Other modes hold nothing to release.
 */
static void fanout_release_data(struct packet_fanout *f)
{
	switch (f->type) {
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		__fanout_set_data_bpf(f, NULL);
		break;
	default:
		break;
	}
}

static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_sock *po = pkt_sk(sk);
Expand All @@ -1519,6 +1637,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
case PACKET_FANOUT_CPU:
case PACKET_FANOUT_RND:
case PACKET_FANOUT_QM:
case PACKET_FANOUT_CBPF:
case PACKET_FANOUT_EBPF:
break;
default:
return -EINVAL;
Expand Down Expand Up @@ -1561,10 +1681,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->id = id;
match->type = type;
match->flags = flags;
atomic_set(&match->rr_cur, 0);
INIT_LIST_HEAD(&match->list);
spin_lock_init(&match->lock);
atomic_set(&match->sk_ref, 0);
fanout_init_data(match);
match->prot_hook.type = po->prot_hook.type;
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
Expand Down Expand Up @@ -1610,6 +1730,7 @@ static void fanout_release(struct sock *sk)
if (atomic_dec_and_test(&f->sk_ref)) {
list_del(&f->list);
dev_remove_pack(&f->prot_hook);
fanout_release_data(f);
kfree(f);
}
mutex_unlock(&fanout_mutex);
Expand Down Expand Up @@ -3529,6 +3650,13 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv

return fanout_add(sk, val & 0xffff, val >> 16);
}
case PACKET_FANOUT_DATA:
{
if (!po->fanout)
return -EINVAL;

return fanout_set_data(po, optval, optlen);
}
case PACKET_TX_HAS_OFF:
{
unsigned int val;
Expand Down
5 changes: 4 additions & 1 deletion net/packet/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ struct packet_fanout {
u16 id;
u8 type;
u8 flags;
atomic_t rr_cur;
union {
atomic_t rr_cur;
struct bpf_prog __rcu *bpf_prog;
};
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
spinlock_t lock;
Expand Down
69 changes: 65 additions & 4 deletions tools/testing/selftests/net/psock_fanout.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
* - PACKET_FANOUT_LB
* - PACKET_FANOUT_CPU
* - PACKET_FANOUT_ROLLOVER
* - PACKET_FANOUT_CBPF
* - PACKET_FANOUT_EBPF
*
* Todo:
* - functionality: PACKET_FANOUT_FLAG_DEFRAG
Expand All @@ -44,7 +46,9 @@
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/unistd.h> /* for __NR_bpf */
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
Expand Down Expand Up @@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
return fd;
}

/*
 * Load a small eBPF socket-filter program and attach it to the fanout
 * group of packet socket @fd via PACKET_FANOUT_DATA.
 *
 * The program returns the packet length for packets shorter than
 * DATA_LEN; otherwise it loads the byte at offset 0x50 and returns it if
 * it equals DATA_CHAR or DATA_CHAR_1, else 0.
 *
 * Any failure is reported on stderr and terminates the test with exit(1).
 * The program fd is closed again once it has been handed to the socket.
 */
static void sock_fanout_set_ebpf(int fd)
{
	const int len_off = __builtin_offsetof(struct __sk_buff, len);
	struct bpf_insn prog[] = {
		/* r6 = r1 (context pointer) */
		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
		/* r0 = ctx->len */
		{ BPF_LDX   | BPF_W   | BPF_MEM, 0, 6, len_off, 0 },
		/* if (r0 >= DATA_LEN) skip the next instruction */
		{ BPF_JMP   | BPF_JGE | BPF_K,   0, 0, 1, DATA_LEN },
		/* short packet: jump to exit, returning its length */
		{ BPF_JMP   | BPF_JA  | BPF_K,   0, 0, 4, 0 },
		/* r0 = byte at packet offset 0x50 */
		{ BPF_LD    | BPF_B   | BPF_ABS, 0, 0, 0, 0x50 },
		/* if (r0 == DATA_CHAR) jump to exit */
		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 2, DATA_CHAR },
		/* if (r0 == DATA_CHAR_1) jump to exit */
		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 1, DATA_CHAR_1 },
		/* r0 = 0 */
		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
		/* return r0 */
		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
	};
	/*
	 * Start with an empty string: on failure the buffer is printed with
	 * %s even if the kernel never wrote a verifier log into it.
	 */
	char log_buf[512] = "";
	union bpf_attr attr;
	int pfd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = (unsigned long) prog;
	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
	attr.license = (unsigned long) "GPL";
	attr.log_buf = (unsigned long) log_buf;
	attr.log_size = sizeof(log_buf);
	attr.log_level = 1;

	pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (pfd < 0) {
		perror("bpf");
		fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
		exit(1);
	}

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
		perror("fanout data ebpf");
		exit(1);
	}

	if (close(pfd)) {
		perror("close ebpf");
		exit(1);
	}
}

static char *sock_fanout_open_ring(int fd)
{
struct tpacket_req req = {
Expand All @@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd)

ring = mmap(0, req.tp_block_size * req.tp_block_nr,
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (!ring) {
fprintf(stderr, "packetsock ring mmap\n");
if (ring == MAP_FAILED) {
perror("packetsock ring mmap");
exit(1);
}

Expand Down Expand Up @@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
{
const int expect0[] = { 0, 0 };
char *rings[2];
uint8_t type = typeflags & 0xFF;
int fds[2], fds_udp[2][2], ret;

fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
Expand All @@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
fprintf(stderr, "ERROR: failed open\n");
exit(1);
}
if (type == PACKET_FANOUT_CBPF)
sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
else if (type == PACKET_FANOUT_EBPF)
sock_fanout_set_ebpf(fds[0]);

rings[0] = sock_fanout_open_ring(fds[0]);
rings[1] = sock_fanout_open_ring(fds[1]);
pair_udp_open(fds_udp[0], PORT_BASE);
Expand All @@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off,

/* Send data, but not enough to overflow a queue */
pair_udp_send(fds_udp[0], 15);
pair_udp_send(fds_udp[1], 5);
pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
ret = sock_fanout_read(fds, rings, expect1);

/* Send more data, overflow the queue */
pair_udp_send(fds_udp[0], 15);
pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
/* TODO: ensure consistent order between expect1 and expect2 */
ret |= sock_fanout_read(fds, rings, expect2);

Expand Down Expand Up @@ -275,6 +330,7 @@ int main(int argc, char **argv)
const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
int port_off = 2, tries = 5, ret;

test_control_single();
Expand All @@ -296,6 +352,11 @@ int main(int argc, char **argv)
ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
port_off, expect_rb[0], expect_rb[1]);

ret |= test_datapath(PACKET_FANOUT_CBPF,
port_off, expect_bpf[0], expect_bpf[1]);
ret |= test_datapath(PACKET_FANOUT_EBPF,
port_off, expect_bpf[0], expect_bpf[1]);

set_cpuaffinity(0);
ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
expect_cpu0[0], expect_cpu0[1]);
Expand Down
Loading

0 comments on commit 90eb7fa

Please sign in to comment.