Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-02-28

The following pull-request contains BPF updates for your *net-next* tree.

We've added 41 non-merge commits during the last 7 day(s) which contain
a total of 49 files changed, 1383 insertions(+), 499 deletions(-).

The main changes are:

1) BPF and Real-Time nicely co-exist.

2) bpftool feature improvements.

3) retrieve bpf_sk_storage via INET_DIAG.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Feb 29, 2020
2 parents 9a834f9 + 812285f commit 9f0ca0c
Showing 49 changed files with 1,383 additions and 499 deletions.
drivers/net/virtio_net.c: 56 changes (34 additions, 22 deletions)
@@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                 struct receive_queue *rq,
                 struct page *page, unsigned int offset,
                 unsigned int len, unsigned int truesize,
-                bool hdr_valid)
+                bool hdr_valid, unsigned int metasize)
 {
         struct sk_buff *skb;
         struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,6 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
         else
                 hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
+        /* hdr_valid means no XDP, so we can copy the vnet header */
         if (hdr_valid)
                 memcpy(hdr, p, hdr_len);
 
@@ -405,6 +406,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
         copy = skb_tailroom(skb);
         skb_put_data(skb, p, copy);
 
+        if (metasize) {
+                __skb_pull(skb, metasize);
+                skb_metadata_set(skb, metasize);
+        }
+
         len -= copy;
         offset += copy;
 
@@ -450,10 +456,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
         struct virtio_net_hdr_mrg_rxbuf *hdr;
         int err;
 
-        /* virtqueue want to use data area in-front of packet */
-        if (unlikely(xdpf->metasize > 0))
-                return -EOPNOTSUPP;
-
         if (unlikely(xdpf->headroom < vi->hdr_len))
                 return -EOVERFLOW;
 
@@ -644,6 +646,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
         unsigned int delta = 0;
         struct page *xdp_page;
         int err;
+        unsigned int metasize = 0;
 
         len -= vi->hdr_len;
         stats->bytes += len;
@@ -683,8 +686,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
                 xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
                 xdp.data = xdp.data_hard_start + xdp_headroom;
-                xdp_set_data_meta_invalid(&xdp);
                 xdp.data_end = xdp.data + len;
+                xdp.data_meta = xdp.data;
                 xdp.rxq = &rq->xdp_rxq;
                 orig_data = xdp.data;
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -695,6 +698,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                         /* Recalculate length in case bpf program changed it */
                         delta = orig_data - xdp.data;
                         len = xdp.data_end - xdp.data;
+                        metasize = xdp.data - xdp.data_meta;
                         break;
                 case XDP_TX:
                         stats->xdp_tx++;
@@ -735,10 +739,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
         }
         skb_reserve(skb, headroom - delta);
         skb_put(skb, len);
-        if (!delta) {
+        if (!xdp_prog) {
                 buf += header_offset;
                 memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
-        } /* keep zeroed vnet hdr since packet was changed by bpf */
+        } /* keep zeroed vnet hdr since XDP is loaded */
+
+        if (metasize)
+                skb_metadata_set(skb, metasize);
 
 err:
         return skb;
@@ -760,8 +767,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
                                    struct virtnet_rq_stats *stats)
 {
         struct page *page = buf;
-        struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
-                                          PAGE_SIZE, true);
+        struct sk_buff *skb =
+                page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
 
         stats->bytes += len - vi->hdr_len;
         if (unlikely(!skb))
@@ -793,6 +800,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         unsigned int truesize;
         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
         int err;
+        unsigned int metasize = 0;
 
         head_skb = NULL;
         stats->bytes += len - vi->hdr_len;
@@ -839,33 +847,36 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 data = page_address(xdp_page) + offset;
                 xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
                 xdp.data = data + vi->hdr_len;
-                xdp_set_data_meta_invalid(&xdp);
                 xdp.data_end = xdp.data + (len - vi->hdr_len);
+                xdp.data_meta = xdp.data;
                 xdp.rxq = &rq->xdp_rxq;
 
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
                 stats->xdp_packets++;
 
                 switch (act) {
                 case XDP_PASS:
+                        metasize = xdp.data - xdp.data_meta;
+
                         /* recalculate offset to account for any header
-                         * adjustments. Note other cases do not build an
-                         * skb and avoid using offset
+                         * adjustments and minus the metasize to copy the
+                         * metadata in page_to_skb(). Note other cases do not
+                         * build an skb and avoid using offset
                          */
-                        offset = xdp.data -
-                                        page_address(xdp_page) - vi->hdr_len;
+                        offset = xdp.data - page_address(xdp_page) -
+                                 vi->hdr_len - metasize;
 
-                        /* recalculate len if xdp.data or xdp.data_end were
-                         * adjusted
+                        /* recalculate len if xdp.data, xdp.data_end or
+                         * xdp.data_meta were adjusted
                          */
-                        len = xdp.data_end - xdp.data + vi->hdr_len;
+                        len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
                         /* We can only create skb based on xdp_page. */
                         if (unlikely(xdp_page != page)) {
                                 rcu_read_unlock();
                                 put_page(page);
-                                head_skb = page_to_skb(vi, rq, xdp_page,
-                                                       offset, len,
-                                                       PAGE_SIZE, false);
+                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
+                                                       len, PAGE_SIZE, false,
+                                                       metasize);
                                 return head_skb;
                         }
                         break;
@@ -921,7 +932,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 goto err_skb;
         }
 
-        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
+                               metasize);
         curr_skb = head_skb;
 
         if (unlikely(!curr_skb))
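To see what the metasize plumbing above enables, here is a minimal sketch of an XDP program that reserves 4 bytes of metadata in front of the packet; with this driver change virtio_net no longer rejects such frames and carries the bytes into the skb via skb_metadata_set(). The program name, section name and stored value are illustrative assumptions, not part of this commit.

/* Illustrative XDP program, not from this commit. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_store_mark(struct xdp_md *ctx)
{
        __u32 *mark;

        /* Grow the metadata area by 4 bytes (a negative delta grows it). */
        if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*mark)))
                return XDP_PASS;

        mark = (void *)(long)ctx->data_meta;
        /* Verifier needs proof that the metadata lies before the data. */
        if ((void *)(mark + 1) > (void *)(long)ctx->data)
                return XDP_PASS;

        *mark = 0x42;   /* e.g. a classification mark */
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

On XDP_PASS the driver now computes metasize = xdp.data - xdp.data_meta and attaches it to the skb, so a later tc/BPF program can read the same bytes through its data_meta pointer.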
include/linux/bpf-cgroup.h: 2 changes (1 addition, 1 deletion)
@@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map;
 
 struct bpf_storage_buffer {
         struct rcu_head rcu;
-        char data[0];
+        char data[];
 };
 
 struct bpf_cgroup_storage {
include/linux/bpf.h: 41 changes (36 additions, 5 deletions)
@@ -859,7 +859,7 @@ struct bpf_prog_array_item {
 
 struct bpf_prog_array {
         struct rcu_head rcu;
-        struct bpf_prog_array_item items[0];
+        struct bpf_prog_array_item items[];
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                 struct bpf_prog *_prog; \
                 struct bpf_prog_array *_array; \
                 u32 _ret = 1; \
-                preempt_disable(); \
+                migrate_disable(); \
                 rcu_read_lock(); \
                 _array = rcu_dereference(array); \
                 if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                 } \
 _out: \
                 rcu_read_unlock(); \
-                preempt_enable(); \
+                migrate_enable(); \
                 _ret; \
         })
 
@@ -932,7 +932,7 @@ _out: \
                 u32 ret; \
                 u32 _ret = 1; \
                 u32 _cn = 0; \
-                preempt_disable(); \
+                migrate_disable(); \
                 rcu_read_lock(); \
                 _array = rcu_dereference(array); \
                 _item = &_array->items[0]; \
@@ -944,7 +944,7 @@ _out: \
                         _item++; \
                 } \
                 rcu_read_unlock(); \
-                preempt_enable(); \
+                migrate_enable(); \
                 if (_ret) \
                         _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
                 else \
@@ -961,6 +961,36 @@ _out: \
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+        migrate_disable();
+        if (IS_ENABLED(CONFIG_PREEMPT_RT))
+                this_cpu_inc(bpf_prog_active);
+        else
+                __this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+        if (IS_ENABLED(CONFIG_PREEMPT_RT))
+                this_cpu_dec(bpf_prog_active);
+        else
+                __this_cpu_dec(bpf_prog_active);
+        migrate_enable();
+}
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 
@@ -993,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
 
+struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 void bpf_map_inc(struct bpf_map *map);
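The new bpf_disable_instrumentation()/bpf_enable_instrumentation() helpers bracket map operations that must not be re-entered from perf, kprobe or tracepoint programs. A minimal sketch of the calling pattern follows; the surrounding function is hypothetical and not taken from this diff.

/* Hypothetical caller, for illustration only: a syscall-side map update
 * guarded against recursion from instrumentation-attached BPF programs. */
static int example_map_update(struct bpf_map *map, void *key, void *value,
                              u64 flags)
{
        int err;

        bpf_disable_instrumentation();  /* migrate_disable() + bpf_prog_active */
        err = map->ops->map_update_elem(map, key, value, flags);
        bpf_enable_instrumentation();
        return err;
}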
include/linux/filter.h: 37 changes (29 additions, 8 deletions)
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
 #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
         u32 ret; \
-        cant_sleep(); \
+        cant_migrate(); \
         if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
                 struct bpf_prog_stats *stats; \
                 u64 start = sched_clock(); \
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
         } \
         ret; })
 
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
-                                               bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx) \
+        __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+                                          const void *ctx)
+{
+        u32 ret;
+
+        migrate_disable();
+        ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+        migrate_enable();
+        return ret;
+}
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
         return qdisc_skb_cb(skb)->data;
 }
 
+/* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
                                          struct sk_buff *skb)
 {
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 {
         u32 res;
 
-        preempt_disable();
+        migrate_disable();
         res = __bpf_prog_run_save_cb(prog, skb);
-        preempt_enable();
+        migrate_enable();
         return res;
 }
 
@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
         if (unlikely(prog->cb_access))
                 memset(cb_data, 0, BPF_SKB_CB_LEN);
 
-        preempt_disable();
-        res = BPF_PROG_RUN(prog, skb);
-        preempt_enable();
+        res = bpf_prog_run_pin_on_cpu(prog, skb);
         return res;
 }
 
include/linux/inet_diag.h: 27 changes (16 additions, 11 deletions)
@@ -15,11 +15,9 @@ struct netlink_callback;
 struct inet_diag_handler {
         void            (*dump)(struct sk_buff *skb,
                                 struct netlink_callback *cb,
-                                const struct inet_diag_req_v2 *r,
-                                struct nlattr *bc);
+                                const struct inet_diag_req_v2 *r);
 
-        int             (*dump_one)(struct sk_buff *in_skb,
-                                    const struct nlmsghdr *nlh,
+        int             (*dump_one)(struct netlink_callback *cb,
                                     const struct inet_diag_req_v2 *req);
 
         void            (*idiag_get_info)(struct sock *sk,
@@ -40,18 +38,25 @@ struct inet_diag_handler {
         __u16           idiag_info_size;
 };
 
+struct bpf_sk_storage_diag;
+struct inet_diag_dump_data {
+        struct nlattr *req_nlas[__INET_DIAG_REQ_MAX];
+#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE]
+#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
+
+        struct bpf_sk_storage_diag *bpf_stg_diag;
+};
+
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-                      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
-                      struct user_namespace *user_ns,
-                      u32 pid, u32 seq, u16 nlmsg_flags,
-                      const struct nlmsghdr *unlh, bool net_admin);
+                      struct sk_buff *skb, struct netlink_callback *cb,
+                      const struct inet_diag_req_v2 *req,
+                      u16 nlmsg_flags, bool net_admin);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
                          struct netlink_callback *cb,
-                         const struct inet_diag_req_v2 *r,
-                         struct nlattr *bc);
+                         const struct inet_diag_req_v2 *r);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-                            struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+                            struct netlink_callback *cb,
                             const struct inet_diag_req_v2 *req);
 
 struct sock *inet_diag_find_one_icsk(struct net *net,
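With the reworked callbacks, a protocol's diag handler now receives the netlink_callback directly, and the parsed request attributes, including the new SK_BPF_STORAGES one, are grouped in struct inet_diag_dump_data. A hedged sketch of a handler written against the new signatures; the handler, function and hashinfo names are illustrative, not from this commit.

/* Illustrative only: a diag handler matching the new callback signatures. */
extern struct inet_hashinfo example_hashinfo;   /* hypothetical protocol hash table */

static int example_diag_dump_one(struct netlink_callback *cb,
                                 const struct inet_diag_req_v2 *req)
{
        /* e.g. delegate to the common connection-socket helper */
        return inet_diag_dump_one_icsk(&example_hashinfo, cb, req);
}

static void example_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                              const struct inet_diag_req_v2 *r)
{
        inet_diag_dump_icsk(&example_hashinfo, skb, cb, r);
}

static const struct inet_diag_handler example_diag_handler = {
        .idiag_type = IPPROTO_TCP,      /* placeholder; per-protocol in reality */
        .dump       = example_diag_dump,
        .dump_one   = example_diag_dump_one,
};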
include/linux/kernel.h: 7 changes (7 additions, 0 deletions)
@@ -257,6 +257,13 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
 
+#ifndef CONFIG_PREEMPT_RT
+# define cant_migrate()         cant_sleep()
+#else
+  /* Placeholder for now */
+# define cant_migrate()         do { } while (0)
+#endif
+
 /**
  * abs - return absolute value of an argument
  * @x: the value.  If it is unsigned type, it is converted to signed type first.