Skip to content

Commit

Permalink
Merge branch 'bpf-eth-get-headlen'
Browse files Browse the repository at this point in the history
Stanislav Fomichev says:

====================
Currently, when eth_get_headlen calls flow dissector, it doesn't pass any
skb. Because we use passed skb to lookup associated networking namespace
to find whether we have a BPF program attached or not, we always use
C-based flow dissector in this case.

The goal of this patch series is to add new networking namespace argument
to the eth_get_headlen and make BPF flow dissector programs be able to
work in the skb-less case.

The series goes like this:
* use new kernel context (struct bpf_flow_dissector) for flow dissector
  programs; this makes it easy to distinguish between skb and no-skb
  case and supports calling BPF flow dissector on a chunk of raw data
* convert BPF_PROG_TEST_RUN to use raw data
* plumb network namespace into __skb_flow_dissect from all callers
* handle no-skb case in __skb_flow_dissect
* update eth_get_headlen to include net namespace argument and
  convert all existing users
* add selftest to make sure bpf_skb_load_bytes is not allowed in
  the no-skb mode
* extend test_progs to exercise skb-less flow dissection as well
* stop adjusting nhoff/thoff by ETH_HLEN in BPF_PROG_TEST_RUN

v6:
* more suggestions by Alexei:
  * eth_get_headlen now takes net dev, not net namespace
  * test skb-less case via tun eth_get_headlen
* fix return errors in bpf_flow_load
* don't adjust nhoff/thoff by ETH_HLEN

v5:
* API changes have been submitted via bpf/stable tree

v4:
* prohibit access to vlan fields as well (otherwise, inconsistent
  between skb/skb-less cases)
* drop extra unneeded check for skb->vlan_present in bpf_flow.c

v3:
* new kernel xdp_buff-like context per Alexei suggestion
* drop skb_net helper
* properly clamp flow_keys->nhoff

v2:
* moved temporary skb from stack into percpu (avoids memset of ~200 bytes
  per packet)
* tightened down access to __sk_buff fields from flow dissector programs to
  avoid touching shinfo (whitelist only relevant fields)
* addressed suggestions from Willem
====================

Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
Daniel Borkmann committed Apr 23, 2019
2 parents 7e6e185 + 02ee065 commit 2aad326
Show file tree
Hide file tree
Showing 27 changed files with 411 additions and 186 deletions.
3 changes: 2 additions & 1 deletion drivers/net/ethernet/aquantia/atlantic/aq_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,

hdr_len = buff->len;
if (hdr_len > AQ_CFG_RX_HDR_SIZE)
hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata),
hdr_len = eth_get_headlen(skb->dev,
aq_buf_vaddr(&buff->rxdata),
AQ_CFG_RX_HDR_SIZE);

memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/broadcom/bnxt/bnxt.c
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
DMA_ATTR_WEAK_ORDERING);

if (unlikely(!payload))
payload = eth_get_headlen(data_ptr, len);
payload = eth_get_headlen(bp->dev, data_ptr, len);

skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
if (!skb) {
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/hisilicon/hns/hns_enet.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
} else {
ring->stats.seg_pkt_cnt++;

pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE);
pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE);
memcpy(__skb_put(skb, pull_len), va,
ALIGN(pull_len, sizeof(long)));

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
Original file line number Diff line number Diff line change
Expand Up @@ -2580,7 +2580,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length,
ring->stats.seg_pkt_cnt++;
u64_stats_update_end(&ring->syncp);

ring->pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE);
ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
__skb_put(skb, ring->pull_len);
hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len,
desc_cb);
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/fm10k/fm10k_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
/* we need the header to contain the greater of either ETH_HLEN or
* 60 bytes if the skb->len is less than 60 for skb_pad.
*/
pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2035,7 +2035,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > I40E_RX_HDR_SIZE)
headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
headlen = eth_get_headlen(skb->dev, xdp->data,
I40E_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/iavf/iavf_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1315,7 +1315,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IAVF_RX_HDR_SIZE)
headlen = eth_get_headlen(va, IAVF_RX_HDR_SIZE);
headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ice/ice_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE);
headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/igb/igb_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -8051,7 +8051,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGB_RX_HDR_LEN)
headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/igc/igc_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGC_RX_HDR_LEN)
headlen = eth_get_headlen(va, IGC_RX_HDR_LEN);
headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1800,7 +1800,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
* we need the header to contain the greater of either ETH_HLEN or
* 60 bytes if the skb->len is less than 60 for skb_pad.
*/
pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE);
pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IXGBEVF_RX_HDR_SIZE)
headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
headlen = eth_get_headlen(skb->dev, xdp->data,
IXGBEVF_RX_HDR_SIZE);

/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
case MLX5_INLINE_MODE_NONE:
return 0;
case MLX5_INLINE_MODE_TCP_UDP:
hlen = eth_get_headlen(skb->data, skb_headlen(skb));
hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
hlen += VLAN_HLEN;
break;
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -1965,7 +1965,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,

if (frags) {
/* Exercise flow dissector code path. */
u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
u32 headlen = eth_get_headlen(tun->dev, skb->data,
skb_headlen(skb));

if (unlikely(headlen > skb_headlen(skb))) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
Expand Down
2 changes: 1 addition & 1 deletion include/linux/etherdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct device;
int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
unsigned char *arch_get_platform_mac_address(void);
int nvmem_get_mac_address(struct device *dev, void *addrbuf);
u32 eth_get_headlen(void *data, unsigned int max_len);
u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
extern const struct header_ops eth_header_ops;

Expand Down
28 changes: 15 additions & 13 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -1275,12 +1275,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
}
#endif

struct bpf_flow_keys;
bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
struct bpf_flow_keys *flow_keys);
bool __skb_flow_dissect(const struct sk_buff *skb,
struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
__be16 proto, int nhoff, int hlen);

bool __skb_flow_dissect(const struct net *net,
const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen,
Expand All @@ -1290,27 +1290,28 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, unsigned int flags)
{
return __skb_flow_dissect(skb, flow_dissector, target_container,
NULL, 0, 0, 0, flags);
return __skb_flow_dissect(NULL, skb, flow_dissector,
target_container, NULL, 0, 0, 0, flags);
}

static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
struct flow_keys *flow,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
NULL, 0, 0, 0, flags);
return __skb_flow_dissect(NULL, skb, &flow_keys_dissector,
flow, NULL, 0, 0, 0, flags);
}

static inline bool
skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
skb_flow_dissect_flow_keys_basic(const struct net *net,
const struct sk_buff *skb,
struct flow_keys_basic *flow, void *data,
__be16 proto, int nhoff, int hlen,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
data, proto, nhoff, hlen, flags);
}

Expand Down Expand Up @@ -2488,7 +2489,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb)
if (skb_transport_header_was_set(skb))
return;

if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
NULL, 0, 0, 0, 0))
skb_set_transport_header(skb, keys.control.thoff);
}

Expand Down
7 changes: 7 additions & 0 deletions include/net/flow_dissector.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
return ((char *)target_container) + flow_dissector->offset[key_id];
}

struct bpf_flow_dissector {
struct bpf_flow_keys *flow_keys;
const struct sk_buff *skb;
void *data;
void *data_end;
};

#endif
11 changes: 4 additions & 7 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,10 @@ struct tcf_proto {
};

struct qdisc_skb_cb {
union {
struct {
unsigned int pkt_len;
u16 slave_dev_queue_mapping;
u16 tc_classid;
};
struct bpf_flow_keys *flow_keys;
struct {
unsigned int pkt_len;
u16 slave_dev_queue_mapping;
u16 tc_classid;
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
Expand Down
48 changes: 14 additions & 34 deletions net/bpf/test_run.c
Original file line number Diff line number Diff line change
Expand Up @@ -379,13 +379,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
union bpf_attr __user *uattr)
{
u32 size = kattr->test.data_size_in;
struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
struct bpf_skb_data_end *cb;
const struct ethhdr *eth;
u32 retval, duration;
struct sk_buff *skb;
struct sock *sk;
void *data;
int ret;
u32 i;
Expand All @@ -396,46 +395,28 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;

data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (size < ETH_HLEN)
return -EINVAL;

data = bpf_test_init(kattr, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);

sk = kzalloc(sizeof(*sk), GFP_USER);
if (!sk) {
kfree(data);
return -ENOMEM;
}
sock_net_set(sk, current->nsproxy->net_ns);
sock_init_data(NULL, sk);

skb = build_skb(data, 0);
if (!skb) {
kfree(data);
kfree(sk);
return -ENOMEM;
}
skb->sk = sk;

skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
skb->protocol = eth_type_trans(skb,
current->nsproxy->net_ns->loopback_dev);
skb_reset_network_header(skb);

cb = (struct bpf_skb_data_end *)skb->cb;
cb->qdisc_cb.flow_keys = &flow_keys;
eth = (struct ethhdr *)data;

if (!repeat)
repeat = 1;

ctx.flow_keys = &flow_keys;
ctx.data = data;
ctx.data_end = (__u8 *)data + size;

rcu_read_lock();
preempt_disable();
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
retval = __skb_flow_bpf_dissect(prog, skb,
&flow_keys_dissector,
&flow_keys);
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
size);

if (signal_pending(current)) {
preempt_enable();
Expand Down Expand Up @@ -468,7 +449,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
retval, duration);

out:
kfree_skb(skb);
kfree(sk);
kfree(data);
return ret;
}
Loading

0 comments on commit 2aad326

Please sign in to comment.