Skip to content

Commit

Permalink
bpf: add bpf_skb_adjust_room encap flags
Browse files Browse the repository at this point in the history
When pushing tunnel headers, annotate skbs in the same way as tunnel
devices.

For GSO packets, the network stack requires certain fields set to
segment packets with tunnel headers. gre_gso_segment depends on
transport and inner mac header, for instance.

Add an option to pass this information.

Remove the restriction on len_diff to network header length, which
is too short, e.g., for GRE protocols.

Changes
  v1->v2:
  - document new flags
  - BPF_F_ADJ_ROOM_MASK moved
  v2->v3:
  - BPF_F_ADJ_ROOM_ENCAP_L3_MASK moved

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Willem de Bruijn authored and Alexei Starovoitov committed Mar 22, 2019
1 parent 2278f6c commit 868d523
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 6 deletions.
16 changes: 15 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1486,11 +1486,20 @@ union bpf_attr {
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header).
*
* There is one supported flag at this time:
* The following flags are supported at this time:
*
* * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
* Adjusting mss in this way is not allowed for datagrams.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**:
* * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
* Any new space is reserved to hold a tunnel header.
* Configure skb offsets and other fields accordingly.
*
* * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**:
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
Expand Down Expand Up @@ -2632,6 +2641,11 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)

#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)

/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
Expand Down
66 changes: 61 additions & 5 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -2963,11 +2963,20 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
}
}

#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO)
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)

#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP)

static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
unsigned int gso_type = SKB_GSO_DODGY;
u16 mac_len, inner_net, inner_trans;
int ret;

if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
Expand All @@ -2981,10 +2990,60 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;

if (encap) {
if (skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_IPV6))
return -ENOTSUPP;

if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
return -EINVAL;

if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
return -EINVAL;

if (skb->encapsulation)
return -EALREADY;

mac_len = skb->network_header - skb->mac_header;
inner_net = skb->network_header;
inner_trans = skb->transport_header;
}

ret = bpf_skb_net_hdr_push(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;

if (encap) {
/* inner mac == inner_net on l3 encap */
skb->inner_mac_header = inner_net;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
skb_set_inner_protocol(skb, skb->protocol);

skb->encapsulation = 1;
skb_set_network_header(skb, mac_len);

if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
gso_type |= SKB_GSO_UDP_TUNNEL;
else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
gso_type |= SKB_GSO_GRE;
else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
gso_type |= SKB_GSO_IPXIP6;
else
gso_type |= SKB_GSO_IPXIP4;

if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
sizeof(struct ipv6hdr) :
sizeof(struct iphdr);

skb_set_transport_header(skb, mac_len + nh_len);
}
}

if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);

Expand All @@ -2993,7 +3052,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
skb_decrease_gso_size(shinfo, len_diff);

/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
}

Expand Down Expand Up @@ -3044,7 +3103,6 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32, mode, u64, flags)
{
bool trans_same = skb->transport_header == skb->network_header;
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
u32 len_max = __bpf_skb_max_len(skb);
Expand Down Expand Up @@ -3073,8 +3131,6 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
}

len_cur = skb->len - skb_network_offset(skb);
if (skb_transport_header_was_set(skb) && !trans_same)
len_cur = skb_network_header_len(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
(!shrink && (skb->len + len_diff_abs > len_max &&
Expand Down

0 comments on commit 868d523

Please sign in to comment.