-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
selftests: bpf: tc-bpf flow shaping with EDT
Add a small test that shows how to shape a TCP flow in tc-bpf with EDT and ECN. Signed-off-by: Peter Oskolkov <posk@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
- Loading branch information
Peter Oskolkov
authored and
Alexei Starovoitov
committed
Mar 23, 2019
1 parent
315a202
commit 7df5e3d
Showing
3 changed files
with
210 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
#include <stdint.h> | ||
#include <linux/bpf.h> | ||
#include <linux/if_ether.h> | ||
#include <linux/in.h> | ||
#include <linux/ip.h> | ||
#include <linux/pkt_cls.h> | ||
#include <linux/tcp.h> | ||
#include "bpf_helpers.h" | ||
#include "bpf_endian.h" | ||
|
||
/* nanoseconds in one second; converts byte counts at a per-second rate to ns */
#define NS_PER_SEC		(1000 * 1000 * 1000)
/* the maximum delay we are willing to add (drop packets beyond that) */
#define TIME_HORIZON_NS		(2 * NS_PER_SEC)
/* added delay at or above which packets are marked with ECN CE */
#define ECN_HORIZON_NS		(5 * 1000 * 1000)
/* target rate; throttle_flow() divides skb->len (bytes) scaled to ns by it */
#define THROTTLE_RATE_BPS	(5 * 1000 * 1000)
|
||
/* flow_key => last_tstamp timestamp used */ | ||
struct bpf_map_def SEC("maps") flow_map = { | ||
.type = BPF_MAP_TYPE_HASH, | ||
.key_size = sizeof(uint32_t), | ||
.value_size = sizeof(uint64_t), | ||
.max_entries = 1, | ||
}; | ||
|
||
static inline int throttle_flow(struct __sk_buff *skb) | ||
{ | ||
int key = 0; | ||
uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key); | ||
uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / | ||
THROTTLE_RATE_BPS; | ||
uint64_t now = bpf_ktime_get_ns(); | ||
uint64_t tstamp, next_tstamp = 0; | ||
|
||
if (last_tstamp) | ||
next_tstamp = *last_tstamp + delay_ns; | ||
|
||
tstamp = skb->tstamp; | ||
if (tstamp < now) | ||
tstamp = now; | ||
|
||
/* should we throttle? */ | ||
if (next_tstamp <= tstamp) { | ||
if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY)) | ||
return TC_ACT_SHOT; | ||
return TC_ACT_OK; | ||
} | ||
|
||
/* do not queue past the time horizon */ | ||
if (next_tstamp - now >= TIME_HORIZON_NS) | ||
return TC_ACT_SHOT; | ||
|
||
/* set ecn bit, if needed */ | ||
if (next_tstamp - now >= ECN_HORIZON_NS) | ||
bpf_skb_ecn_set_ce(skb); | ||
|
||
if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST)) | ||
return TC_ACT_SHOT; | ||
skb->tstamp = next_tstamp; | ||
|
||
return TC_ACT_OK; | ||
} | ||
|
||
static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp) | ||
{ | ||
void *data_end = (void *)(long)skb->data_end; | ||
|
||
/* drop malformed packets */ | ||
if ((void *)(tcp + 1) > data_end) | ||
return TC_ACT_SHOT; | ||
|
||
if (tcp->dest == bpf_htons(9000)) | ||
return throttle_flow(skb); | ||
|
||
return TC_ACT_OK; | ||
} | ||
|
||
static inline int handle_ipv4(struct __sk_buff *skb) | ||
{ | ||
void *data_end = (void *)(long)skb->data_end; | ||
void *data = (void *)(long)skb->data; | ||
struct iphdr *iph; | ||
uint32_t ihl; | ||
|
||
/* drop malformed packets */ | ||
if (data + sizeof(struct ethhdr) > data_end) | ||
return TC_ACT_SHOT; | ||
iph = (struct iphdr *)(data + sizeof(struct ethhdr)); | ||
if ((void *)(iph + 1) > data_end) | ||
return TC_ACT_SHOT; | ||
ihl = iph->ihl * 4; | ||
if (((void *)iph) + ihl > data_end) | ||
return TC_ACT_SHOT; | ||
|
||
if (iph->protocol == IPPROTO_TCP) | ||
return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl)); | ||
|
||
return TC_ACT_OK; | ||
} | ||
|
||
SEC("cls_test") int tc_prog(struct __sk_buff *skb) | ||
{ | ||
if (skb->protocol == bpf_htons(ETH_P_IP)) | ||
return handle_ipv4(skb); | ||
|
||
return TC_ACT_OK; | ||
} | ||
|
||
char __license[] SEC("license") = "GPL"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash | ||
# SPDX-License-Identifier: GPL-2.0 | ||
# | ||
# This test installs a TC bpf program that throttles a TCP flow | ||
# with dst port = 9000 down to 5MBps. Then it measures actual | ||
# throughput of the flow. | ||
|
||
# The namespace and tc setup below is only attempted as root (EUID 0).
if (( EUID != 0 )); then
	echo "This script must be run as root"
	echo "FAIL"
	exit 1
fi
|
||
# check that nc, dd, and timeout are present
# The message is built from the tool name being tested, so each missing
# tool is reported under its own name.
for tool in nc dd timeout; do
	command -v "${tool}" >/dev/null 2>&1 || \
		{ echo >&2 "${tool} is not available"; exit 1; }
done
|
||
# Network namespace names; each gets a random suffix generated by mktemp -u.
declare -r NS_SRC="ns-src-$(mktemp -u XXXXXX)"
declare -r NS_DST="ns-dst-$(mktemp -u XXXXXX)"

# Addresses assigned to the source and destination ends of the veth pair.
declare -r IP_SRC="172.16.1.100"
declare -r IP_DST="172.16.2.100"
|
||
# Delete both test namespaces, source first.
cleanup()
{
	local ns

	for ns in "${NS_SRC}" "${NS_DST}"; do
		ip netns del "${ns}"
	done
}

# Run cleanup whenever the script exits.
trap cleanup EXIT
|
||
set -e  # exit on error

# Two namespaces joined by a veth pair, one end in each.
ip netns add "${NS_SRC}"
ip netns add "${NS_DST}"
ip link add veth_src type veth peer name veth_dst
ip link set veth_src netns "${NS_SRC}"
ip link set veth_dst netns "${NS_DST}"

ip -netns "${NS_SRC}" addr add "${IP_SRC}/24" dev veth_src
ip -netns "${NS_DST}" addr add "${IP_DST}/24" dev veth_dst

ip -netns "${NS_SRC}" link set dev veth_src up
ip -netns "${NS_DST}" link set dev veth_dst up

# The two addresses sit in different /24 networks, so each side gets an
# explicit host route to its peer.
ip -netns "${NS_SRC}" route add "${IP_DST}/32" dev veth_src
ip -netns "${NS_DST}" route add "${IP_SRC}/32" dev veth_dst

# set up TC on TX: fq as root qdisc, BPF classifier on clsact egress
ip netns exec "${NS_SRC}" tc qdisc add dev veth_src root fq
ip netns exec "${NS_SRC}" tc qdisc add dev veth_src clsact
ip netns exec "${NS_SRC}" tc filter add dev veth_src egress \
	bpf da obj test_tc_edt.o sec cls_test
|
||
|
||
# start the listener: nc is backgrounded by the bash spawned inside NS_DST
ip netns exec "${NS_DST}" bash -c "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
# NOTE(review): the '&' is executed by the child bash, not by this shell, so
# $! here is presumably not the PID of nc; NC_PID is not referenced anywhere
# else in the visible script. Confirm before relying on it.
declare -i NC_PID=$!
# give the listener a moment before load is started
sleep 1
|
||
declare -ir TIMEOUT=20
declare -ir EXPECTED_BPS=5000000

# Print the cumulative rx_bytes counter of veth_dst as seen inside NS_DST.
read_rx_bytes()
{
	ip netns exec "${NS_DST}" cat /sys/class/net/veth_dst/statistics/rx_bytes
}

# run the load, capture RX bytes on DST
declare -ir RX_BYTES_START=$( read_rx_bytes )

# The sender's exit status is deliberately ignored: errexit is switched off
# around it and restored afterwards.
set +e
ip netns exec "${NS_SRC}" bash -c \
	"timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
set -e

declare -ir RX_BYTES_END=$( read_rx_bytes )

# Average received bytes per second, taking TIMEOUT as the elapsed time.
declare -ir ACTUAL_BPS=$(( (RX_BYTES_END - RX_BYTES_START) / TIMEOUT ))
|
||
# Report the elapsed time and the relative deviation from the expected rate.
awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
	$1, ($2-$3)*100.0/$3}' <<< "${TIMEOUT} ${ACTUAL_BPS} ${EXPECTED_BPS}"

# Pass the test if the actual bps is within 1% of the expected bps.
# The difference is usually about 0.1% on a 20-sec test, and approaches
# zero the longer the test runs.
# RES is 1 when the deviation exceeds 1%, 0 otherwise.
declare -ir RES=$( awk 'function abs(x){return ((x < 0.0) ? -x : x)}
	{if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
	else { print "0"} }' <<< "${ACTUAL_BPS} ${EXPECTED_BPS}" )

if (( RES != 0 )); then
	echo "FAIL"
	exit 1
fi

echo "PASS"