From 9e7aaa7c65f170039501c4d4b24d99640e2d519a Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 14 Sep 2022 13:21:48 +0200 Subject: [PATCH 1/5] selftests: mlxsw: Use shapers in QOS tests instead of forcing speed QOS tests create congestion and verify the switch behavior. To create congestion, they need to have more traffic than the port can handle, so some of them force 1Gbps speed. The tests assume that 1Gbps speed is supported, otherwise, they will fail. Spectrum-4 ASIC will not support this speed in all ports, so to be able to run QOS tests there, some adjustments are required. Use shapers to limit the traffic instead of forcing speed. Note that for several ports, the speed configuration is just for autoneg issues, so shaper is not needed instead. In tests that already use shapers, set the existing shaper to be a child of a new TBF shaper which is added as a root qdisc and acts as a port shaper. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/qos_ets_strict.sh | 5 +++-- .../selftests/drivers/net/mlxsw/qos_mc_aware.sh | 9 +++++---- .../selftests/drivers/net/mlxsw/sch_ets.sh | 15 ++++++++------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index e9f8718af979a..690d8daa71b49 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -130,7 +130,8 @@ switch_create() ip link set dev $swp3 up mtu_set $swp3 10000 - ethtool -s $swp3 speed 1000 autoneg off + tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \ + burst 128K limit 1G vlan_create $swp1 111 vlan_create $swp2 222 @@ -193,7 +194,7 @@ switch_destroy() vlan_destroy $swp2 222 vlan_destroy $swp1 111 - ethtool -s $swp3 autoneg on + tc qdisc del dev $swp3 root handle 101: mtu_restore $swp3 
ip link set dev $swp3 down lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 8f164c80e2154..c8e55fa916609 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -129,9 +129,10 @@ switch_create() vlan_create $swp2 111 vlan_create $swp3 111 - ethtool -s $swp3 speed 1000 autoneg off - tc qdisc replace dev $swp3 root handle 3: \ - prio bands 8 priomap 7 7 7 7 7 7 7 7 + tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \ + burst 128K limit 1G + tc qdisc replace dev $swp3 parent 3:3 handle 33: \ + prio bands 8 priomap 7 7 7 7 7 7 7 7 ip link add name br1 type bridge vlan_filtering 0 ip link set dev br1 up @@ -172,8 +173,8 @@ switch_destroy() ip link del dev br111 ip link del dev br1 + tc qdisc del dev $swp3 parent 3:3 handle 33: tc qdisc del dev $swp3 root handle 3: - ethtool -s $swp3 autoneg on vlan_destroy $swp3 111 vlan_destroy $swp2 111 diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index af64bc9ea8ab7..ceaa76b17a43c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -15,13 +15,15 @@ ALL_TESTS=" ets_test_dwrr " +PARENT="parent 3:3" + switch_create() { - ets_switch_create - # Create a bottleneck so that the DWRR process can kick in. - ethtool -s $h2 speed 1000 autoneg off - ethtool -s $swp2 speed 1000 autoneg off + tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \ + burst 128K limit 1G + + ets_switch_create # Set the ingress quota high and use the three egress TCs to limit the # amount of traffic that is admitted to the shared buffers. 
This makes @@ -55,10 +57,9 @@ switch_destroy() devlink_tc_bind_pool_th_restore $swp1 0 ingress devlink_port_pool_th_restore $swp1 0 - ethtool -s $swp2 autoneg on - ethtool -s $h2 autoneg on - ets_switch_destroy + + tc qdisc del dev $swp2 root handle 3: } # Callback from sch_ets_tests.sh From 61a00b196aaf5ba3d6ffb94e93c5d57bed449a32 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 14 Sep 2022 13:21:49 +0200 Subject: [PATCH 2/5] selftests: mlxsw: Use shapers in QOS RED tests instead of forcing speed QOS tests create congestion and verify the switch behavior. To create congestion, they need to have more traffic than the port can handle, so some of them force 1Gbps speed. The tests assume that 1Gbps speed is supported; otherwise, they will fail. The Spectrum-4 ASIC will not support this speed on all ports, so to be able to run the tests there, some adjustments are required. Use shapers to limit the traffic instead of forcing speed. Note that for several ports, the speed configuration is just for autoneg issues, so no shaper is needed in its place. The tests already use ETS qdisc as a root and RED qdiscs as children. Add a new TBF shaper to limit the rate of traffic, and use it as a root qdisc, then save the previous hierarchy of qdiscs under the new TBF root. In some ASICs, the shapers do not limit the traffic as accurately as forcing speed. To make the tests stable, allow the backlog size to be up to +-10% of the threshold. The aim of the tests is to make sure that with backlog << threshold, there are no drops, and that packets are dropped somewhere in the vicinity of the configured threshold. 
Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/sch_red_core.sh | 23 +++++++++---------- .../drivers/net/mlxsw/sch_red_ets.sh | 4 ++-- .../drivers/net/mlxsw/sch_red_root.sh | 4 ++-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index f260f01db0e80..45b41b8f32322 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -135,14 +135,16 @@ h2_create() # cause packets to fail to queue up at $swp3 due to shared buffer # quotas, and the test to spuriously fail. # - # Prevent this by setting the speed of $h2 to 1Gbps. + # Prevent this by adding a shaper which limits the traffic in $h2 to + # 1Gbps. - ethtool -s $h2 speed 1000 autoneg off + tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \ + burst 128K limit 1G } h2_destroy() { - ethtool -s $h2 autoneg on + tc qdisc del dev $h2 root handle 10: tc qdisc del dev $h2 clsact host_destroy $h2 } @@ -150,12 +152,10 @@ h2_destroy() h3_create() { host_create $h3 3 - ethtool -s $h3 speed 1000 autoneg off } h3_destroy() { - ethtool -s $h3 autoneg on host_destroy $h3 } @@ -199,8 +199,9 @@ switch_create() done done - for intf in $swp2 $swp3 $swp4 $swp5; do - ethtool -s $intf speed 1000 autoneg off + for intf in $swp3 $swp4; do + tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \ + burst 128K limit 1G done ip link set dev br1_10 up @@ -220,15 +221,13 @@ switch_destroy() devlink_port_pool_th_restore $swp3 8 - tc qdisc del dev $swp3 root 2>/dev/null - ip link set dev br2_11 down ip link set dev br2_10 down ip link set dev br1_11 down ip link set dev br1_10 down - for intf in $swp5 $swp4 $swp3 $swp2; do - ethtool -s $intf autoneg on + for intf in $swp4 $swp3; do + tc qdisc del dev $intf root handle 1: done for intf in $swp5 
$swp3 $swp2 $swp4 $swp1; do @@ -536,7 +535,7 @@ do_red_test() check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((0 <= pct && pct <= 10)) + ((-10 <= pct && pct <= 10)) check_err $? "backlog $backlog / $limit expected <= 10% distance" log_test "TC $((vlan - 10)): RED backlog > limit" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 7a73057206cd0..0d01c7cd82a18 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -25,7 +25,7 @@ BACKLOG2=500000 install_root_qdisc() { - tc qdisc add dev $swp3 root handle 10: $QDISC \ + tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \ bands 8 priomap 7 6 5 4 3 2 1 0 } @@ -67,7 +67,7 @@ uninstall_qdisc_tc1() uninstall_root_qdisc() { - tc qdisc del dev $swp3 root + tc qdisc del dev $swp3 parent 1: } uninstall_qdisc() diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index 501d192529ac0..860205338e6fb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -18,7 +18,7 @@ install_qdisc() { local -a args=("$@") - tc qdisc add dev $swp3 root handle 108: red \ + tc qdisc add dev $swp3 parent 1: handle 108: red \ limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \ probability 1.0 avpkt 8000 burst 38 "${args[@]}" sleep 1 @@ -26,7 +26,7 @@ install_qdisc() uninstall_qdisc() { - tc qdisc del dev $swp3 root + tc qdisc del dev $swp3 parent 1: } ecn_test() From bd3f7850720c2f1086718546034742783dc3a371 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 14 Sep 2022 13:21:50 +0200 Subject: [PATCH 3/5] selftests: devlink_lib: Add function for querying maximum pool size The maximum pool size is exposed via 'devlink sb' command. 
The next patch will add a test which increases some pools to the maximum size. Add a function to query the value. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index de9944d42027c..601990c6881bf 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -584,3 +584,8 @@ devlink_cell_size_get() devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ | jq '.pool[][].cell_size' } + +devlink_pool_size_get() +{ + devlink sb show "$DEVLINK_DEV" -j | jq '.[][][]["size"]' +} From 5ab0cf142bb7242b37ab678b09886a2aa65e8bfb Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 14 Sep 2022 13:21:51 +0200 Subject: [PATCH 4/5] selftests: mlxsw: Add QOS test for maximum use of descriptors Add an equivalent test to qos_burst. The test's purpose is the same, but the new test uses a simpler topology and does not require forcing low speed. In addition, it can be run on Spectrum-2 and not only on Spectrum-3+. The idea is to use a shaper in order to limit the traffic and create congestion. The qos_burst test uses a small pool, sends many small packets, and verifies that packets are not dropped, which means that many descriptors can be handled. This test should check the change that commit c864769add96 ("mlxsw: Configure descriptor buffers") pushed. Instead, the new test tries to use more than 85% of maximum supported descriptors. The idea is to use a big pool (as much as the ASIC supports), such that the pool size does not limit the traffic, then send many small packets, which means that many descriptors are used, and check how many packets the switch can handle. 
Using a shaper allows the test to run on all ASICs, regardless of the CPU abilities, as it is able to create the congestion with a low rate of packets. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/mlxsw_lib.sh | 14 + .../drivers/net/mlxsw/qos_max_descriptors.sh | 282 ++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh index a95856aafd2ab..6369927e9c378 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -61,3 +61,17 @@ mlxsw_only_on_spectrum() return 1 } + +mlxsw_max_descriptors_get() +{ + local spectrum_rev=$MLXSW_SPECTRUM_REV + + case $spectrum_rev in + 1) echo 81920 ;; + 2) echo 136960 ;; + 3) echo 204800 ;; + 4) echo 220000 ;; + *) echo "Unknown max descriptors for chip revision." > /dev/stderr + return 1 ;; + esac +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh new file mode 100755 index 0000000000000..5ac4f795e333b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -0,0 +1,282 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sends many small packets (size is less than cell size) through the +# switch. A shaper is used in $swp2, so the traffic is limited there. Packets +# are queued till they will be sent. +# +# The idea is to verify that the switch can handle at least 85% of maximum +# supported descriptors by hardware. Then, we verify that the driver configures +# firmware to allow infinite size of egress descriptor pool, and does not use a +# lower limitation. 
Increase the size of the relevant pools such that the pool's +# size does not limit the traffic. + +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +# | +# +---|-----------------------------+ +# | + $swp1 | +# | | iPOOL1 | +# | | | +# | +-|------------------------+ | +# | | + $swp1.111 | | +# | | | | +# | | BR1 | | +# | | | | +# | | + $swp2.111 | | +# | +-|------------------------+ | +# | | | +# | + $swp2 | +# | | ePOOL6 | +# | | 1mbit | +# +---+-----------------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# + +ALL_TESTS=" + ping_ipv4 + max_descriptors +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +MAX_POOL_SIZE=$(devlink_pool_size_get) +SHAPER_RATE=1mbit + +# The current TBF qdisc interface does not allow us to configure the shaper to +# flat zero. The ASIC shaper is guaranteed to work with a granularity of +# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works +# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the +# test on Spectrum-1. 
+mlxsw_only_on_spectrum 2+ || exit + +h1_create() +{ + simple_if_init $h1 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + simple_if_fini $h2 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + + devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE + devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE + + # $swp1 + # ----- + + ip link set dev $swp1 up + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 16 + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + + # $swp2 + # ----- + + ip link set dev $swp2 up + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE + + tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \ + burst 128K limit 500M + tc qdisc replace dev $swp2 parent 1:1 handle 11: \ + ets bands 8 strict 8 priomap 7 6 + + # bridge + # ------ + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev br1 up + + ip link set dev $swp2.111 master br1 +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. 
+ devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 1 + + # bridge + # ------ + + ip link set dev $swp2.111 nomaster + + ip link set dev br1 down + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:1 handle 11: + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + ip link set dev $swp2 down + + # $swp1 + # ----- + + dcb buffer set dev $swp1 prio-buffer all:0 + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " h1->h2" +} + +percentage_used() +{ + local num_packets=$1; shift + local max_packets=$1; shift + + bc <<< " + scale=2 + 100 * $num_packets / $max_packets + " +} + +max_descriptors() +{ + local cell_size=$(devlink_cell_size_get) + local exp_perc_used=85 + local max_descriptors + local pktsize=30 + + RET=0 + + max_descriptors=$(mlxsw_max_descriptors_get) || exit 1 + + local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + + log_info "Send many small packets, packet size = $pktsize bytes" + start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + + # Sleep to wait for congestion. + sleep 5 + + local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + ((d1 == d0)) + check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))" + + # Check how many packets the switch can handle, the limitation is + # maximum descriptors. 
+ local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + local pkts_num=$((pkts_bytes / cell_size)) + local perc_used=$(percentage_used $pkts_num $max_descriptors) + + check_err $(bc <<< "$perc_used < $exp_perc_used") \ + "Expected > $exp_perc_used% of descriptors, handle $perc_used%" + + stop_traffic + sleep 1 + + log_test "Maximum descriptors usage. The percentage used is $perc_used%" +} + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS From 72981ef2d196d03ebab2bf9e4578a4c17e8078dd Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 14 Sep 2022 13:21:52 +0200 Subject: [PATCH 5/5] selftests: mlxsw: Remove qos_burst test The previous patch added a test which can be used instead of qos_burst.sh. Remove this test. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/qos_burst.sh | 480 ------------------ 1 file changed, 480 deletions(-) delete mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh deleted file mode 100755 index 82a47b903f928..0000000000000 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh +++ /dev/null @@ -1,480 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sends 1Gbps of traffic through the switch, into which it then -# injects a burst of traffic and tests that there are no drops. -# -# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream -# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a -# 1Gbps $swp3. -# -# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to -# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog -# is generated. -# -# At this point, a burst of traffic is forwarded from H3. 
This enters $swp5, is -# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The -# expectation is that the burst is wholly absorbed by the large pool and no -# drops are caused. After the burst, there should be a backlog that is hard to -# get rid of, because $sw2 is fully subscribed. But because each individual -# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the -# test. -# -# +-----------------------+ +-----------------------+ -# | H1 | | H3 | -# | + $h1.111 | | $h3.111 + | -# | | 192.0.2.33/28 | | 192.0.2.35/28 | | -# | | | | | | -# | + $h1 | | $h3 + | -# +---|-------------------+ +--------------------+ +------------------|----+ -# | | | | -# +---|----------------------|--------------------|----------------------|----+ -# | + $swp1 $swp3 + + $swp4 $swp5 | | -# | | iPOOL1 iPOOL0 | | iPOOL2 iPOOL2 | | -# | | ePOOL4 ePOOL5 | | ePOOL4 ePOOL4 | | -# | | 1Gbps | | 1Gbps | | -# | +-|----------------------|-+ +-|----------------------|-+ | -# | | + $swp1.111 $swp3.111 + | | + $swp4.111 $swp5.111 + | | -# | | | | | | -# | | BR1 | | BR2 | | -# | | | | | | -# | | | | + $swp2.111 | | -# | +--------------------------+ +---------|----------------+ | -# | | | -# | iPOOL0: 500KB dynamic | | -# | iPOOL1: 500KB dynamic | | -# | iPOOL2: 10MB dynamic + $swp2 | -# | ePOOL4: 500KB dynamic | iPOOL0 | -# | ePOOL5: 500KB dnamic | ePOOL6 | -# | ePOOL6: 10MB dynamic | 1Gbps | -# +-------------------------------------------------------|-------------------+ -# | -# +---|-------------------+ -# | + $h2 H2 | -# | | 1Gbps | -# | | | -# | + $h2.111 | -# | 192.0.2.34/28 | -# +-----------------------+ -# -# iPOOL0+ePOOL4 are helper pools for control traffic etc. 
-# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream -# iPOOL2+ePOOL6 are pools for soaking the burst traffic - -ALL_TESTS=" - ping_ipv4 - test_8K - test_800 -" - -lib_dir=$(dirname $0)/../../../net/forwarding - -NUM_NETIFS=8 -source $lib_dir/lib.sh -source $lib_dir/devlink_lib.sh -source qos_lib.sh -source mlxsw_lib.sh - -_1KB=1000 -_500KB=$((500 * _1KB)) -_1MB=$((1000 * _1KB)) - -# The failure mode that this specifically tests is exhaustion of descriptor -# buffer. The point is to produce a burst that shared buffer should be able -# to accommodate, but produce it with small enough packets that the machine -# runs out of the descriptor buffer space with default configuration. -# -# The machine therefore needs to be able to produce line rate with as small -# packets as possible, and at the same time have large enough buffer that -# when filled with these small packets, it runs out of descriptors. -# Spectrum-2 is very close, but cannot perform this test. Therefore use -# Spectrum-3 as a minimum, and permit larger burst size, and therefore -# larger packets, to reduce spurious failures. 
-# -mlxsw_only_on_spectrum 3+ || exit - -BURST_SIZE=$((50000000)) -POOL_SIZE=$BURST_SIZE - -h1_create() -{ - simple_if_init $h1 - mtu_set $h1 10000 - - vlan_create $h1 111 v$h1 192.0.2.33/28 - ip link set dev $h1.111 type vlan egress-qos-map 0:1 -} - -h1_destroy() -{ - vlan_destroy $h1 111 - - mtu_restore $h1 - simple_if_fini $h1 -} - -h2_create() -{ - simple_if_init $h2 - mtu_set $h2 10000 - ethtool -s $h2 speed 1000 autoneg off - - vlan_create $h2 111 v$h2 192.0.2.34/28 -} - -h2_destroy() -{ - vlan_destroy $h2 111 - - ethtool -s $h2 autoneg on - mtu_restore $h2 - simple_if_fini $h2 -} - -h3_create() -{ - simple_if_init $h3 - mtu_set $h3 10000 - - vlan_create $h3 111 v$h3 192.0.2.35/28 -} - -h3_destroy() -{ - vlan_destroy $h3 111 - - mtu_restore $h3 - simple_if_fini $h3 -} - -switch_create() -{ - # pools - # ----- - - devlink_pool_size_thtype_save 0 - devlink_pool_size_thtype_save 4 - devlink_pool_size_thtype_save 1 - devlink_pool_size_thtype_save 5 - devlink_pool_size_thtype_save 2 - devlink_pool_size_thtype_save 6 - - devlink_port_pool_th_save $swp1 1 - devlink_port_pool_th_save $swp2 6 - devlink_port_pool_th_save $swp3 5 - devlink_port_pool_th_save $swp4 2 - devlink_port_pool_th_save $swp5 2 - - devlink_tc_bind_pool_th_save $swp1 1 ingress - devlink_tc_bind_pool_th_save $swp2 1 egress - devlink_tc_bind_pool_th_save $swp3 1 egress - devlink_tc_bind_pool_th_save $swp4 1 ingress - devlink_tc_bind_pool_th_save $swp5 1 ingress - - # Control traffic pools. Just reduce the size. - devlink_pool_size_thtype_set 0 dynamic $_500KB - devlink_pool_size_thtype_set 4 dynamic $_500KB - - # Stream modeling pools. - devlink_pool_size_thtype_set 1 dynamic $_500KB - devlink_pool_size_thtype_set 5 dynamic $_500KB - - # Burst soak pools. 
- devlink_pool_size_thtype_set 2 static $POOL_SIZE - devlink_pool_size_thtype_set 6 static $POOL_SIZE - - # $swp1 - # ----- - - ip link set dev $swp1 up - mtu_set $swp1 10000 - vlan_create $swp1 111 - ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 - - devlink_port_pool_th_set $swp1 1 16 - devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 - - # Configure qdisc... - tc qdisc replace dev $swp1 root handle 1: \ - ets bands 8 strict 8 priomap 7 6 - # ... so that we can assign prio1 traffic to PG1. - dcb buffer set dev $swp1 prio-buffer all:0 1:1 - - # $swp2 - # ----- - - ip link set dev $swp2 up - mtu_set $swp2 10000 - ethtool -s $swp2 speed 1000 autoneg off - vlan_create $swp2 111 - ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 - - devlink_port_pool_th_set $swp2 6 $POOL_SIZE - devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE - - # prio 0->TC0 (band 7), 1->TC1 (band 6) - tc qdisc replace dev $swp2 root handle 1: \ - ets bands 8 strict 8 priomap 7 6 - - # $swp3 - # ----- - - ip link set dev $swp3 up - mtu_set $swp3 10000 - ethtool -s $swp3 speed 1000 autoneg off - vlan_create $swp3 111 - ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 - - devlink_port_pool_th_set $swp3 5 16 - devlink_tc_bind_pool_th_set $swp3 1 egress 5 16 - - # prio 0->TC0 (band 7), 1->TC1 (band 6) - tc qdisc replace dev $swp3 root handle 1: \ - ets bands 8 strict 8 priomap 7 6 - - # $swp4 - # ----- - - ip link set dev $swp4 up - mtu_set $swp4 10000 - ethtool -s $swp4 speed 1000 autoneg off - vlan_create $swp4 111 - ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 - - devlink_port_pool_th_set $swp4 2 $POOL_SIZE - devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE - - # Configure qdisc... - tc qdisc replace dev $swp4 root handle 1: \ - ets bands 8 strict 8 priomap 7 6 - # ... so that we can assign prio1 traffic to PG1. 
- dcb buffer set dev $swp4 prio-buffer all:0 1:1 - - # $swp5 - # ----- - - ip link set dev $swp5 up - mtu_set $swp5 10000 - vlan_create $swp5 111 - ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1 - - devlink_port_pool_th_set $swp5 2 $POOL_SIZE - devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE - - # Configure qdisc... - tc qdisc replace dev $swp5 root handle 1: \ - ets bands 8 strict 8 priomap 7 6 - # ... so that we can assign prio1 traffic to PG1. - dcb buffer set dev $swp5 prio-buffer all:0 1:1 - - # bridges - # ------- - - ip link add name br1 type bridge vlan_filtering 0 - ip link set dev $swp1.111 master br1 - ip link set dev $swp3.111 master br1 - ip link set dev br1 up - - ip link add name br2 type bridge vlan_filtering 0 - ip link set dev $swp2.111 master br2 - ip link set dev $swp4.111 master br2 - ip link set dev $swp5.111 master br2 - ip link set dev br2 up -} - -switch_destroy() -{ - # Do this first so that we can reset the limits to values that are only - # valid for the original static / dynamic setting. 
- devlink_pool_size_thtype_restore 6 - devlink_pool_size_thtype_restore 5 - devlink_pool_size_thtype_restore 4 - devlink_pool_size_thtype_restore 2 - devlink_pool_size_thtype_restore 1 - devlink_pool_size_thtype_restore 0 - - # bridges - # ------- - - ip link set dev br2 down - ip link set dev $swp5.111 nomaster - ip link set dev $swp4.111 nomaster - ip link set dev $swp2.111 nomaster - ip link del dev br2 - - ip link set dev br1 down - ip link set dev $swp3.111 nomaster - ip link set dev $swp1.111 nomaster - ip link del dev br1 - - # $swp5 - # ----- - - dcb buffer set dev $swp5 prio-buffer all:0 - tc qdisc del dev $swp5 root - - devlink_tc_bind_pool_th_restore $swp5 1 ingress - devlink_port_pool_th_restore $swp5 2 - - vlan_destroy $swp5 111 - mtu_restore $swp5 - ip link set dev $swp5 down - - # $swp4 - # ----- - - dcb buffer set dev $swp4 prio-buffer all:0 - tc qdisc del dev $swp4 root - - devlink_tc_bind_pool_th_restore $swp4 1 ingress - devlink_port_pool_th_restore $swp4 2 - - vlan_destroy $swp4 111 - ethtool -s $swp4 autoneg on - mtu_restore $swp4 - ip link set dev $swp4 down - - # $swp3 - # ----- - - tc qdisc del dev $swp3 root - - devlink_tc_bind_pool_th_restore $swp3 1 egress - devlink_port_pool_th_restore $swp3 5 - - vlan_destroy $swp3 111 - ethtool -s $swp3 autoneg on - mtu_restore $swp3 - ip link set dev $swp3 down - - # $swp2 - # ----- - - tc qdisc del dev $swp2 root - - devlink_tc_bind_pool_th_restore $swp2 1 egress - devlink_port_pool_th_restore $swp2 6 - - vlan_destroy $swp2 111 - ethtool -s $swp2 autoneg on - mtu_restore $swp2 - ip link set dev $swp2 down - - # $swp1 - # ----- - - dcb buffer set dev $swp1 prio-buffer all:0 - tc qdisc del dev $swp1 root - - devlink_tc_bind_pool_th_restore $swp1 1 ingress - devlink_port_pool_th_restore $swp1 1 - - vlan_destroy $swp1 111 - mtu_restore $swp1 - ip link set dev $swp1 down -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - swp1=${NETIFS[p2]} - - swp2=${NETIFS[p3]} - h2=${NETIFS[p4]} - - swp3=${NETIFS[p5]} - 
swp4=${NETIFS[p6]} - - swp5=${NETIFS[p7]} - h3=${NETIFS[p8]} - - h2mac=$(mac_get $h2) - - vrf_prepare - - h1_create - h2_create - h3_create - switch_create -} - -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - -ping_ipv4() -{ - ping_test $h1 192.0.2.34 " h1->h2" - ping_test $h3 192.0.2.34 " h3->h2" -} - -__test_qos_burst() -{ - local pktsize=$1; shift - - RET=0 - - start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac - sleep 1 - - local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) - ((q0 == 0)) - check_err $? "Transmit queue non-zero?" - - local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) - - local cell_size=$(devlink_cell_size_get) - local cells=$((BURST_SIZE / cell_size)) - # Each packet is $pktsize of payload + headers. - local pkt_cells=$(((pktsize + 50 + cell_size - 1) / cell_size)) - # How many packets can we admit: - local pkts=$((cells / pkt_cells)) - - $MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \ - -a own -b $h2mac -c $pkts -t udp -q - sleep 1 - - local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) - ((d1 == d0)) - check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))" - - # Check that the queue is somewhat close to the burst size This - # makes sure that the lack of drops above was not due to port - # undersubscribtion. - local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) - local qe=$((90 * BURST_SIZE / 100)) - ((q0 > qe)) - check_err $? "Queue size expected >$qe, got $q0" - - stop_traffic - sleep 2 - - log_test "Burst: absorb $pkts ${pktsize}-B packets" -} - -test_8K() -{ - __test_qos_burst 8000 -} - -test_800() -{ - __test_qos_burst 800 -} - -bail_on_lldpad - -trap cleanup EXIT -setup_prepare -setup_wait -tests_run - -exit $EXIT_STATUS