diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 685fb37df8e8e..56cb7be92244c 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -23,6 +23,12 @@ enum skb_drop_reason_subsys { */ SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR, + /** + * @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons, + * see net/openvswitch/drop.h + */ + SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */ SKB_DROP_REASON_SUBSYS_NUM }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index e94870e77ee96..efc82c318fa2a 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -965,6 +965,7 @@ struct check_pkt_len_arg { * start of the packet or at the start of the l3 header depending on the value * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. + * @OVS_ACTION_ATTR_DROP: Explicit drop action. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1002,6 +1003,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ + OVS_ACTION_ATTR_DROP, /* u32 error code. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index cab1e02b63e05..fd66014d8a76a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -27,6 +27,7 @@ #include #include "datapath.h" +#include "drop.h" #include "flow.h" #include "conntrack.h" #include "vport.h" @@ -781,7 +782,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); return -ENOMEM; } @@ -852,6 +853,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, struct sk_buff *skb, u16 mru, struct sw_flow_key *key) { + enum ovs_drop_reason reason; u16 orig_network_offset = 0; if (eth_p_mpls(skb->protocol)) { @@ -861,6 +863,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); + reason = OVS_DROP_FRAG_L2_TOO_LONG; goto err; } @@ -901,12 +904,13 @@ static void ovs_fragment(struct net *net, struct vport *vport, WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", ovs_vport_name(vport), ntohs(key->eth.type), mru, vport->dev->mtu); + reason = OVS_DROP_FRAG_INVALID_PROTO; goto err; } return; err: - kfree_skb(skb); + ovs_kfree_skb_reason(skb, reason); } static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, @@ -933,10 +937,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, ovs_fragment(net, vport, skb, mru, key); } else { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); } } else { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY); } } @@ -1010,7 +1014,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, return clone_execute(dp, skb, key, 0, nla_data(actions), nla_len(actions), true, false); - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL); return 0; } @@ -1036,7 +1040,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { if (last) - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); return 0; } @@ -1297,6 +1301,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, if (trace_ovs_do_execute_action_enabled()) trace_ovs_do_execute_action(dp, skb, key, a, rem); + /* Actions that rightfully have to consume the skb should do it + * and return directly. + */ switch (nla_type(a)) { case OVS_ACTION_ATTR_OUTPUT: { int port = nla_get_u32(a); @@ -1332,6 +1339,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, output_userspace(dp, skb, key, a, attr, len, OVS_CB(skb)->cutlen); OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } break; case OVS_ACTION_ATTR_HASH: @@ -1446,7 +1457,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_METER: if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_METER); return 0; } break; @@ -1477,15 +1488,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return dec_ttl_exception_handler(dp, skb, key, a); break; + + case OVS_ACTION_ATTR_DROP: { + enum ovs_drop_reason reason = nla_get_u32(a) + ? OVS_DROP_EXPLICIT_WITH_ERROR + : OVS_DROP_EXPLICIT; + + ovs_kfree_skb_reason(skb, reason); + return 0; + } } if (unlikely(err)) { - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR); return err; } } - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); return 0; } @@ -1547,7 +1567,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, /* Out of per CPU action FIFO space. Drop the 'skb' and * log an error. */ - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT); if (net_ratelimit()) { if (actions) { /* Sample action */ @@ -1599,7 +1619,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT); err = -ENETDOWN; goto out; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fa955e892210f..0cfa1e9482e63 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -29,6 +29,7 @@ #include #include "datapath.h" +#include "drop.h" #include "conntrack.h" #include "flow.h" #include "flow_netlink.h" @@ -1035,7 +1036,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, skb_push_rcsum(skb, nh_ofs); if (err) - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK); return err; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a6d2a0b1aa21e..d33cb739883f7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -41,6 +41,7 @@ #include #include "datapath.h" +#include "drop.h" #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" @@ -2702,6 +2703,17 @@ static struct pernet_operations ovs_net_ops = { .size = sizeof(struct ovs_net), }; +static const char * const ovs_drop_reasons[] = { +#define S(x) (#x), + OVS_DROP_REASONS(S) +#undef S +}; + +static struct drop_reason_list drop_reason_list_ovs = { + .reasons = ovs_drop_reasons, + .n_reasons = ARRAY_SIZE(ovs_drop_reasons), +}; + static int __init dp_init(void) { int err; @@ -2743,6 +2755,9 @@ static int __init dp_init(void) if (err < 0) goto error_unreg_netdev; + drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + &drop_reason_list_ovs); + return 0; error_unreg_netdev: @@ -2769,6 +2784,7 @@ static void dp_cleanup(void) ovs_netdev_exit(); unregister_netdevice_notifier(&ovs_dp_device_notifier); unregister_pernet_device(&ovs_net_ops); + drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH); rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h new file mode 100644 index 0000000000000..cedf9b7b57962 --- /dev/null +++ b/net/openvswitch/drop.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OpenvSwitch drop reason list. + */ + +#ifndef OPENVSWITCH_DROP_H +#define OPENVSWITCH_DROP_H +#include +#include + +#define OVS_DROP_REASONS(R) \ + R(OVS_DROP_LAST_ACTION) \ + R(OVS_DROP_ACTION_ERROR) \ + R(OVS_DROP_EXPLICIT) \ + R(OVS_DROP_EXPLICIT_WITH_ERROR) \ + R(OVS_DROP_METER) \ + R(OVS_DROP_RECURSION_LIMIT) \ + R(OVS_DROP_DEFERRED_LIMIT) \ + R(OVS_DROP_FRAG_L2_TOO_LONG) \ + R(OVS_DROP_FRAG_INVALID_PROTO) \ + R(OVS_DROP_CONNTRACK) \ + R(OVS_DROP_IP_TTL) \ + /* deliberate comment for trailing \ */ + +enum ovs_drop_reason { + __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH << + SKB_DROP_REASON_SUBSYS_SHIFT, +#define ENUM(x) x, + OVS_DROP_REASONS(ENUM) +#undef ENUM + + OVS_DROP_MAX, +}; + +static inline void +ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason) +{ + kfree_skb_reason(skb, (u32)reason); +} + +#endif /* OPENVSWITCH_DROP_H */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 41116361433da..88965e2068ac6 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -38,6 +38,7 @@ #include #include +#include "drop.h" #include "flow_netlink.h" struct ovs_len_tbl { @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_DROP: break; case OVS_ACTION_ATTR_CT: @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); if (!actions) return; @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, + [OVS_ACTION_ATTR_DROP] = sizeof(u32), }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3453,6 +3456,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_DROP: + if (!nla_is_last(a, rem)) + return -EINVAL; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index dced4f612a78c..9c2012d70b08e 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -16,7 +16,8 @@ tests=" connect_v4 ip4-xon: Basic ipv4 ping between two NS nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT netlink_checks ovsnl: validate netlink attrs and settings - upcall_interfaces ovs: test the upcall interfaces" + upcall_interfaces ovs: test the upcall interfaces + drop_reason drop: test drop reasons are emitted" info() { [ $VERBOSE = 0 ] || echo $* @@ -141,6 +142,25 @@ ovs_add_flow () { return 0 } +ovs_drop_record_and_run () { + local sbx=$1 + shift + + perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \ + >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr + return $? +} + +ovs_drop_reason_count() +{ + local reason=$1 + + local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace` + local pattern="skb:kfree_skb:.*reason: $reason" + + return `echo "$perf_output" | grep "$pattern" | wc -l` +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -155,6 +175,76 @@ usage() { exit 1 } +# drop_reason test +# - drop packets and verify the right drop reason is reported +test_drop_reason() { + which perf >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_drop_reason" || return $? + + ovs_add_dp "test_drop_reason" dropreason || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Allow ARP + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Allow client ICMP traffic but drop return path + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2' + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' + + ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 + ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + if [[ "$?" -ne "2" ]]; then + info "Did not detect expected drops: $?" + return 1 + fi + + # Drop UDP 6000 traffic with an explicit action and an error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \ + 'drop(42)' + # Drop UDP 7000 traffic with an explicit action with no error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \ + 'drop(0)' + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 + ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit error drops: $?" + return 1 + fi + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 + ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit drops: $?" + return 1 + fi + + return 0 +} + # arp_ping test # - client has 1500 byte MTU # - server has 1500 byte MTU @@ -393,6 +483,16 @@ test_netlink_checks () { wc -l) == 2 ] || \ return 1 + ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ + &> /dev/null && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - error not generated" + return 1 + fi return 0 } diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index fbdac15e31344..912dc8c490858 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -301,6 +301,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), + ("OVS_ACTION_ATTR_DROP", "uint32"), ) class ctact(nla): @@ -447,6 +448,8 @@ def dpstr(self, more=False): print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) elif field[0] == "OVS_ACTION_ATTR_TRUNC": print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) elif field[1] == "flag": if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": print_str += "ct_clear" @@ -468,10 +471,21 @@ def parse(self, actstr): while len(actstr) != 0: parsed = False if actstr.startswith("drop"): - # for now, drops have no explicit action, so we - # don't need to set any attributes. The final - # act of the processing chain will just drop the packet - return + # If no reason is provided, the implicit drop is used (i.e no + # action). If some reason is given, an explicit action is used. + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + if reason is not None: + self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) + parsed = True + else: + return elif parse_starts_block(actstr, "^(\d+)", False, True): actstr, output = parse_extract_field(