From b6520fce073b619e6f2c0d510bb3481c9386c70b Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 26 Sep 2019 12:06:45 +0200 Subject: [PATCH 01/18] netfilter: ipset: Add wildcard support to net,iface The net,iface equal functions currently compares the full interface names. In several cases, wildcard (or prefix) matching is useful. For example, when converting a large iptables rule-set to make use of ipset, I was able to significantly reduce the number of set elements by making use of wildcard matching. Wildcard matching is enabled by adding "wildcard" when adding an element to a set. Internally, this causes the IPSET_FLAG_IFACE_WILDCARD-flag to be set. When this flag is set, only the initial part of the interface name is used for comparison. Wildcard matching is done per element and not per set, as there are many cases where mixing wildcard and non-wildcard elements are useful. This means that is up to the user to handle (avoid) overlapping interface names. Signed-off-by: Kristian Evensen Signed-off-by: Jozsef Kadlecsik --- include/uapi/linux/netfilter/ipset/ip_set.h | 2 ++ net/netfilter/ipset/ip_set_hash_netiface.c | 23 ++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index eea166c52c365..11a72a938eb1f 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -205,6 +205,8 @@ enum ipset_cadt_flags { IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), IPSET_FLAG_BIT_WITH_SKBINFO = 6, IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO), + IPSET_FLAG_BIT_IFACE_WILDCARD = 7, + IPSET_FLAG_IFACE_WILDCARD = (1 << IPSET_FLAG_BIT_IFACE_WILDCARD), IPSET_FLAG_CADT_MAX = 15, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 1a04e09297383..be5e95a0d8762 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -25,7 +25,8 @@ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ -#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */ +/* 6 skbinfo support added */ +#define IPSET_TYPE_REV_MAX 7 /* interface wildcard support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -57,6 +58,7 @@ struct hash_netiface4_elem { u8 cidr; u8 nomatch; u8 elem; + u8 wildcard; char iface[IFNAMSIZ]; }; @@ -71,7 +73,9 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && - strcmp(ip1->iface, ip2->iface) == 0; + (ip1->wildcard ? + strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : + strcmp(ip1->iface, ip2->iface) == 0); } static int @@ -103,7 +107,8 @@ static bool hash_netiface4_data_list(struct sk_buff *skb, const struct hash_netiface4_elem *data) { - u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | + (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; @@ -229,6 +234,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); + if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) + e.wildcard = 1; } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); @@ -280,6 +287,7 @@ struct hash_netiface6_elem { u8 cidr; u8 nomatch; u8 elem; + u8 wildcard; char iface[IFNAMSIZ]; }; @@ -294,7 +302,9 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, ip1->cidr == ip2->cidr && (++*multi) && ip1->physdev == ip2->physdev && - strcmp(ip1->iface, ip2->iface) == 0; + (ip1->wildcard ? + strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 : + strcmp(ip1->iface, ip2->iface) == 0); } static int @@ -326,7 +336,8 @@ static bool hash_netiface6_data_list(struct sk_buff *skb, const struct hash_netiface6_elem *data) { - u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) | + (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0); if (data->nomatch) flags |= IPSET_FLAG_NOMATCH; @@ -440,6 +451,8 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); + if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD) + e.wildcard = 1; } ret = adtfn(set, &e, &ext, &ext, flags); From 25da5eb32cd51383f6dca7aad252376f1979c075 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Oct 2019 16:02:50 +0100 Subject: [PATCH 02/18] netfilter: nft_meta: offload support for interface index This patch adds support for offloading the NFT_META_IIF selector. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_offload.h | 1 + net/netfilter/nft_meta.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 03cf5856d76f2..ea7d1d78b92d2 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -45,6 +45,7 @@ struct nft_flow_key { struct flow_dissector_key_ip ip; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_eth_addrs eth_addrs; + struct flow_dissector_key_meta meta; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct nft_flow_match { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 317e3a9e8c5be..8fd21f4363473 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -547,6 +547,10 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, sizeof(__u8), reg); nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT); break; + case NFT_META_IIF: + NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta, + ingress_ifindex, sizeof(__u32), reg); + break; default: return -EOPNOTSUPP; } From f41f72d09ee1e9a980a1675be31120f547f2a648 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 31 Oct 2019 15:51:21 +0100 Subject: [PATCH 03/18] netfilter: nft_payload: simplify vlan header handling If the offset is within the ethernet + vlan header size boundary, then rebuild the ethernet + vlan header and use it to copy the bytes to the register. Otherwise, subtract the vlan header size from the offset and fall back to use skb_copy_bits(). There is one corner case though: If the offset plus the length of the payload instruction goes over the ethernet + vlan header boundary, then, fetch as many bytes as possible from the rebuilt ethernet + vlan header and fall back to copy the remaining bytes through skb_copy_bits(). Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- net/netfilter/nft_payload.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 5cb2d8908d2a5..2477998011653 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -28,17 +28,22 @@ static bool nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) { int mac_off = skb_mac_header(skb) - skb->data; - u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d; + u8 *vlanh, *dst_u8 = (u8 *) d; struct vlan_ethhdr veth; vlanh = (u8 *) &veth; - if (offset < ETH_HLEN) { - u8 ethlen = min_t(u8, len, ETH_HLEN - offset); + if (offset < VLAN_ETH_HLEN) { + u8 ethlen = len; if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN)) return false; veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + if (offset + len > VLAN_ETH_HLEN) + ethlen -= offset + len - VLAN_ETH_HLEN; memcpy(dst_u8, vlanh + offset, ethlen); @@ -48,25 +53,10 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) dst_u8 += ethlen; offset = ETH_HLEN; - } else if (offset >= VLAN_ETH_HLEN) { + } else { offset -= VLAN_HLEN; - goto skip; } - veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); - veth.h_vlan_encapsulated_proto = skb->protocol; - - vlanh += offset; - - vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset); - memcpy(dst_u8, vlanh, vlan_len); - - len -= vlan_len; - if (!len) - return true; - - dst_u8 += vlan_len; - skip: return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } From 8dfd8b09aa347ec96db3b355ad5c82fc6c837bfa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 31 Oct 2019 15:51:22 +0100 Subject: [PATCH 04/18] netfilter: nf_tables: add nft_payload_rebuild_vlan_hdr() Wrap the code to rebuild the ethernet + vlan header into a function. Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- net/netfilter/nft_payload.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 2477998011653..3db9c802ea623 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -23,6 +23,19 @@ #include #include +static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off, + struct vlan_ethhdr *veth) +{ + if (skb_copy_bits(skb, mac_off, veth, ETH_HLEN)) + return false; + + veth->h_vlan_proto = skb->vlan_proto; + veth->h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth->h_vlan_encapsulated_proto = skb->protocol; + + return true; +} + /* add vlan header into the user buffer for if tag was removed by offloads */ static bool nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) @@ -35,13 +48,9 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) if (offset < VLAN_ETH_HLEN) { u8 ethlen = len; - if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN)) + if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) return false; - veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); - veth.h_vlan_encapsulated_proto = skb->protocol; - if (offset + len > VLAN_ETH_HLEN) ethlen -= offset + len - VLAN_ETH_HLEN; From be193f5e21d0ec674badef9fde8eca71fb2d8546 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 2 Nov 2019 15:32:39 +0100 Subject: [PATCH 05/18] netfilter: nf_tables_offload: pass extack to nft_flow_cls_offload_setup() Otherwise this leads to a stack corruption. Fixes: c5d275276ff4 ("netfilter: nf_tables_offload: add nft_flow_cls_offload_setup()") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index cdea3010c7a07..741045eb530ee 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -159,9 +159,9 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, const struct nft_base_chain *basechain, const struct nft_rule *rule, const struct nft_flow_rule *flow, + struct netlink_ext_ack *extack, enum flow_cls_command command) { - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; memset(cls_flow, 0, sizeof(*cls_flow)); @@ -170,7 +170,7 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow, proto = flow->proto; nft_flow_offload_common_init(&cls_flow->common, proto, - basechain->ops.priority, &extack); + basechain->ops.priority, extack); cls_flow->command = command; cls_flow->cookie = (unsigned long) rule; if (flow) @@ -182,6 +182,7 @@ static int nft_flow_offload_rule(struct nft_chain *chain, struct nft_flow_rule *flow, enum flow_cls_command command) { + struct netlink_ext_ack extack = {}; struct flow_cls_offload cls_flow; struct nft_base_chain *basechain; @@ -189,7 +190,8 @@ static int nft_flow_offload_rule(struct nft_chain *chain, return -EOPNOTSUPP; basechain = nft_base_chain(chain); - nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command); + nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack, + command); return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &basechain->flow_block.cb_list); @@ -207,13 +209,15 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo, { struct flow_block_cb *block_cb, *next; struct flow_cls_offload cls_flow; + struct netlink_ext_ack extack; struct nft_chain *chain; struct nft_rule *rule; chain = &basechain->chain; list_for_each_entry(rule, &chain->rules, list) { + memset(&extack, 0, sizeof(extack)); nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL, - FLOW_CLS_DESTROY); + &extack, FLOW_CLS_DESTROY); nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list); } From f6ae9f120dada00abfb47313364c35118469455f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:41:34 +0100 Subject: [PATCH 06/18] netfilter: nft_payload: add C-VLAN support If the encapsulated ethertype announces another inner VLAN header and the offset falls within the boundaries of the inner VLAN header, then adjust arithmetics to include the extra VLAN header length and fetch the bytes from the vlan header in the skbuff data area that represents this inner VLAN header. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 3db9c802ea623..0877d46b8605f 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -43,27 +43,36 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) int mac_off = skb_mac_header(skb) - skb->data; u8 *vlanh, *dst_u8 = (u8 *) d; struct vlan_ethhdr veth; + u8 vlan_hlen = 0; + + if ((skb->protocol == htons(ETH_P_8021AD) || + skb->protocol == htons(ETH_P_8021Q)) && + offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN) + vlan_hlen += VLAN_HLEN; vlanh = (u8 *) &veth; - if (offset < VLAN_ETH_HLEN) { + if (offset < VLAN_ETH_HLEN + vlan_hlen) { u8 ethlen = len; - if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) + if (vlan_hlen && + skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0) + return false; + else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) return false; - if (offset + len > VLAN_ETH_HLEN) - ethlen -= offset + len - VLAN_ETH_HLEN; + if (offset + len > VLAN_ETH_HLEN + vlan_hlen) + ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; - memcpy(dst_u8, vlanh + offset, ethlen); + memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); len -= ethlen; if (len == 0) return true; dst_u8 += ethlen; - offset = ETH_HLEN; + offset = ETH_HLEN + vlan_hlen; } else { - offset -= VLAN_HLEN; + offset -= VLAN_HLEN + vlan_hlen; } return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; From fcbad8293d52864d87d0b9f6035fd87a049d59d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 21:34:28 +0100 Subject: [PATCH 07/18] netfilter: xt_time: use time64_t The current xt_time driver suffers from the y2038 overflow on 32-bit architectures, when the time of day calculations break. Also, on both 32-bit and 64-bit architectures, there is a problem with info->date_start/stop, which is part of the user ABI and overflows in in 2106. Fix the first issue by using time64_t and explicit calls to div_u64() and div_u64_rem(), and document the seconds issue. The explicit 64-bit division is unfortunately slower on 32-bit architectures, but doing it as unsigned lets us use the optimized division-through-multiplication path in most configurations. This should be fine, as the code already does not allow any negative time of day values. Using u32 seconds values consistently would probably also work and be a little more efficient, but that doesn't feel right as it would propagate the y2106 overflow to more place rather than fewer. Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_time.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 8dbb4d48f2ed5..67cb984894153 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -77,12 +77,12 @@ static inline bool is_leap(unsigned int y) * This is done in three separate functions so that the most expensive * calculations are done last, in case a "simple match" can be found earlier. */ -static inline unsigned int localtime_1(struct xtm *r, time_t time) +static inline unsigned int localtime_1(struct xtm *r, time64_t time) { unsigned int v, w; /* Each day has 86400s, so finding the hour/minute is actually easy. */ - v = time % SECONDS_PER_DAY; + div_u64_rem(time, SECONDS_PER_DAY, &v); r->second = v % 60; w = v / 60; r->minute = w % 60; @@ -90,13 +90,13 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time) return v; } -static inline void localtime_2(struct xtm *r, time_t time) +static inline void localtime_2(struct xtm *r, time64_t time) { /* * Here comes the rest (weekday, monthday). First, divide the SSTE * by seconds-per-day to get the number of _days_ since the epoch. */ - r->dse = time / 86400; + r->dse = div_u64(time, SECONDS_PER_DAY); /* * 1970-01-01 (w=0) was a Thursday (4). @@ -105,7 +105,7 @@ static inline void localtime_2(struct xtm *r, time_t time) r->weekday = (4 + r->dse - 1) % 7 + 1; } -static void localtime_3(struct xtm *r, time_t time) +static void localtime_3(struct xtm *r, time64_t time) { unsigned int year, i, w = r->dse; @@ -160,7 +160,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; struct xtm current_time; - s64 stamp; + time64_t stamp; /* * We need real time here, but we can neither use skb->tstamp @@ -173,14 +173,14 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) * 1. match before 13:00 * 2. match after 13:00 * - * If you match against processing time (get_seconds) it + * If you match against processing time (ktime_get_real_seconds) it * may happen that the same packet matches both rules if * it arrived at the right moment before 13:00, so it would be * better to check skb->tstamp and set it via __net_timestamp() * if needed. This however breaks outgoing packets tx timestamp, * and causes them to get delayed forever by fq packet scheduler. */ - stamp = get_seconds(); + stamp = ktime_get_real_seconds(); if (info->flags & XT_TIME_LOCAL_TZ) /* Adjust for local timezone */ @@ -193,6 +193,9 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par) * - 'now' is in the weekday mask * - 'now' is in the daytime range time_start..time_end * (and by default, libxt_time will set these so as to match) + * + * note: info->date_start/stop are unsigned 32-bit values that + * can hold values beyond y2038, but not after y2106. */ if (stamp < info->date_start || stamp > info->date_stop) From 6408c40c39d8eee5caaf97f5219b7dd4e041cc59 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Nov 2019 22:32:47 +0100 Subject: [PATCH 08/18] netfilter: nft_meta: use 64-bit time arithmetic On 32-bit architectures, get_seconds() returns an unsigned 32-bit time value, which also matches the type used in the nft_meta code. This will not overflow in year 2038 as a time_t would, but it still suffers from the overflow problem later on in year 2106. Change this instance to use the time64_t type consistently and avoid the deprecated get_seconds(). The nft_meta_weekday() calculation potentially gets a little slower on 32-bit architectures, but now it has the same behavior as on 64-bit architectures and does not overflow. Fixes: 63d10e12b00d ("netfilter: nft_meta: support for time matching") Signed-off-by: Arnd Bergmann Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8fd21f4363473..8fbea031bd4a7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -33,19 +33,19 @@ static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); -static u8 nft_meta_weekday(unsigned long secs) +static u8 nft_meta_weekday(time64_t secs) { unsigned int dse; u8 wday; secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest; - dse = secs / NFT_META_SECS_PER_DAY; + dse = div_u64(secs, NFT_META_SECS_PER_DAY); wday = (4 + dse) % NFT_META_DAYS_PER_WEEK; return wday; } -static u32 nft_meta_hour(unsigned long secs) +static u32 nft_meta_hour(time64_t secs) { struct tm tm; @@ -250,10 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr, nft_reg_store64(dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: - nft_reg_store8(dest, nft_meta_weekday(get_seconds())); + nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds())); break; case NFT_META_TIME_HOUR: - *dest = nft_meta_hour(get_seconds()); + *dest = nft_meta_hour(ktime_get_real_seconds()); break; default: WARN_ON(1); From 4a766d490d205fbb07712527d0b6956ecbdec5d4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Nov 2019 14:08:00 +0100 Subject: [PATCH 09/18] netfilter: nf_flow_table_offload: add flow_action_entry_next() and use it This function retrieves a spare action entry from the array of actions. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 76 +++++++++++++-------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 9be61f47303a9..b9f669c80713a 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -112,13 +112,22 @@ static void flow_offload_mangle(struct flow_action_entry *entry, memcpy(&entry->mangle.val, value, sizeof(u32)); } +static inline struct flow_action_entry * +flow_action_entry_next(struct nf_flow_rule *flow_rule) +{ + int i = flow_rule->rule->action.num_entries++; + + return &flow_rule->rule->action.entries[i]; +} + static int flow_offload_eth_src(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry0, - struct flow_action_entry *entry1) + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple; + struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); + struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); struct net_device *dev; u32 mask, val; u16 val16; @@ -145,10 +154,11 @@ static int flow_offload_eth_src(struct net *net, static int flow_offload_eth_dst(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry0, - struct flow_action_entry *entry1) + struct nf_flow_rule *flow_rule) { const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple; + struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule); + struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule); struct neighbour *n; u32 mask, val; u16 val16; @@ -175,8 +185,9 @@ static int flow_offload_eth_dst(struct net *net, static void flow_offload_ipv4_snat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; @@ -201,8 +212,9 @@ static void flow_offload_ipv4_snat(struct net *net, static void flow_offload_ipv4_dnat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffffffff); __be32 addr; u32 offset; @@ -246,8 +258,9 @@ static int flow_offload_l4proto(const struct flow_offload *flow) static void flow_offload_port_snat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffff0000); __be16 port; u32 offset; @@ -272,8 +285,9 @@ static void flow_offload_port_snat(struct net *net, static void flow_offload_port_dnat(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); u32 mask = ~htonl(0xffff); __be16 port; u32 offset; @@ -297,9 +311,10 @@ static void flow_offload_port_dnat(struct net *net, static void flow_offload_ipv4_checksum(struct net *net, const struct flow_offload *flow, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); entry->id = FLOW_ACTION_CSUM; entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR; @@ -316,8 +331,9 @@ static void flow_offload_ipv4_checksum(struct net *net, static void flow_offload_redirect(const struct flow_offload *flow, enum flow_offload_tuple_dir dir, - struct flow_action_entry *entry) + struct nf_flow_rule *flow_rule) { + struct flow_action_entry *entry = flow_action_entry_next(flow_rule); struct rtable *rt; rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; @@ -330,39 +346,25 @@ int nf_flow_rule_route(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { - int i; - - if (flow_offload_eth_src(net, flow, dir, - &flow_rule->rule->action.entries[0], - &flow_rule->rule->action.entries[1]) < 0) + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || + flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; - if (flow_offload_eth_dst(net, flow, dir, - &flow_rule->rule->action.entries[2], - &flow_rule->rule->action.entries[3]) < 0) - return -1; - - i = 4; if (flow->flags & FLOW_OFFLOAD_SNAT) { - flow_offload_ipv4_snat(net, flow, dir, - &flow_rule->rule->action.entries[i++]); - flow_offload_port_snat(net, flow, dir, - &flow_rule->rule->action.entries[i++]); + flow_offload_ipv4_snat(net, flow, dir, flow_rule); + flow_offload_port_snat(net, flow, dir, flow_rule); } if (flow->flags & FLOW_OFFLOAD_DNAT) { - flow_offload_ipv4_dnat(net, flow, dir, - &flow_rule->rule->action.entries[i++]); - flow_offload_port_dnat(net, flow, dir, - &flow_rule->rule->action.entries[i++]); + flow_offload_ipv4_dnat(net, flow, dir, flow_rule); + flow_offload_port_dnat(net, flow, dir, flow_rule); } if (flow->flags & FLOW_OFFLOAD_SNAT || flow->flags & FLOW_OFFLOAD_DNAT) - flow_offload_ipv4_checksum(net, flow, - &flow_rule->rule->action.entries[i++]); + flow_offload_ipv4_checksum(net, flow, flow_rule); - flow_offload_redirect(flow, dir, &flow_rule->rule->action.entries[i++]); + flow_offload_redirect(flow, dir, flow_rule); - return i; + return 0; } EXPORT_SYMBOL_GPL(nf_flow_rule_route); @@ -375,7 +377,7 @@ nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload *flow = offload->flow; const struct flow_offload_tuple *tuple; struct nf_flow_rule *flow_rule; - int err = -ENOMEM, num_actions; + int err = -ENOMEM; flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); if (!flow_rule) @@ -394,12 +396,10 @@ nf_flow_offload_rule_alloc(struct net *net, if (err < 0) goto err_flow_match; - num_actions = flowtable->type->action(net, flow, dir, flow_rule); - if (num_actions < 0) + flow_rule->rule->action.num_entries = 0; + if (flowtable->type->action(net, flow, dir, flow_rule) < 0) goto err_flow_match; - flow_rule->rule->action.num_entries = num_actions; - return flow_rule; err_flow_match: From 5c27d8d76ce810c6254cf5917a6019d824f34bd2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Nov 2019 14:08:01 +0100 Subject: [PATCH 10/18] netfilter: nf_flow_table_offload: add IPv6 support Add nf_flow_rule_route_ipv6() and use it from the IPv6 and the inet flowtable type definitions. Rename the nf_flow_rule_route() function to nf_flow_rule_route_ipv4(). Adjust maximum number of actions, which now becomes 16 to leave sufficient room for the IPv6 address mangling for NAT. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 9 ++- net/ipv4/netfilter/nf_flow_table_ipv4.c | 2 +- net/ipv6/netfilter/nf_flow_table_ipv6.c | 2 +- net/netfilter/nf_flow_table_inet.c | 25 +++++- net/netfilter/nf_flow_table_offload.c | 100 ++++++++++++++++++++++-- 5 files changed, 127 insertions(+), 11 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index eea66de328d3e..f0897b3c97fb8 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -163,9 +163,12 @@ void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct net_device *dev, enum flow_block_command cmd); -int nf_flow_rule_route(struct net *net, const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule); +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule); int nf_flow_table_offload_init(void); void nf_flow_table_offload_exit(void); diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index 168b72e18be05..e32e41b99f0f0 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -10,7 +10,7 @@ static struct nf_flowtable_type flowtable_ipv4 = { .family = NFPROTO_IPV4, .init = nf_flow_table_init, .setup = nf_flow_table_offload_setup, - .action = nf_flow_rule_route, + .action = nf_flow_rule_route_ipv4, .free = nf_flow_table_free, .hook = nf_flow_offload_ip_hook, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index f069bc0dc0563..a8566ee12e831 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -11,7 +11,7 @@ static struct nf_flowtable_type flowtable_ipv6 = { .family = NFPROTO_IPV6, .init = nf_flow_table_init, .setup = nf_flow_table_offload_setup, - .action = nf_flow_rule_route, + .action = nf_flow_rule_route_ipv6, .free = nf_flow_table_free, .hook = nf_flow_offload_ipv6_hook, .owner = THIS_MODULE, diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index bfb910b874ce1..88bedf1ff1ae1 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -21,11 +21,34 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; } +static int nf_flow_rule_route_inet(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; + int err; + + switch (flow_tuple->l3proto) { + case NFPROTO_IPV4: + err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule); + break; + case NFPROTO_IPV6: + err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule); + break; + default: + err = -1; + break; + } + + return err; +} + static struct nf_flowtable_type flowtable_inet = { .family = NFPROTO_INET, .init = nf_flow_table_init, .setup = nf_flow_table_offload_setup, - .action = nf_flow_rule_route, + .action = nf_flow_rule_route_inet, .free = nf_flow_table_free, .hook = nf_flow_offload_inet_hook, .owner = THIS_MODULE, diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b9f669c80713a..a14932748bcf0 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -236,6 +236,71 @@ static void flow_offload_ipv4_dnat(struct net *net, (u8 *)&addr, (u8 *)&mask); } +static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, + unsigned int offset, + u8 *addr, u8 *mask) +{ + struct flow_action_entry *entry; + int i; + + for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { + entry = flow_action_entry_next(flow_rule); + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, + offset + i, + &addr[i], mask); + } +} + +static void flow_offload_ipv6_snat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + u32 mask = ~htonl(0xffffffff); + const u8 *addr; + u32 offset; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr; + offset = offsetof(struct ipv6hdr, saddr); + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr; + offset = offsetof(struct ipv6hdr, daddr); + break; + default: + return; + } + + flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask); +} + +static void flow_offload_ipv6_dnat(struct net *net, + const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + u32 mask = ~htonl(0xffffffff); + const u8 *addr; + u32 offset; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr; + offset = offsetof(struct ipv6hdr, daddr); + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr; + offset = offsetof(struct ipv6hdr, saddr); + break; + default: + return; + } + + flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask); +} + static int flow_offload_l4proto(const struct flow_offload *flow) { u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto; @@ -342,9 +407,9 @@ static void flow_offload_redirect(const struct flow_offload *flow, dev_hold(rt->dst.dev); } -int nf_flow_rule_route(struct net *net, const struct flow_offload *flow, - enum flow_offload_tuple_dir dir, - struct nf_flow_rule *flow_rule) +int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) { if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) @@ -366,7 +431,32 @@ int nf_flow_rule_route(struct net *net, const struct flow_offload *flow, return 0; } -EXPORT_SYMBOL_GPL(nf_flow_rule_route); +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4); + +int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || + flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) + return -1; + + if (flow->flags & FLOW_OFFLOAD_SNAT) { + flow_offload_ipv6_snat(net, flow, dir, flow_rule); + flow_offload_port_snat(net, flow, dir, flow_rule); + } + if (flow->flags & FLOW_OFFLOAD_DNAT) { + flow_offload_ipv6_dnat(net, flow, dir, flow_rule); + flow_offload_port_dnat(net, flow, dir, flow_rule); + } + + flow_offload_redirect(flow, dir, flow_rule); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6); + +#define NF_FLOW_RULE_ACTION_MAX 16 static struct nf_flow_rule * nf_flow_offload_rule_alloc(struct net *net, @@ -383,7 +473,7 @@ nf_flow_offload_rule_alloc(struct net *net, if (!flow_rule) goto err_flow; - flow_rule->rule = flow_rule_alloc(10); + flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX); if (!flow_rule->rule) goto err_flow_rule; From 28f8bfd1ac948403ebd5c8070ae1e25421560059 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 12 Nov 2019 17:14:37 +0100 Subject: [PATCH 11/18] netfilter: Support iif matches in POSTROUTING Instead of generally passing NULL to NF_HOOK_COND() for input device, pass skb->dev which contains input device for routed skbs. Note that iptables (both legacy and nft) reject rules with input interface match from being added to POSTROUTING chains, but nftables allows this. Cc: Eric Garver Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_output.c | 4 ++-- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/xfrm6_output.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3d8baaaf7086d..9d83cb320dcb7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -422,7 +422,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); @@ -430,7 +430,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, dev, + net, sk, skb, indev, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index ecff3fce98073..89ba7c87de5df 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -92,7 +92,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 71827b56c0063..945508a7cb0f1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -160,7 +160,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); skb->protocol = htons(ETH_P_IPV6); @@ -173,7 +173,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, NULL, dev, + net, sk, skb, indev, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index eecac1b7148e5..fbe51d40bd7e9 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -187,7 +187,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst(skb)->dev, __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } From ea13ca305177bd02de62087228a9f1e6793ccf2b Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 13 Nov 2019 12:46:39 +0800 Subject: [PATCH 12/18] netfilter: nf_flow_table_offload: Fix check ndo_setup_tc when setup_block It should check the ndo_setup_tc in the nf_flow_table_offload_setup. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a14932748bcf0..c54c9a6cc981b 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -812,6 +812,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)) return 0; + if (!dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + bo.net = dev_net(dev); bo.block = &flowtable->flow_block; bo.command = cmd; From 458a1828e9f788d4c1da325069fed2c2eaa000fa Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 13 Nov 2019 12:46:40 +0800 Subject: [PATCH 13/18] netfilter: nf_flow_table: remove unnecessary parameter in flow_offload_fill_dir The ct object is already in the flow_offload structure, remove it. Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8468d2d02284f..9889d52eda820 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -18,11 +18,11 @@ static DEFINE_MUTEX(flowtable_lock); static LIST_HEAD(flowtables); static void -flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, +flow_offload_fill_dir(struct flow_offload *flow, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; - struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; + struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple; ft->dir = dir; @@ -57,8 +57,8 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct) flow->ct = ct; - flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_ORIGINAL); - flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_REPLY); + flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL); + flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY); if (ct->status & IPS_SRC_NAT) flow->flags |= FLOW_OFFLOAD_SNAT; From 6ca61c7a8bac2768359f67003ac696260fd0985e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Nov 2019 14:17:19 +0100 Subject: [PATCH 14/18] netfilter: nf_tables_offload: remove reference to flow rule from deletion path The cookie is sufficient to delete the rule from the hardware. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 741045eb530ee..528886bb34813 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -437,8 +437,7 @@ int nft_flow_rule_offload_commit(struct net *net) err = nft_flow_offload_rule(trans->ctx.chain, nft_trans_rule(trans), - nft_trans_flow_rule(trans), - FLOW_CLS_DESTROY); + NULL, FLOW_CLS_DESTROY); break; } From 23403cd8898dbc9808d3eb2f63bc1db8a340b751 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Nov 2019 14:17:24 +0100 Subject: [PATCH 15/18] netfilter: nf_tables_offload: release flow_rule on error from commit path If hardware offload commit path fails, release all flow_rule objects. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 528886bb34813..6d5f3cd7f1b74 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -422,14 +422,14 @@ int nft_flow_rule_offload_commit(struct net *net) continue; if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) - return -EOPNOTSUPP; - + !(trans->ctx.flags & NLM_F_APPEND)) { + err = -EOPNOTSUPP; + break; + } err = nft_flow_offload_rule(trans->ctx.chain, nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); - nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) @@ -442,7 +442,23 @@ int nft_flow_rule_offload_commit(struct net *net) } if (err) - return err; + break; + } + + list_for_each_entry(trans, &net->nft.commit_list, list) { + if (trans->ctx.family != NFPROTO_NETDEV) + continue; + + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + break; + default: + break; + } } return err; From 63b48c73ff567bbab1f940d6e8f3f48607077a13 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Nov 2019 14:17:28 +0100 Subject: [PATCH 16/18] netfilter: nf_tables_offload: undo updates if transaction fails The nft_flow_rule_offload_commit() function might fail after several successful commands, thus, leaving the hardware filtering policy in inconsistent state. This patch adds nft_flow_rule_offload_abort() function which undoes the updates that have been already processed if one command in this transaction fails. Hence, the hardware ruleset is left as it was before this aborted transaction. The deletion path needs to create the flow_rule object too, in case that an existing rule needs to be re-added from the abort path. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 11 +++++++ net/netfilter/nf_tables_offload.c | 54 ++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2dc636faa322c..4f0d880a84965 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -361,6 +361,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) { + struct nft_flow_rule *flow; struct nft_trans *trans; int err; @@ -368,6 +369,16 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) if (trans == NULL) return -ENOMEM; + if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) { + flow = nft_flow_rule_create(ctx->net, rule); + if (IS_ERR(flow)) { + nft_trans_destroy(trans); + return PTR_ERR(flow); + } + + nft_trans_flow_rule(trans) = flow; + } + err = nf_tables_delrule_deactivate(ctx, rule); if (err < 0) { nft_trans_destroy(trans); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 6d5f3cd7f1b74..68f17a6921d82 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -389,6 +389,55 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy, return nft_flow_block_chain(basechain, NULL, cmd); } +static void nft_flow_rule_offload_abort(struct net *net, + struct nft_trans *trans) +{ + int err = 0; + + list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) { + if (trans->ctx.family != NFPROTO_NETDEV) + continue; + + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + nft_trans_chain_update(trans)) + continue; + + err = nft_flow_offload_chain(trans->ctx.chain, NULL, + FLOW_BLOCK_UNBIND); + break; + case NFT_MSG_DELCHAIN: + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + err = nft_flow_offload_chain(trans->ctx.chain, NULL, + FLOW_BLOCK_BIND); + break; + case NFT_MSG_NEWRULE: + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + NULL, FLOW_CLS_DESTROY); + break; + case NFT_MSG_DELRULE: + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + nft_trans_flow_rule(trans), + FLOW_CLS_REPLACE); + break; + } + + if (WARN_ON_ONCE(err)) + break; + } +} + int nft_flow_rule_offload_commit(struct net *net) { struct nft_trans *trans; @@ -441,8 +490,10 @@ int nft_flow_rule_offload_commit(struct net *net) break; } - if (err) + if (err) { + nft_flow_rule_offload_abort(net, trans); break; + } } list_for_each_entry(trans, &net->nft.commit_list, list) { @@ -451,6 +502,7 @@ int nft_flow_rule_offload_commit(struct net *net) switch (trans->msg_type) { case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; From d7c03a9f5c2577b29a7699bbaa1c1cbcfb56afd3 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 15 Nov 2019 19:21:26 +0800 Subject: [PATCH 17/18] netfilter: nf_tables: check if bind callback fails and unbind if hook registration fails Undo the callback binding before unregistering the existing hooks. This should also check for error of the bind setup call. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4f0d880a84965..9340b976d85ca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6006,12 +6006,20 @@ static int nft_register_flowtable_net_hooks(struct net *net, } } - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, - FLOW_BLOCK_BIND); - err = nf_register_net_hook(net, &hook->ops); + err = flowtable->data.type->setup(&flowtable->data, + hook->ops.dev, + FLOW_BLOCK_BIND); if (err < 0) goto err_unregister_net_hooks; + err = nf_register_net_hook(net, &hook->ops); + if (err < 0) { + flowtable->data.type->setup(&flowtable->data, + hook->ops.dev, + FLOW_BLOCK_UNBIND); + goto err_unregister_net_hooks; + } + i++; } From ff4bf2f42a40e7dff28379f085b64df322c70b45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 15 Nov 2019 11:36:35 +0100 Subject: [PATCH 18/18] netfilter: nf_tables: add nft_unregister_flowtable_hook() Unbind flowtable callback if hook is unregistered. This patch is implicitly fixing the error path of nf_tables_newflowtable() and nft_flowtable_event(). Fixes: 8bb69f3b2918 ("netfilter: nf_tables: add flowtable offload control plane") Reported-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9340b976d85ca..ff04cdc87f760 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5975,16 +5975,22 @@ nft_flowtable_type_get(struct net *net, u8 family) return ERR_PTR(-ENOENT); } +static void nft_unregister_flowtable_hook(struct net *net, + struct nft_flowtable *flowtable, + struct nft_hook *hook) +{ + nf_unregister_net_hook(net, &hook->ops); + flowtable->data.type->setup(&flowtable->data, hook->ops.dev, + FLOW_BLOCK_UNBIND); +} + static void nft_unregister_flowtable_net_hooks(struct net *net, struct nft_flowtable *flowtable) { struct nft_hook *hook; - list_for_each_entry(hook, &flowtable->hook_list, list) { - nf_unregister_net_hook(net, &hook->ops); - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, - FLOW_BLOCK_UNBIND); - } + list_for_each_entry(hook, &flowtable->hook_list, list) + nft_unregister_flowtable_hook(net, flowtable, hook); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -6030,9 +6036,7 @@ static int nft_register_flowtable_net_hooks(struct net *net, if (i-- <= 0) break; - nf_unregister_net_hook(net, &hook->ops); - flowtable->data.type->setup(&flowtable->data, hook->ops.dev, - FLOW_BLOCK_UNBIND); + nft_unregister_flowtable_hook(net, flowtable, hook); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); } @@ -6139,7 +6143,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, return 0; err5: list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nf_unregister_net_hook(net, &hook->ops); + nft_unregister_flowtable_hook(net, flowtable, hook); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); } @@ -6484,7 +6488,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev, if (hook->ops.dev != dev) continue; - nf_unregister_net_hook(dev_net(dev), &hook->ops); + nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); break;