diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6a2019aaa4644..7bbab8f2b73d4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -362,6 +362,10 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net) return net_generic(net, nf_conntrack_net_id); } +int nf_ct_skb_network_trim(struct sk_buff *skb, int family); +int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, + u16 zone, u8 family, u8 *proto, u16 *mru); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f71b41c7ce2ff..4d67371608574 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -189,6 +189,9 @@ config NF_CONNTRACK_LABELS to connection tracking entries. It can be used with xtables connlabel match and the nftables ct expression. +config NF_CONNTRACK_OVS + bool + config NF_CT_PROTO_DCCP bool 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ba2a6b5e93d90..5ffef1cd61435 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -11,6 +11,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 48ea6d0264b5e..0c4db2f2ac43e 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -242,104 +242,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); -/* 'skb' should already be pulled to nh_ofs. */ -int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, u16 proto) -{ - const struct nf_conntrack_helper *helper; - const struct nf_conn_help *help; - unsigned int protoff; - int err; - - if (ctinfo == IP_CT_RELATED_REPLY) - return NF_ACCEPT; - - help = nfct_help(ct); - if (!help) - return NF_ACCEPT; - - helper = rcu_dereference(help->helper); - if (!helper) - return NF_ACCEPT; - - if (helper->tuple.src.l3num != NFPROTO_UNSPEC && - helper->tuple.src.l3num != proto) - return NF_ACCEPT; - - switch (proto) { - case NFPROTO_IPV4: - protoff = ip_hdrlen(skb); - proto = ip_hdr(skb)->protocol; - break; - case NFPROTO_IPV6: { - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - __be16 frag_off; - int ofs; - - ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, - &frag_off); - if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { - pr_debug("proto header not found\n"); - return NF_ACCEPT; - } - protoff = ofs; - proto = nexthdr; - break; - } - default: - WARN_ONCE(1, "helper invoked on non-IP family!"); - return NF_DROP; - } - - if (helper->tuple.dst.protonum != proto) - return NF_ACCEPT; - - err = helper->help(skb, protoff, ct, ctinfo); - if (err != NF_ACCEPT) - return err; - - /* Adjust seqs after helper. This is needed due to some helpers (e.g., - * FTP with NAT) adusting the TCP payload size when mangling IP - * addresses and/or port numbers in the text-based control connection. - */ - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && - !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) - return NF_DROP; - return NF_ACCEPT; -} -EXPORT_SYMBOL_GPL(nf_ct_helper); - -int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family, - u8 proto, bool nat, struct nf_conntrack_helper **hp) -{ - struct nf_conntrack_helper *helper; - struct nf_conn_help *help; - int ret = 0; - - helper = nf_conntrack_helper_try_module_get(name, family, proto); - if (!helper) - return -EINVAL; - - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (!help) { - nf_conntrack_helper_put(helper); - return -ENOMEM; - } -#if IS_ENABLED(CONFIG_NF_NAT) - if (nat) { - ret = nf_nat_helper_try_module_get(name, family, proto); - if (ret) { - nf_conntrack_helper_put(helper); - return ret; - } - } -#endif - rcu_assign_pointer(help->helper, helper); - *hp = helper; - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_add_helper); - /* appropriate ct lock protecting must be taken by caller */ static int unhelp(struct nf_conn *ct, void *me) { diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c new file mode 100644 index 0000000000000..52b776bdf526d --- /dev/null +++ b/net/netfilter/nf_conntrack_ovs.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Support ct functions for openvswitch and used by OVS and TC conntrack. */ + +#include +#include +#include +#include +#include + +/* 'skb' should already be pulled to nh_ofs. */ +int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, u16 proto) +{ + const struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + unsigned int protoff; + int err; + + if (ctinfo == IP_CT_RELATED_REPLY) + return NF_ACCEPT; + + help = nfct_help(ct); + if (!help) + return NF_ACCEPT; + + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; + + if (helper->tuple.src.l3num != NFPROTO_UNSPEC && + helper->tuple.src.l3num != proto) + return NF_ACCEPT; + + switch (proto) { + case NFPROTO_IPV4: + protoff = ip_hdrlen(skb); + proto = ip_hdr(skb)->protocol; + break; + case NFPROTO_IPV6: { + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + __be16 frag_off; + int ofs; + + ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + return NF_ACCEPT; + } + protoff = ofs; + proto = nexthdr; + break; + } + default: + WARN_ONCE(1, "helper invoked on non-IP family!"); + return NF_DROP; + } + + if (helper->tuple.dst.protonum != proto) + return NF_ACCEPT; + + err = helper->help(skb, protoff, ct, ctinfo); + if (err != NF_ACCEPT) + return err; + + /* Adjust seqs after helper. This is needed due to some helpers (e.g., + * FTP with NAT) adusting the TCP payload size when mangling IP + * addresses and/or port numbers in the text-based control connection. + */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) + return NF_DROP; + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nf_ct_helper); + +int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family, + u8 proto, bool nat, struct nf_conntrack_helper **hp) +{ + struct nf_conntrack_helper *helper; + struct nf_conn_help *help; + int ret = 0; + + helper = nf_conntrack_helper_try_module_get(name, family, proto); + if (!helper) + return -EINVAL; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (!help) { + nf_conntrack_helper_put(helper); + return -ENOMEM; + } +#if IS_ENABLED(CONFIG_NF_NAT) + if (nat) { + ret = nf_nat_helper_try_module_get(name, family, proto); + if (ret) { + nf_conntrack_helper_put(helper); + return ret; + } + } +#endif + rcu_assign_pointer(help->helper, helper); + *hp = helper; + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_add_helper); + +/* Trim the skb to the length specified by the IP/IPv6 header, + * removing any trailing lower-layer padding. This prepares the skb + * for higher-layer processing that assumes skb->len excludes padding + * (such as nf_ip_checksum). The caller needs to pull the skb to the + * network header, and ensure ip_hdr/ipv6_hdr points to valid data. + */ +int nf_ct_skb_network_trim(struct sk_buff *skb, int family) +{ + unsigned int len; + + switch (family) { + case NFPROTO_IPV4: + len = skb_ip_totlen(skb); + break; + case NFPROTO_IPV6: + len = sizeof(struct ipv6hdr) + + ntohs(ipv6_hdr(skb)->payload_len); + break; + default: + len = skb->len; + } + + return pskb_trim_rcsum(skb, len); +} +EXPORT_SYMBOL_GPL(nf_ct_skb_network_trim); + +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ +int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, + u16 zone, u8 family, u8 *proto, u16 *mru) +{ + int err; + + if (family == NFPROTO_IPV4) { + enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + local_bh_disable(); + err = ip_defrag(net, skb, user); + local_bh_enable(); + if (err) + return err; + + *mru = IPCB(skb)->frag_max_size; +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + } else if (family == NFPROTO_IPV6) { + enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; + + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); + err = nf_ct_frag6_gather(net, skb, user); + if (err) { + if (err != -EINPROGRESS) + kfree_skb(skb); + return err; + } + + *proto = ipv6_hdr(skb)->nexthdr; + *mru = IP6CB(skb)->frag_max_size; +#endif + } else { + kfree_skb(skb); + return -EPFNOSUPPORT; + } + + skb_clear_hash(skb); + skb->ignore_df = 1; + + return 0; +} +EXPORT_SYMBOL_GPL(nf_ct_handle_fragments); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 747d537a3f066..29a7081858cd7 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -15,6 +15,7 @@ config OPENVSWITCH select NET_MPLS_GSO select DST_CACHE select NET_NSH + select NF_CONNTRACK_OVS if NF_CONNTRACK select NF_NAT_OVS if NF_NAT help Open vSwitch is a multilayer Ethernet switch targeted at virtualized diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index f95272ebfa084..331730fd35803 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -435,52 +435,21 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, return 0; } -/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero - * value if 'skb' is freed. - */ -static int handle_fragments(struct net *net, struct sw_flow_key *key, - u16 zone, struct sk_buff *skb) +static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key, + u16 zone, int family, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); int err; - if (key->eth.type == htons(ETH_P_IP)) { - enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - err = ip_defrag(net, skb, user); - if (err) - return err; - - ovs_cb.mru = IPCB(skb)->frag_max_size; -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - } else if (key->eth.type == htons(ETH_P_IPV6)) { - enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - err = nf_ct_frag6_gather(net, skb, user); - if (err) { - if (err != -EINPROGRESS) - kfree_skb(skb); - return err; - } - - key->ip.proto = ipv6_hdr(skb)->nexthdr; - ovs_cb.mru = IP6CB(skb)->frag_max_size; -#endif - } else { - kfree_skb(skb); - return -EPFNOSUPPORT; - } + err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru); + if (err) + return err; /* The key extracted from the fragment that completed this datagram * likely didn't have an L4 header, so regenerate it. */ ovs_flow_key_update_l3l4(skb, key); - key->ip.frag = OVS_FRAG_TYPE_NONE; - skb_clear_hash(skb); - skb->ignore_df = 1; *OVS_CB(skb) = ovs_cb; return 0; @@ -1091,36 +1060,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, return 0; } -/* Trim the skb to the length specified by the IP/IPv6 header, - * removing any trailing lower-layer padding. This prepares the skb - * for higher-layer processing that assumes skb->len excludes padding - * (such as nf_ip_checksum). The caller needs to pull the skb to the - * network header, and ensure ip_hdr/ipv6_hdr points to valid data. - */ -static int ovs_skb_network_trim(struct sk_buff *skb) -{ - unsigned int len; - int err; - - switch (skb->protocol) { - case htons(ETH_P_IP): - len = skb_ip_totlen(skb); - break; - case htons(ETH_P_IPV6): - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); - break; - default: - len = skb->len; - } - - err = pskb_trim_rcsum(skb, len); - if (err) - kfree_skb(skb); - - return err; -} - /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero * value if 'skb' is freed. */ @@ -1135,12 +1074,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); - err = ovs_skb_network_trim(skb); - if (err) + err = nf_ct_skb_network_trim(skb, info->family); + if (err) { + kfree_skb(skb); return err; + } if (key->ip.frag != OVS_FRAG_TYPE_NONE) { - err = handle_fragments(net, key, info->zone.id, skb); + err = ovs_ct_handle_fragments(net, key, info->zone.id, + info->family, skb); if (err) return err; } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f5acb535413d6..4f7b52f5a11c3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -984,6 +984,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE + select NF_CONNTRACK_OVS select NF_NAT_OVS if NF_NAT help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b126f03c1bb65..9cc0bc7c71ed7 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -726,31 +726,6 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, return false; } -/* Trim the skb to the length specified by the IP/IPv6 header, - * removing any trailing lower-layer padding. This prepares the skb - * for higher-layer processing that assumes skb->len excludes padding - * (such as nf_ip_checksum). The caller needs to pull the skb to the - * network header, and ensure ip_hdr/ipv6_hdr points to valid data. - */ -static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) -{ - unsigned int len; - - switch (family) { - case NFPROTO_IPV4: - len = skb_ip_totlen(skb); - break; - case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); - break; - default: - len = skb->len; - } - - return pskb_trim_rcsum(skb, len); -} - static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; @@ -810,6 +785,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, struct nf_conn *ct; int err = 0; bool frag; + u8 proto; u16 mru; /* Previously seen (loopback)? Ignore. */ @@ -825,50 +801,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); - mru = tc_skb_cb(skb)->mru; - - if (family == NFPROTO_IPV4) { - enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - local_bh_disable(); - err = ip_defrag(net, skb, user); - local_bh_enable(); - if (err && err != -EINPROGRESS) - return err; - - if (!err) { - *defrag = true; - mru = IPCB(skb)->frag_max_size; - } - } else { /* NFPROTO_IPV6 */ -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - err = nf_ct_frag6_gather(net, skb, user); - if (err && err != -EINPROGRESS) - goto out_free; - - if (!err) { - *defrag = true; - mru = IP6CB(skb)->frag_max_size; - } -#else - err = -EOPNOTSUPP; - goto out_free; -#endif - } + err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru); + if (err) + return err; - if (err != -EINPROGRESS) - tc_skb_cb(skb)->mru = mru; - skb_clear_hash(skb); - skb->ignore_df = 1; - return err; + *defrag = true; + tc_skb_cb(skb)->mru = mru; -out_free: - kfree_skb(skb); - return err; + return 0; } static void tcf_ct_params_free(struct tcf_ct_params *params) @@ -1011,7 +951,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, if (err) goto drop; - err = tcf_ct_skb_network_trim(skb, family); + err = nf_ct_skb_network_trim(skb, family); if (err) goto drop;