diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4681ecfb85ac2..c9bb0f892f555 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
 	return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
 }
 
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse);
 
-static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
 				      unsigned int family)
 {
-	return __xfrm_decode_session(skb, fl, family, 0);
+	return __xfrm_decode_session(net, skb, fl, family, 0);
 }
 
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
 					      struct flowi *fl,
 					      unsigned int family)
 {
-	return __xfrm_decode_session(skb, fl, family, 1);
+	return __xfrm_decode_session(net, skb, fl, family, 1);
 }
 
 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
@@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
 {
 	return 1;
 }
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
 					      struct flowi *fl,
 					      unsigned int family)
 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b8607763d113a..e63a3bf996176 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	} else
 		return rt;
 
-	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
+	err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
 	if (err)
 		goto relookup_failed;
 
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d1e7d0ceb7edd..9ab9b3ebe0cd1 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
 		break;
 	case htons(ETH_P_IPV6):
 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET6);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
 		break;
 	default:
 		goto tx_err;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index bd135165482aa..591a2737808e4 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
 
 #ifdef CONFIG_XFRM
 	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
+	    xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
 		struct dst_entry *dst = skb_dst(skb);
 		skb_dst_set(skb, NULL);
 		dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8fb4a791881a4..f624270971269 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
 		return dst;
 	}
 
-	err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
 	if (err)
 		goto relookup_failed;
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 73c85d4e0e9cd..e550240c85e1c 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto tx_err;
 
 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET6);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
 		break;
 	case htons(ETH_P_IP):
 		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
 		break;
 	default:
 		goto tx_err;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 857713d7a38a5..53d255838e6ab 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
 
 #ifdef CONFIG_XFRM
 	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+	    xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
 		skb_dst_set(skb, NULL);
 		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 		if (IS_ERR(dst))
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 48cc60084d28b..c77963517bf87 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
 	struct flowi fl;
 	int err;
 
-	err = xfrm_decode_session(skb, &fl, family);
+	err = xfrm_decode_session(net, skb, &fl, family);
 	if (err < 0)
 		return err;
 
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index b864740846902..656f437f5f531 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -538,7 +538,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 	switch (skb->protocol) {
 	case htons(ETH_P_IPV6):
 		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET6);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
 		if (!dst) {
 			fl.u.ip6.flowi6_oif = dev->ifindex;
 			fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -553,7 +553,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 		break;
 	case htons(ETH_P_IP):
 		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-		xfrm_decode_session(skb, &fl, AF_INET);
+		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
 		if (!dst) {
 			struct rtable *rt;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c4c4fc29ccf5a..6aea8b2f45e02 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
 	struct hlist_head *res[XFRM_POL_CAND_MAX];
 };
 
+struct xfrm_flow_keys {
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_control control;
+	union {
+		struct flow_dissector_key_ipv4_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	} addrs;
+	struct flow_dissector_key_ip ip;
+	struct flow_dissector_key_icmp icmp;
+	struct flow_dissector_key_ports ports;
+	struct flow_dissector_key_keyid gre;
+};
+
+static struct flow_dissector xfrm_session_dissector __ro_after_init;
+
 static DEFINE_SPINLOCK(xfrm_if_cb_lock);
 static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
 
@@ -2853,7 +2868,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
 	/* Fixup the mark to support VTI. */
 	skb_mark = skb->mark;
 	skb->mark = pol->mark.v;
-	xfrm_decode_session(skb, &fl, dst->ops->family);
+	xfrm_decode_session(net, skb, &fl, dst->ops->family);
 	skb->mark = skb_mark;
 	spin_unlock(&pq->hold_queue.lock);
 
@@ -2889,7 +2904,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
 		/* Fixup the mark to support VTI. */
 		skb_mark = skb->mark;
 		skb->mark = pol->mark.v;
-		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+		xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
 		skb->mark = skb_mark;
 
 		dst_hold(xfrm_dst_path(skb_dst(skb)));
@@ -3367,209 +3382,115 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
 }
 
 static void
-decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	int ihl = iph->ihl;
-	u8 *xprth = skb_network_header(skb) + ihl * 4;
 	struct flowi4 *fl4 = &fl->u.ip4;
-	int oif = 0;
-
-	if (skb_dst(skb) && skb_dst(skb)->dev)
-		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl4, 0, sizeof(struct flowi4));
-	fl4->flowi4_mark = skb->mark;
-	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
-
-	fl4->flowi4_proto = iph->protocol;
-	fl4->daddr = reverse ? iph->saddr : iph->daddr;
-	fl4->saddr = reverse ? iph->daddr : iph->saddr;
-	fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
-
-	if (!ip_is_fragment(iph)) {
-		switch (iph->protocol) {
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ports;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				ports = (__be16 *)xprth;
-
-				fl4->fl4_sport = ports[!!reverse];
-				fl4->fl4_dport = ports[!reverse];
-			}
-			break;
-		case IPPROTO_ICMP:
-			if (xprth + 2 < skb->data ||
-			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
-				u8 *icmp;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				icmp = xprth;
 
-				fl4->fl4_icmp_type = icmp[0];
-				fl4->fl4_icmp_code = icmp[1];
-			}
-			break;
-		case IPPROTO_GRE:
-			if (xprth + 12 < skb->data ||
-			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
-				__be16 *greflags;
-				__be32 *gre_hdr;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				greflags = (__be16 *)xprth;
-				gre_hdr = (__be32 *)xprth;
-
-				if (greflags[0] & GRE_KEY) {
-					if (greflags[0] & GRE_CSUM)
-						gre_hdr++;
-					fl4->fl4_gre_key = gre_hdr[1];
-				}
-			}
-			break;
-		default:
-			break;
-		}
+	if (reverse) {
+		fl4->saddr = flkeys->addrs.ipv4.dst;
+		fl4->daddr = flkeys->addrs.ipv4.src;
+		fl4->fl4_sport = flkeys->ports.dst;
+		fl4->fl4_dport = flkeys->ports.src;
+	} else {
+		fl4->saddr = flkeys->addrs.ipv4.src;
+		fl4->daddr = flkeys->addrs.ipv4.dst;
+		fl4->fl4_sport = flkeys->ports.src;
+		fl4->fl4_dport = flkeys->ports.dst;
 	}
+
+	/* The layer 4 selectors (ports, ICMP type/code, GRE key) overlay one
+	 * another in the flowi uli union: store only the one that matches
+	 * the protocol so that it neither clobbers nor is clobbered by the
+	 * ports written above.
+	 */
+	switch (flkeys->basic.ip_proto) {
+	case IPPROTO_GRE:
+		fl4->fl4_gre_key = flkeys->gre.keyid;
+		break;
+	case IPPROTO_ICMP:
+		fl4->fl4_icmp_type = flkeys->icmp.type;
+		fl4->fl4_icmp_code = flkeys->icmp.code;
+		break;
+	}
+
+	fl4->flowi4_proto = flkeys->basic.ip_proto;
+	/* ECN bits are not part of the flow key, as before the conversion. */
+	fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void
-decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
 {
 	struct flowi6 *fl6 = &fl->u.ip6;
-	int onlyproto = 0;
-	const struct ipv6hdr *hdr = ipv6_hdr(skb);
-	u32 offset = sizeof(*hdr);
-	struct ipv6_opt_hdr *exthdr;
-	const unsigned char *nh = skb_network_header(skb);
-	u16 nhoff = IP6CB(skb)->nhoff;
-	int oif = 0;
-	u8 nexthdr;
-
-	if (!nhoff)
-		nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-	nexthdr = nh[nhoff];
-
-	if (skb_dst(skb) && skb_dst(skb)->dev)
-		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl6, 0, sizeof(struct flowi6));
-	fl6->flowi6_mark = skb->mark;
-	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
-	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
-	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
-	while (nh + offset + sizeof(*exthdr) < skb->data ||
-	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
-		nh = skb_network_header(skb);
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
-		switch (nexthdr) {
-		case NEXTHDR_FRAGMENT:
-			onlyproto = 1;
-			fallthrough;
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_HOP:
-		case NEXTHDR_DEST:
-			offset += ipv6_optlen(exthdr);
-			nexthdr = exthdr->nexthdr;
-			break;
-		case IPPROTO_UDP:
-		case IPPROTO_UDPLITE:
-		case IPPROTO_TCP:
-		case IPPROTO_SCTP:
-		case IPPROTO_DCCP:
-			if (!onlyproto && (nh + offset + 4 < skb->data ||
-			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
-				__be16 *ports;
-
-				nh = skb_network_header(skb);
-				ports = (__be16 *)(nh + offset);
-				fl6->fl6_sport = ports[!!reverse];
-				fl6->fl6_dport = ports[!reverse];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-		case IPPROTO_ICMPV6:
-			if (!onlyproto && (nh + offset + 2 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
-				u8 *icmp;
-
-				nh = skb_network_header(skb);
-				icmp = (u8 *)(nh + offset);
-				fl6->fl6_icmp_type = icmp[0];
-				fl6->fl6_icmp_code = icmp[1];
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-		case IPPROTO_GRE:
-			if (!onlyproto &&
-			    (nh + offset + 12 < skb->data ||
-			     pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
-				struct gre_base_hdr *gre_hdr;
-				__be32 *gre_key;
-
-				nh = skb_network_header(skb);
-				gre_hdr = (struct gre_base_hdr *)(nh + offset);
-				gre_key = (__be32 *)(gre_hdr + 1);
-
-				if (gre_hdr->flags & GRE_KEY) {
-					if (gre_hdr->flags & GRE_CSUM)
-						gre_key++;
-					fl6->fl6_gre_key = *gre_key;
-				}
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
 
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-		case IPPROTO_MH:
-			offset += ipv6_optlen(exthdr);
-			if (!onlyproto && (nh + offset + 3 < skb->data ||
-			    pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
-				struct ip6_mh *mh;
-
-				nh = skb_network_header(skb);
-				mh = (struct ip6_mh *)(nh + offset);
-				fl6->fl6_mh_type = mh->ip6mh_type;
-			}
-			fl6->flowi6_proto = nexthdr;
-			return;
-#endif
-		default:
-			fl6->flowi6_proto = nexthdr;
-			return;
-		}
+	if (reverse) {
+		fl6->saddr = flkeys->addrs.ipv6.dst;
+		fl6->daddr = flkeys->addrs.ipv6.src;
+		fl6->fl6_sport = flkeys->ports.dst;
+		fl6->fl6_dport = flkeys->ports.src;
+	} else {
+		fl6->saddr = flkeys->addrs.ipv6.src;
+		fl6->daddr = flkeys->addrs.ipv6.dst;
+		fl6->fl6_sport = flkeys->ports.src;
+		fl6->fl6_dport = flkeys->ports.dst;
 	}
+
+	/* Same union overlay as for IPv4: only store the layer 4 selector
+	 * that belongs to the protocol.
+	 */
+	switch (flkeys->basic.ip_proto) {
+	case IPPROTO_GRE:
+		fl6->fl6_gre_key = flkeys->gre.keyid;
+		break;
+	case IPPROTO_ICMPV6:
+		fl6->fl6_icmp_type = flkeys->icmp.type;
+		fl6->fl6_icmp_code = flkeys->icmp.code;
+		break;
+	}
+
+	fl6->flowi6_proto = flkeys->basic.ip_proto;
 }
 #endif
 
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
+	struct xfrm_flow_keys flkeys;
+
+	memset(&flkeys, 0, sizeof(flkeys));
+	__skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
+			   NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
 	switch (family) {
 	case AF_INET:
-		decode_session4(skb, fl, reverse);
+		decode_session4(&flkeys, fl, reverse);
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		decode_session6(skb, fl, reverse);
+		decode_session6(&flkeys, fl, reverse);
 		break;
 #endif
 	default:
 		return -EAFNOSUPPORT;
 	}
 
+	fl->flowi_mark = skb->mark;
+	if (reverse) {
+		fl->flowi_oif = skb->skb_iif;
+	} else {
+		int oif = 0;
+
+		if (skb_dst(skb) && skb_dst(skb)->dev)
+			oif = skb_dst(skb)->dev->ifindex;
+
+		fl->flowi_oif = oif;
+	}
+
 	return security_xfrm_decode_session(skb, &fl->flowi_secid);
 }
 EXPORT_SYMBOL(__xfrm_decode_session);
@@ -3618,7 +3539,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	reverse = dir & ~XFRM_POLICY_MASK;
 	dir &= XFRM_POLICY_MASK;
 
-	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
+	if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		return 0;
 	}
@@ -3774,7 +3695,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 	struct dst_entry *dst;
 	int res = 1;
 
-	if (xfrm_decode_session(skb, &fl, family) < 0) {
+	if (xfrm_decode_session(net, skb, &fl, family) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
 		return 0;
 	}
@@ -4253,8 +4174,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 	.exit = xfrm_net_exit,
 };
 
+static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
+	{
+		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
+		.offset = offsetof(struct xfrm_flow_keys, control),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_BASIC,
+		.offset = offsetof(struct xfrm_flow_keys, basic),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+		.offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_PORTS,
+		.offset = offsetof(struct xfrm_flow_keys, ports),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+		.offset = offsetof(struct xfrm_flow_keys, gre),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_IP,
+		.offset = offsetof(struct xfrm_flow_keys, ip),
+	},
+	{
+		.key_id = FLOW_DISSECTOR_KEY_ICMP,
+		.offset = offsetof(struct xfrm_flow_keys, icmp),
+	},
+};
+
 void __init xfrm_init(void)
 {
+	skb_flow_dissector_init(&xfrm_session_dissector,
+				xfrm_flow_dissector_keys,
+				ARRAY_SIZE(xfrm_flow_dissector_keys));
+
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
 	xfrm_input_init();