From 9ee0034b8f49aaaa7e7c2da8db1038915db99c19 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:52 -0700 Subject: [PATCH 01/11] net: flow: Add l3mdev flow update Add l3mdev hook to set FLOWI_FLAG_SKIP_NH_OIF flag and update oif/iif in flow struct if its oif or iif points to a device enslaved to an L3 Master device. Only 1 needs to be converted to match the l3mdev FIB rule. This moves the flow adjustment for l3mdev to a single point catching all lookups. It is redundant for existing hooks (those are removed in later patches) but is needed for missed lookups such as PMTU updates. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 ++++++ net/ipv4/fib_rules.c | 3 +++ net/ipv6/fib6_rules.c | 3 +++ net/l3mdev/l3mdev.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e90095091aa0d..81e175e805372 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -49,6 +49,8 @@ struct l3mdev_ops { int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +void l3mdev_update_flow(struct net *net, struct flowi *fl); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -290,6 +292,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } +static inline +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6e9ea69e5f751..770bebed6b288 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, }; int err; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp)); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); #ifdef CONFIG_IP_ROUTE_CLASSID if (arg.rule) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5857c1fc8b672..eea23b57c6a5a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index c4a1c3e84e126..43610e5acc4e6 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -222,3 +222,38 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, return rc; } + +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ + struct net_device *dev; + int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } + } + } + + if (fl->flowi_iif) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(l3mdev_update_flow); From a8e3e1a9f02094145580ea7920c6a1d9aabd5539 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:53 -0700 Subject: [PATCH 02/11] net: l3mdev: Add hook to output path This patch adds the infrastructure to the output path to pass an skb to an l3mdev device if it has a hook registered. This is the Tx parallel to l3mdev_ip{6}_rcv in the receive path and is the basis for removing the existing hook that returns the vrf dst on the fib lookup. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 48 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 +++++++ net/ipv6/ip6_output.c | 8 +++++++ net/ipv6/output_core.c | 7 ++++++ net/ipv6/raw.c | 7 ++++++ 5 files changed, 78 insertions(+) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 81e175e805372..53d5274920e3a 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,7 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include #include /** @@ -18,6 +19,10 @@ * * @l3mdev_fib_table: Get FIB table id to use for lookups * + * @l3mdev_l3_rcv: Hook in L3 receive path + * + * @l3mdev_l3_out: Hook in L3 output path + * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow @@ -29,6 +34,9 @@ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); + struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, + struct sock *sk, struct sk_buff *skb, + u16 proto); /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, @@ -201,6 +209,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return l3mdev_l3_rcv(skb, AF_INET6); } +static inline +struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) +{ + struct net_device *dev = skb_dst(skb)->dev; + + if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && master->l3mdev_ops->l3mdev_l3_out) + skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, + skb, proto); + } + + return skb; +} + +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET); +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET6); +} #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) @@ -286,6 +322,18 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return skb; } +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b913f5bf07576..41e10e34769c6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -99,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 993fd9666f1b3..6ea6caace3a89 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -236,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out((struct sock *)sk, skb); + if (unlikely(!skb)) + return 0; + /* hooks should never assume socket lock is held. * we promote our socket to non const */ diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c22..7cca8ac66fe9b 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 590dd1f7746fc..54404f08efcca 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); From 5f02ce24c2696fec33f2a5dfcf753996f5fdd211 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:54 -0700 Subject: [PATCH 03/11] net: l3mdev: Allow the l3mdev to be a loopback Allow an L3 master device to act as the loopback for that L3 domain. For IPv4 the device can also have the address 127.0.0.1. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 +++--- net/ipv4/route.c | 8 ++++++-- net/ipv6/route.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 53d5274920e3a..3ee110518584b 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -90,7 +90,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. @@ -99,7 +99,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; - const struct net_device *master; + struct net_device *master; if (!dev) return NULL; @@ -254,7 +254,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e992783c1d0c..f49b2c534e92a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2018,7 +2018,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && + !(dev_out->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) @@ -2302,7 +2304,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, else fl4->saddr = fl4->daddr; } - dev_out = net->loopback_dev; + + /* L3 master device is the loopback for that domain */ + dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 09d43ff11a8d2..2c681113c055b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2558,8 +2558,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT); + struct net_device *dev = net->loopback_dev; + struct rt6_info *rt; + + /* use L3 Master device as loopback for host routes if device + * is enslaved and address is not link local or multicast + */ + if (!rt6_need_strict(addr)) + dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; + + rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); From ebfc102c566d0d9c174ff9b721fd35ebda01f7eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:55 -0700 Subject: [PATCH 04/11] net: vrf: Flip IPv4 output path from FIB lookup hook to out hook Flip the IPv4 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv4 output processing to send the packet to the VRF driver on xmit. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/route.c | 4 --- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1ce7420322ee3..08540b96ec18e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -227,6 +227,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, } #endif +/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ +static int vrf_ip_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, skb_dst(skb)->dev, dst_output); + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { @@ -292,7 +306,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } - ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else @@ -531,6 +545,53 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rtable *rth; + + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) { + dst = &rth->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_out(vrf_dev, sk, skb); + } + + return skb; +} + /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { @@ -1067,6 +1128,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, + .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_get_rt6_dst = vrf_get_rt6_dst, .l3mdev_get_saddr6 = vrf_get_saddr6, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f49b2c534e92a..ad83f85fb2400 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2246,10 +2246,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - - rth = l3mdev_get_rtable(dev_out, fl4); - if (rth) - goto out; } if (!fl4->daddr) { From 4c1feac58e06270321cc500b85c2d94a11495775 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:56 -0700 Subject: [PATCH 05/11] net: vrf: Flip IPv6 output path from FIB lookup hook to out hook Flip the IPv6 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv6 output processing to send the packet to the VRF driver on xmit. Link scope addresses (linklocal and multicast) need special handling: specifically the oif the flow struct can not be changed because we want the lookup tied to the enslaved interface. ie., the source address and the returned route MUST point to the interface scope passed in. Convert the existing vrf_get_rt6_dst to handle only link scope addresses. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 124 ++++++++++++++++++++++++++++--------------- include/net/l3mdev.h | 8 +-- net/ipv6/route.c | 11 ++-- net/l3mdev/l3mdev.c | 15 +++--- 4 files changed, 100 insertions(+), 58 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 08540b96ec18e..f5372edf6edc5 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) +static int vrf_ip6_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, + sk, skb, NULL, skb_dst(skb)->dev, dst_output); + + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { @@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); - ret = ip6_local_out(net, skb->sk, skb); + ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else @@ -391,6 +405,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rt6_info *rt6; + + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + rcu_read_lock(); + + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) { + dst = &rt6->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -477,6 +528,13 @@ static int vrf_rt6_create(struct net_device *dev) return rc; } #else +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + return skb; +} + static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } @@ -587,6 +645,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); + case AF_INET6: + return vrf_ip6_out(vrf_dev, sk, skb); } return skb; @@ -1031,53 +1091,33 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - struct flowi6 *fl6) +/* send to link-local or multicast address via interface enslaved to + * VRF device. Force lookup to VRF table without changing flow struct + */ +static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, + struct flowi6 *fl6) { - bool need_strict = rt6_need_strict(&fl6->daddr); - struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + int flags = RT6_LOOKUP_F_IFACE; struct dst_entry *dst = NULL; struct rt6_info *rt; - /* send to link-local or multicast address */ - if (need_strict) { - int flags = RT6_LOOKUP_F_IFACE; - - /* VRF device does not have a link-local address and - * sending packets to link-local or mcast addresses over - * a VRF device does not make sense - */ - if (fl6->flowi6_oif == dev->ifindex) { - struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; - - dst_hold(dst); - return dst; - } - - if (!ipv6_addr_any(&fl6->saddr)) - flags |= RT6_LOOKUP_F_HAS_SADDR; - - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); - if (rt) - dst = &rt->dst; - - } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - - rcu_read_lock(); - - rt = rcu_dereference(vrf->rt6); - if (likely(rt)) { - dst = &rt->dst; - dst_hold(dst); - } - - rcu_read_unlock(); + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + return dst; } - /* make sure oif is set to VRF device for lookup */ - if (!need_strict) - fl6->flowi6_oif = dev->ifindex; + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; return dst; } @@ -1130,7 +1170,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) - .l3mdev_get_rt6_dst = vrf_get_rt6_dst, + .l3mdev_link_scope_lookup = vrf_link_scope_lookup, .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3ee110518584b..51aab20a4d0a5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -27,7 +27,7 @@ * * @l3mdev_get_saddr: Get source address for a flow * - * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device + * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { @@ -45,7 +45,7 @@ struct l3mdev_ops { struct flowi4 *fl4); /* IPv6 ops */ - struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); int (*l3mdev_get_saddr6)(struct net_device *dev, const struct sock *sk, @@ -177,7 +177,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -299,7 +299,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2c681113c055b..87e0a01ce7441 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1188,12 +1188,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { - struct dst_entry *dst; bool any_src; - dst = l3mdev_get_rt6_dst(net, fl6); - if (dst) - return dst; + if (rt6_need_strict(&fl6->daddr)) { + struct dst_entry *dst; + + dst = l3mdev_link_scope_lookup(net, fl6); + if (dst) + return dst; + } fl6->flowi6_iif = LOOPBACK_IFINDEX; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 43610e5acc4e6..ac9d928d0a9e6 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** - * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns - * cached route for L3 master device if relevant - * to flow + * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link + * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; @@ -121,15 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_rt6_dst) - dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + dev->l3mdev_ops->l3mdev_link_scope_lookup) + dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } -EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); +EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_get_saddr - get source address for a flow based on an interface From e0d56fdd734224666e7bd5fafbc620286d2a7ee8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:57 -0700 Subject: [PATCH 06/11] net: l3mdev: remove redundant calls A previous patch added l3mdev flow update making these hooks redundant. Remove them. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 +-- net/ipv4/route.c | 12 ++---------- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/ip6_output.c | 2 -- net/ipv6/ndisc.c | 11 ++--------- net/ipv6/route.c | 7 +------ net/ipv6/tcp_ipv6.c | 8 ++------ net/ipv6/xfrm6_policy.c | 2 +- 8 files changed, 10 insertions(+), 37 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41e10e34769c6..05d105832bdbb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1582,8 +1582,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; + oif = oif ? : skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ad83f85fb2400..b52496fd51075 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1831,7 +1831,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); + fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -2150,7 +2150,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2160,9 +2159,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; - master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif); - if (master_idx) - fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? @@ -2263,8 +2259,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2577,9 +2572,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; - if (netif_index_is_l3_master(net, fl4.flowi4_oif)) - fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; - if (iif) { struct net_device *dev; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b644a23c3db06..3155ed73d3b3c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) int oif = 0; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6ea6caace3a89..1cb41b3650488 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1070,8 +1070,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (!fl6->flowi6_oif) - fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fe65cdc28a459..d8e671457d105 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; - int oif = l3mdev_fib_oif(skb->dev); + int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; - int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, oif); - - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87e0a01ce7441..ad4a7ff301fcc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1164,7 +1164,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = l3mdev_fib_oif(skb->dev), + .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -3349,11 +3349,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; - if (netif_index_is_l3_master(net, oif)) { - fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF; - } - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 04529a3d42cb9..54cf7197c7ab5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else { - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; - - fl6.flowi6_oif = oif; - } + else + fl6.flowi6_oif = oif ? : skb->skb_iif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6cc97003e4a95..b7b7e863a2bbd 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; From d66f6c0a8f3c0bcc4ee7a9b1da4b0ebe7ee555a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:58 -0700 Subject: [PATCH 07/11] net: ipv4: Remove l3mdev_get_saddr No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 38 -------------------------------------- include/net/l3mdev.h | 12 ------------ include/net/route.h | 10 ---------- net/ipv4/raw.c | 6 ------ net/ipv4/udp.c | 6 ------ net/l3mdev/l3mdev.c | 31 ------------------------------- 6 files changed, 103 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f5372edf6edc5..9ad2a169485ff 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -863,43 +863,6 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } -/* called under rcu_read_lock */ -static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) -{ - struct fib_result res = { .tclassid = 0 }; - struct net *net = dev_net(dev); - u32 orig_tos = fl4->flowi4_tos; - u8 flags = fl4->flowi4_flags; - u8 scope = fl4->flowi4_scope; - u8 tos = RT_FL_TOS(fl4); - int rc; - - if (unlikely(!fl4->daddr)) - return 0; - - fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; - fl4->flowi4_iif = LOOPBACK_IFINDEX; - /* make sure oif is set to VRF device for lookup */ - fl4->flowi4_oif = dev->ifindex; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); - - rc = fib_lookup(net, fl4, &res, 0); - if (!rc) { - if (res.type == RTN_LOCAL) - fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; - else - fib_select_path(net, &res, fl4, -1); - } - - fl4->flowi4_flags = flags; - fl4->flowi4_tos = orig_tos; - fl4->flowi4_scope = scope; - - return rc; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1166,7 +1129,6 @@ static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, - .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 51aab20a4d0a5..1129e1d8cd6ea 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -25,8 +25,6 @@ * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * - * @l3mdev_get_saddr: Get source address for a flow - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -41,8 +39,6 @@ struct l3mdev_ops { /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); - int (*l3mdev_get_saddr)(struct net_device *dev, - struct flowi4 *fl4); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, @@ -175,8 +171,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -292,12 +286,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return false; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - return 0; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/include/net/route.h b/include/net/route.h index ad777d79af945..0429d47cad25c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); - if (!src && oif) { - int rc; - - rc = l3mdev_get_saddr(net, oif, fl4); - if (rc < 0) - return ERR_PTR(rc); - - src = fl4->saddr; - } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 438f50c1a6765..90a85c9558724 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, &fl4); - if (err < 0) - goto done; - } - if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 058c31286ce17..7d96dc2d3d08f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flow_flags, faddr, saddr, dport, inet->inet_sport); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, fl4); - if (err < 0) - goto out; - } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ac9d928d0a9e6..be40df60703cf 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,37 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -/** - * l3mdev_get_saddr - get source address for a flow based on an interface - * enslaved to an L3 master device - * @net: network namespace for device index lookup - * @ifindex: Interface index - * @fl4: IPv4 flow struct - */ - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr); - int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6) { From 8a966fc016b67d2a8ab4a83d22ded8cde032a0eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:59 -0700 Subject: [PATCH 08/11] net: ipv6: Remove l3mdev_get_saddr6 No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 41 ----------------------------------------- include/net/l3mdev.h | 11 ----------- net/ipv6/ip6_output.c | 9 +-------- net/l3mdev/l3mdev.c | 24 ------------------------ 4 files changed, 1 insertion(+), 84 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9ad2a169485ff..3a34f547c5781 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1084,46 +1084,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, return dst; } - -/* called under rcu_read_lock */ -static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net *net = dev_net(dev); - struct dst_entry *dst; - struct rt6_info *rt; - int err; - - if (rt6_need_strict(&fl6->daddr)) { - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, - RT6_LOOKUP_F_IFACE); - if (unlikely(!rt)) - return 0; - - dst = &rt->dst; - } else { - __u8 flags = fl6->flowi6_flags; - - fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; - fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_route_output(net, sk, fl6); - rt = (struct rt6_info *)dst; - - fl6->flowi6_flags = flags; - } - - err = dst->error; - if (!err) { - err = ip6_route_get_saddr(net, rt, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl6->saddr); - } - - dst_release(dst); - - return err; -} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { @@ -1133,7 +1093,6 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, - .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1129e1d8cd6ea..a5e506eb51de5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -43,9 +43,6 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); - int (*l3mdev_get_saddr6)(struct net_device *dev, - const struct sock *sk, - struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -172,8 +169,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -292,12 +287,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) return NULL; } -static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - return 0; -} - static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1cb41b3650488..6001e781164eb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -926,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; - if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && - (!*dst || !(*dst)->error)) { - err = l3mdev_get_saddr6(net, sk, fl6); - if (err) - goto out_err; - } - /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -1024,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, out_err_release: dst_release(*dst); *dst = NULL; -out_err: + if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index be40df60703cf..8da86ceca33dd 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,30 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net_device *dev; - int rc = 0; - - if (fl6->flowi6_oif) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr6) - rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); - /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device From ca28b8f2b8f316b9973693c72770c98da3e9500e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:00 -0700 Subject: [PATCH 09/11] net: l3mdev: Remove l3mdev_fib_oif No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a5e506eb51de5..a586035c97cbf 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -107,26 +107,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } -/* get index of an interface to use for FIB lookups. For devices - * enslaved to an L3 master device FIB lookups are based on the - * master index - */ -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; -} - -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - int oif; - - rcu_read_lock(); - oif = l3mdev_fib_oif_rcu(dev); - rcu_read_unlock(); - - return oif; -} - u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -248,15 +228,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) return NULL; } -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; From afb460fe0ef0af6d98ed51006153acb01278df2d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:01 -0700 Subject: [PATCH 10/11] net: l3mdev: remove get_rtable method No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 21 --------------------- include/net/l3mdev.h | 21 --------------------- 2 files changed, 42 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a34f547c5781..ccce59fbb2b32 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -843,26 +843,6 @@ static u32 vrf_fib_table(const struct net_device *dev) return vrf->tb_id; } -static struct rtable *vrf_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - struct rtable *rth = NULL; - - if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) - dst_hold(&rth->dst); - - rcu_read_unlock(); - } - - return rth; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1088,7 +1068,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, - .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a586035c97cbf..3832099289c5a 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -23,8 +23,6 @@ * * @l3mdev_l3_out: Hook in L3 output path * - * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -36,10 +34,6 @@ struct l3mdev_ops { struct sock *sk, struct sk_buff *skb, u16 proto); - /* IPv4 ops */ - struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, - const struct flowi4 *fl4); - /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); @@ -120,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) - return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); - - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; @@ -241,12 +226,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; From c71ad3d45a5e928e617ca436f3ce88bb773fb766 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:02 -0700 Subject: [PATCH 11/11] net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 5 ++--- include/net/flow.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccce59fbb2b32..55674b0e65b7d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -165,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, }; int ret = NET_XMIT_DROP; struct dst_entry *dst; @@ -265,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; struct net *net = dev_net(vrf_dev); diff --git a/include/net/flow.h b/include/net/flow.h index d47ef4bb5423a..035aa77169678 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,8 +34,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_L3MDEV_SRC 0x04 -#define FLOWI_FLAG_SKIP_NH_OIF 0x08 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; };