Skip to content

Commit

Permalink
route: move lwtunnel state to dst_entry
Browse files Browse the repository at this point in the history
Currently, the lwtunnel state resides in per-protocol data. This is
a problem if we encapsulate ipv6 traffic in an ipv4 tunnel (or vice versa).
The xmit function of the tunnel does not know whether the packet has been
routed to it by ipv4 or ipv6, yet it needs the lwtstate data. Moving the
lwtstate data to dst_entry makes such inter-protocol tunneling possible.

As a bonus, this brings a nice diffstat.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Jiri Benc authored and David S. Miller committed Aug 20, 2015
1 parent 7c383fb commit 61adedf
Show file tree
Hide file tree
Showing 17 changed files with 48 additions and 130 deletions.
1 change: 0 additions & 1 deletion drivers/net/vrf.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ static struct rtable *vrf_rtable_create(struct net_device *dev)
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_uncached_list = NULL;
rth->rt_lwtstate = NULL;
}

return rth;
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1909,7 +1909,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
u32 flags = vxlan->flags;

/* FIXME: Support IPv6 */
info = skb_tunnel_info(skb, AF_INET);
info = skb_tunnel_info(skb);

if (rdst) {
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
Expand Down Expand Up @@ -2105,7 +2105,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
struct vxlan_fdb *f;

/* FIXME: Support IPv6 */
info = skb_tunnel_info(skb, AF_INET);
info = skb_tunnel_info(skb);

skb_reset_mac_header(skb);
eth = eth_hdr(skb);
Expand Down
3 changes: 2 additions & 1 deletion include/net/dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct dst_entry {
#else
void *__pad1;
#endif
struct lwtunnel_state *lwtstate;
int (*input)(struct sk_buff *);
int (*output)(struct sock *sk, struct sk_buff *skb);

Expand Down Expand Up @@ -89,7 +90,7 @@ struct dst_entry {
* (L1_CACHE_SIZE would be too much)
*/
#ifdef CONFIG_64BIT
long __pad_to_align_refcnt[2];
long __pad_to_align_refcnt[1];
#endif
/*
* __refcnt wants to be on a different cache line from
Expand Down
15 changes: 5 additions & 10 deletions include/net/dst_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,17 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
return NULL;
}

static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb,
int family)
static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
{
struct metadata_dst *md_dst = skb_metadata_dst(skb);
struct rtable *rt;
struct dst_entry *dst;

if (md_dst)
return &md_dst->u.tun_info;

switch (family) {
case AF_INET:
rt = (struct rtable *)skb_dst(skb);
if (rt && rt->rt_lwtstate)
return lwt_tun_info(rt->rt_lwtstate);
break;
}
dst = skb_dst(skb);
if (dst && dst->lwtstate)
return lwt_tun_info(dst->lwtstate);

return NULL;
}
Expand Down
1 change: 0 additions & 1 deletion include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ struct rt6_info {
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
struct lwtunnel_state *rt6i_lwtstate;
};

static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
Expand Down
12 changes: 0 additions & 12 deletions include/net/lwtunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
int lwtunnel_output6(struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_input6(struct sk_buff *skb);

#else

Expand Down Expand Up @@ -164,21 +162,11 @@ static inline int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
return -EOPNOTSUPP;
}

/*
 * Fallback lwtunnel_output6() from the #else branch of this header
 * (presumably the build without lightweight-tunnel support; the guarding
 * condition is not visible here -- TODO confirm).
 *
 * Ignores both arguments and always returns -EOPNOTSUPP.
 */
static inline int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

/*
 * Fallback lwtunnel_input() from the #else branch of this header
 * (presumably the build without lightweight-tunnel support; the guarding
 * condition is not visible here -- TODO confirm).
 *
 * Ignores @skb and always returns -EOPNOTSUPP.
 */
static inline int lwtunnel_input(struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

/*
 * Fallback lwtunnel_input6() from the #else branch of this header
 * (presumably the build without lightweight-tunnel support; the guarding
 * condition is not visible here -- TODO confirm).
 *
 * Ignores @skb and always returns -EOPNOTSUPP.
 */
static inline int lwtunnel_input6(struct sk_buff *skb)
{
return -EOPNOTSUPP;
}

#endif

#endif /* __NET_LWTUNNEL_H */
1 change: 0 additions & 1 deletion include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ struct rtable {

struct list_head rt_uncached;
struct uncached_list *rt_uncached_list;
struct lwtunnel_state *rt_lwtstate;
};

static inline bool rt_is_input_route(const struct rtable *rt)
Expand Down
3 changes: 3 additions & 0 deletions net/core/dst.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>

#include <net/dst.h>
#include <net/dst_metadata.h>
Expand Down Expand Up @@ -184,6 +185,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
Expand Down Expand Up @@ -264,6 +266,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
kfree(dst);
else
kmem_cache_free(dst->ops->kmem_cachep, dst);
lwtstate_put(dst->lwtstate);

dst = child;
if (dst) {
Expand Down
2 changes: 1 addition & 1 deletion net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1489,7 +1489,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
struct ip_tunnel_info *info = skb_tunnel_info(skb, AF_INET);
struct ip_tunnel_info *info = skb_tunnel_info(skb);

if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info))
return -EINVAL;
Expand Down
70 changes: 10 additions & 60 deletions net/core/lwtunnel.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,14 +179,16 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
}
EXPORT_SYMBOL(lwtunnel_cmp_encap);

int __lwtunnel_output(struct sock *sk, struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
int ret = -EINVAL;

if (!lwtstate)
if (!dst)
goto drop;
lwtstate = dst->lwtstate;

if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
lwtstate->type > LWTUNNEL_ENCAP_MAX)
Expand All @@ -209,47 +211,18 @@ int __lwtunnel_output(struct sock *sk, struct sk_buff *skb,

return ret;
}

/*
 * IPv6 wrapper around __lwtunnel_output().
 *
 * Treats the dst attached to @skb as a struct rt6_info. When a dst is
 * present, the tunnel state is read from its rt6i_lwtstate field and
 * skb->dev is set to the route's device. The skb is always tagged as
 * ETH_P_IPV6 before being handed on.
 *
 * Returns whatever __lwtunnel_output() returns.
 */
int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
{
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct lwtunnel_state *lwtstate = NULL;

/* Without a dst, lwtstate stays NULL and skb->dev is left untouched. */
if (rt) {
lwtstate = rt->rt6i_lwtstate;
skb->dev = rt->dst.dev;
}

skb->protocol = htons(ETH_P_IPV6);

return __lwtunnel_output(sk, skb, lwtstate);
}
EXPORT_SYMBOL(lwtunnel_output6);

/*
 * IPv4 wrapper around __lwtunnel_output().
 *
 * Treats the dst attached to @skb as a struct rtable. When a dst is
 * present, the tunnel state is read from its rt_lwtstate field and
 * skb->dev is set to the route's device. The skb is always tagged as
 * ETH_P_IP before being handed on.
 *
 * Returns whatever __lwtunnel_output() returns.
 */
int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = (struct rtable *)skb_dst(skb);
struct lwtunnel_state *lwtstate = NULL;

/* Without a dst, lwtstate stays NULL and skb->dev is left untouched. */
if (rt) {
lwtstate = rt->rt_lwtstate;
skb->dev = rt->dst.dev;
}

skb->protocol = htons(ETH_P_IP);

return __lwtunnel_output(sk, skb, lwtstate);
}
EXPORT_SYMBOL(lwtunnel_output);

int __lwtunnel_input(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
int lwtunnel_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
int ret = -EINVAL;

if (!lwtstate)
if (!dst)
goto drop;
lwtstate = dst->lwtstate;

if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
lwtstate->type > LWTUNNEL_ENCAP_MAX)
Expand All @@ -272,27 +245,4 @@ int __lwtunnel_input(struct sk_buff *skb,

return ret;
}

/*
 * IPv6 wrapper around __lwtunnel_input().
 *
 * Treats the dst attached to @skb as a struct rt6_info and passes its
 * rt6i_lwtstate to __lwtunnel_input(). If no dst is attached, NULL is
 * passed instead.
 *
 * Returns whatever __lwtunnel_input() returns.
 */
int lwtunnel_input6(struct sk_buff *skb)
{
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct lwtunnel_state *lwtstate = NULL;

if (rt)
lwtstate = rt->rt6i_lwtstate;

return __lwtunnel_input(skb, lwtstate);
}
EXPORT_SYMBOL(lwtunnel_input6);

/*
 * IPv4 wrapper around __lwtunnel_input().
 *
 * Treats the dst attached to @skb as a struct rtable and passes its
 * rt_lwtstate to __lwtunnel_input(). If no dst is attached, NULL is
 * passed instead.
 *
 * Returns whatever __lwtunnel_input() returns.
 */
int lwtunnel_input(struct sk_buff *skb)
{
struct rtable *rt = (struct rtable *)skb_dst(skb);
struct lwtunnel_state *lwtstate = NULL;

if (rt)
lwtstate = rt->rt_lwtstate;

return __lwtunnel_input(skb, lwtstate);
}
EXPORT_SYMBOL(lwtunnel_input);
2 changes: 1 addition & 1 deletion net/ipv4/ip_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
__be16 df, flags;
int err;

tun_info = skb_tunnel_info(skb, AF_INET);
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
goto err_free_skb;

Expand Down
20 changes: 7 additions & 13 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
list_del(&rt->rt_uncached);
spin_unlock_bh(&ul->lock);
}
lwtstate_put(rt->rt_lwtstate);
}

void rt_flush_dev(struct net_device *dev)
Expand Down Expand Up @@ -1408,7 +1407,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
rt->rt_lwtstate = lwtstate_get(nh->nh_lwtstate);
rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
Expand Down Expand Up @@ -1494,7 +1493,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
Expand Down Expand Up @@ -1624,19 +1622,18 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(in_slow_tot);

rth->dst.input = ip_forward;
rth->dst.output = ip_output;

rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
if (lwtunnel_output_redirect(rth->rt_lwtstate)) {
rth->rt_lwtstate->orig_output = rth->dst.output;
if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
rth->dst.lwtstate->orig_output = rth->dst.output;
rth->dst.output = lwtunnel_output;
}
if (lwtunnel_input_redirect(rth->rt_lwtstate)) {
rth->rt_lwtstate->orig_input = rth->dst.input;
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
rth->dst.lwtstate->orig_input = rth->dst.input;
rth->dst.input = lwtunnel_input;
}
skb_dst_set(skb, &rth->dst);
Expand Down Expand Up @@ -1695,7 +1692,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
by fib_lookup.
*/

tun_info = skb_tunnel_info(skb, AF_INET);
tun_info = skb_tunnel_info(skb);
if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
else
Expand Down Expand Up @@ -1815,7 +1812,6 @@ out: return err;
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;

RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
Expand Down Expand Up @@ -2006,7 +2002,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->rt_lwtstate = NULL;
RT_CACHE_STAT_INC(out_slow_tot);

if (flags & RTCF_LOCAL)
Expand All @@ -2029,7 +2024,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
}

rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
if (lwtunnel_output_redirect(rth->rt_lwtstate))
if (lwtunnel_output_redirect(rth->dst.lwtstate))
rth->dst.output = lwtunnel_output;

return rth;
Expand Down Expand Up @@ -2293,7 +2288,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_uses_gateway = ort->rt_uses_gateway;

INIT_LIST_HEAD(&rt->rt_uncached);
rt->rt_lwtstate = NULL;
dst_free(new);
}

Expand Down
14 changes: 4 additions & 10 deletions net/ipv6/ila.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,13 @@ static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
static int ila_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rt6_info *rt6 = NULL;

if (skb->protocol != htons(ETH_P_IPV6))
goto drop;

rt6 = (struct rt6_info *)dst;
update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));

update_ipv6_locator(skb, ila_params_lwtunnel(rt6->rt6i_lwtstate));

return rt6->rt6i_lwtstate->orig_output(sk, skb);
return dst->lwtstate->orig_output(sk, skb);

drop:
kfree_skb(skb);
Expand All @@ -108,16 +105,13 @@ static int ila_output(struct sock *sk, struct sk_buff *skb)
static int ila_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rt6_info *rt6 = NULL;

if (skb->protocol != htons(ETH_P_IPV6))
goto drop;

rt6 = (struct rt6_info *)dst;

update_ipv6_locator(skb, ila_params_lwtunnel(rt6->rt6i_lwtstate));
update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));

return rt6->rt6i_lwtstate->orig_input(skb);
return dst->lwtstate->orig_input(skb);

drop:
kfree_skb(skb);
Expand Down
1 change: 0 additions & 1 deletion net/ipv6/ip6_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
static void rt6_release(struct rt6_info *rt)
{
if (atomic_dec_and_test(&rt->rt6i_ref)) {
lwtstate_put(rt->rt6i_lwtstate);
rt6_free_pcpu(rt);
dst_free(&rt->dst);
}
Expand Down
Loading

0 comments on commit 61adedf

Please sign in to comment.