From 39eb8cd1758886072ceb93607827722a11592ba2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:13:59 -0700 Subject: [PATCH 1/6] net: mpls: rt_nhn_alive and nh_flags should be accessed using READ_ONCE The number of alive nexthops for a route (rt->rt_nhn_alive) and the flags for a next hop (nh->nh_flags) are modified by netdev event handlers. The event handlers run with rtnl_lock held so updates are always done with the lock held. The packet path accesses the fields under the rcu lock. Since those fields can change at any moment in the packet path, both fields should be accessed using READ_ONCE. Updates to both fields should use WRITE_ONCE. Update mpls_select_multipath (packet path) and mpls_ifdown and mpls_ifup (event handlers) accordingly. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 36 +++++++++++++++++++++++++----------- net/mpls/internal.h | 8 ++++++++ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 06ffafde70da3..6bdd2f95b5762 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -189,10 +189,15 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) return hash; } +/* number of alive nexthops (rt->rt_nhn_alive) and the flags for + * a next hop (nh->nh_flags) are modified by netdev event handlers. + * Since those fields can change at any moment, use READ_ONCE to + * access both. + */ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, struct sk_buff *skb) { - int alive = ACCESS_ONCE(rt->rt_nhn_alive); + unsigned int alive; u32 hash = 0; int nh_index = 0; int n = 0; @@ -203,7 +208,8 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (rt->rt_nhn == 1) goto out; - if (alive <= 0) + alive = READ_ONCE(rt->rt_nhn_alive); + if (alive == 0) return NULL; hash = mpls_multipath_hash(rt, skb); @@ -211,7 +217,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive == rt->rt_nhn) goto out; for_nexthops(rt) { - if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + unsigned int nh_flags = READ_ONCE(nh->nh_flags); + + if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) continue; if (n == nh_index) return nh; @@ -1302,7 +1310,6 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; unsigned int alive, deleted; unsigned index; @@ -1316,22 +1323,27 @@ static void mpls_ifdown(struct net_device *dev, int event) alive = 0; deleted = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; + if (rtnl_dereference(nh->nh_dev) != dev) goto next; switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nh->nh_flags |= RTNH_F_DEAD; + nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nh->nh_flags |= RTNH_F_LINKDOWN; + nh_flags |= RTNH_F_LINKDOWN; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); + + if (nh->nh_flags != nh_flags) + WRITE_ONCE(nh->nh_flags, nh_flags); next: - if (!(nh->nh_flags & nh_flags)) + if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; if (!rtnl_dereference(nh->nh_dev)) deleted++; @@ -1345,7 +1357,7 @@ static void mpls_ifdown(struct net_device *dev, int event) } } -static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +static void mpls_ifup(struct net_device *dev, unsigned int flags) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); @@ -1361,20 +1373,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) alive = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; struct net_device *nh_dev = rtnl_dereference(nh->nh_dev); - if (!(nh->nh_flags & nh_flags)) { + if (!(nh_flags & flags)) { alive++; continue; } if (nh_dev != dev) continue; alive++; - nh->nh_flags &= ~nh_flags; + nh_flags &= ~flags; + WRITE_ONCE(nh->nh_flags, flags); } endfor_nexthops(rt); - ACCESS_ONCE(rt->rt_nhn_alive) = alive; + WRITE_ONCE(rt->rt_nhn_alive, alive); } } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 62928d8fabd1b..91419fe63464e 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -83,6 +83,10 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + + /* nh_flags is accessed under RCU in the packet path; it is + * modified handling netdev events with rtnl lock held + */ unsigned int nh_flags; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; @@ -124,6 +128,10 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_max_alen; u8 rt_ttl_propagate; unsigned int rt_nhn; + + /* rt_nhn_alive is accessed under RCU in the packet path; it + * is modified handling netdev events with rtnl lock held + */ unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; }; From 77ef013aadadd248843ad90022f36ba177b24e7f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:14:00 -0700 Subject: [PATCH 2/6] net: mpls: Convert number of nexthops to u8 Number of nexthops and number of alive nexthops are tracked using an unsigned int. A route should never have more than 255 nexthops so convert both to u8. Update all references and intermediate variables to consistently use u8 as well. Shrinks the size of mpls_route from 32 bytes to 24 bytes with a 2-byte hole before the nexthops. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 28 +++++++++++++++++----------- net/mpls/internal.h | 5 +++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6bdd2f95b5762..665dec84f0010 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -197,10 +197,10 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, struct sk_buff *skb) { - unsigned int alive; u32 hash = 0; int nh_index = 0; int n = 0; + u8 alive; /* No need to look further into packet if there's only * one path @@ -466,7 +466,7 @@ struct mpls_route_config { int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) +static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen) { u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); struct mpls_route *rt; @@ -744,11 +744,11 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, return err; } -static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, - u8 cfg_via_alen, u8 *max_via_alen) +static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen) { - int nhs = 0; int remaining = len; + u8 nhs = 0; if (!rtnh) { *max_via_alen = cfg_via_alen; @@ -773,7 +773,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, via_alen); } + /* number of nexthops is tracked by a u8. + * Check for overflow. + */ + if (nhs == 255) + return 0; nhs++; + rtnh = rtnh_next(rtnh, &remaining); } @@ -787,8 +793,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; int remaining = cfg->rc_mp_len; - int nhs = 0; int err = 0; + u8 nhs = 0; change_nexthops(rt) { int attrlen; @@ -842,7 +848,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) int err = -EINVAL; u8 max_via_alen; unsigned index; - int nhs; + u8 nhs; index = cfg->rc_label; @@ -1310,7 +1316,7 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned int alive, deleted; + u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1362,7 +1368,7 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags) struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); unsigned index; - int alive; + u8 alive; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { @@ -1786,8 +1792,8 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, } else { struct rtnexthop *rtnh; struct nlattr *mp; - int dead = 0; - int linkdown = 0; + u8 linkdown = 0; + u8 dead = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 91419fe63464e..2ac97433c3b7a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -127,12 +127,13 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_payload_type; u8 rt_max_alen; u8 rt_ttl_propagate; - unsigned int rt_nhn; + u8 rt_nhn; /* rt_nhn_alive is accessed under RCU in the packet path; it * is modified handling netdev events with rtnl lock held */ - unsigned int rt_nhn_alive; + u8 rt_nhn_alive; + u16 rt_reserved1; struct mpls_nh rt_nh[0]; }; From 59b209667a3bef240ca5da111ce1931f29fa179f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:14:01 -0700 Subject: [PATCH 3/6] net: mpls: change mpls_route layout Move labels to the end of mpls_nh as a 0-sized array and within mpls_route move the via for a nexthop after the mpls_nh. The new layout becomes: +----------------------+ | mpls_route | +----------------------+ | mpls_nh 0 | +----------------------+ | alignment padding | 4 bytes for odd number of labels; 0 for even +----------------------+ | via[rt_max_alen] 0 | +----------------------+ | alignment padding | via's aligned on sizeof(unsigned long) +----------------------+ | ... | +----------------------+ | mpls_nh n-1 | +----------------------+ | via[rt_max_alen] n-1 | +----------------------+ Memory allocated for nexthop + via is constant across all nexthops and their via. It is based on the maximum number of labels across all nexthops and the maximum via length. The size is saved in the mpls_route as rt_nh_size. Accessing a nexthop becomes rt->rt_nh + index * rt->rt_nh_size. The offset of the via address from a nexthop is saved as rt_via_offset so that given an mpls_nh pointer the via for that hop is simply nh + rt->rt_via_offset. With prior code, memory allocated per mpls_route with 1 nexthop: via is an ethernet address - 64 bytes via is an ipv4 address - 64 via is an ipv6 address - 72 With this patch set, memory allocated per mpls_route with 1 nexthop and 1 or 2 labels: via is an ethernet address - 56 bytes via is an ipv4 address - 56 via is an ipv6 address - 64 The 8-byte reduction is due to the previous patch; the change introduced by this patch has no impact on the size of allocations for 1 or 2 labels. Performance impact of this change was examined using network namespaces with veth pairs connecting namespaces. ns0 inserts the packet to the label-switched path using an lwt route with encap mpls. ns1 adds 1 or 2 labels depending on test, ns2 (and ns3 for 2-label test) pops the label and forwards. ns3 (or ns4) for a 2-label is the destination. Similar series of namespaces used for 2-nexthop test. Intent is to measure changes to latency (overhead in manipulating the packet) in the forwarding path. Tests used netperf with UDP_RR. IPv4: current patches 1 label, 1 nexthop 29908 30115 2 label, 1 nexthop 29071 29612 1 label, 2 nexthop 29582 29776 2 label, 2 nexthop 29086 29149 IPv6: current patches 1 label, 1 nexthop 24502 24960 2 label, 1 nexthop 24041 24407 1 label, 2 nexthop 23795 23899 2 label, 2 nexthop 23074 22959 In short, the change has no effect to a modest increase in performance. This is expected since this patch does not really have an impact on routes with 1 or 2 labels (the current limit) and 1 or 2 nexthops. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 37 +++++++++++++++++++++---------------- net/mpls/internal.h | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 665dec84f0010..1863b94133e4b 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,6 +24,8 @@ #include #include "internal.h" +#define MAX_NEW_LABELS 2 + /* Maximum number of labels to look ahead at when selecting a path of * a multipath route */ @@ -60,10 +62,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible); static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) { - u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); - int nh_index = nh - rt->rt_nh; - - return nh0_via + rt->rt_max_alen * nh_index; + return (u8 *)nh + rt->rt_via_offset; } static const u8 *mpls_nh_via(const struct mpls_route *rt, @@ -189,6 +188,11 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) return hash; } +static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index) +{ + return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size); +} + /* number of alive nexthops (rt->rt_nhn_alive) and the flags for * a next hop (nh->nh_flags) are modified by netdev event handlers. * Since those fields can change at any moment, use READ_ONCE to @@ -206,7 +210,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, * one path */ if (rt->rt_nhn == 1) - goto out; + return rt->rt_nh; alive = READ_ONCE(rt->rt_nhn_alive); if (alive == 0) @@ -227,7 +231,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } endfor_nexthops(rt); out: - return &rt->rt_nh[nh_index]; + return mpls_get_nexthop(rt, nh_index); } static bool mpls_egress(struct net *net, struct mpls_route *rt, @@ -466,19 +470,20 @@ struct mpls_route_config { int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen) +/* all nexthops within a route have the same size based on max + * number of labels and max via length for a hop + */ +static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) { - u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); + u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen); struct mpls_route *rt; - rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), - VIA_ALEN_ALIGN) + - num_nh * max_alen_aligned, - GFP_KERNEL); + rt = kzalloc(sizeof(*rt) + num_nh * nh_size, GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; rt->rt_nhn_alive = num_nh; - rt->rt_max_alen = max_alen_aligned; + rt->rt_nh_size = nh_size; + rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); } return rt; @@ -892,7 +897,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -ENOMEM; - rt = mpls_rt_alloc(nhs, max_via_alen); + rt = mpls_rt_alloc(nhs, max_via_alen, MAX_NEW_LABELS); if (!rt) goto errout; @@ -1964,7 +1969,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(1, lo->addr_len); + rt0 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); if (!rt0) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); @@ -1978,7 +1983,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(1, lo->addr_len); + rt2 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); if (!rt2) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 2ac97433c3b7a..cc324c0220495 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -64,7 +64,6 @@ struct mpls_dev { struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) -#define MAX_NEW_LABELS 2 /* This maximum ha length copied from the definition of struct neighbour */ #define VIA_ALEN_ALIGN sizeof(unsigned long) @@ -88,12 +87,26 @@ struct mpls_nh { /* next hop label forwarding entry */ * modified handling netdev events with rtnl lock held */ unsigned int nh_flags; - u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; u8 nh_via_table; + u8 nh_reserved1; + + u32 nh_label[0]; }; +/* offset of via from beginning of mpls_nh */ +#define MPLS_NH_VIA_OFF(num_labels) \ + ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \ + VIA_ALEN_ALIGN) + +/* all nexthops within a route have the same size based on the + * max number of labels and max via length across all nexthops + */ +#define MPLS_NH_SIZE(num_labels, max_via_alen) \ + (MPLS_NH_VIA_OFF((num_labels)) + \ + ALIGN((max_via_alen), VIA_ALEN_ALIGN)) + enum mpls_ttl_propagation { MPLS_TTL_PROP_DEFAULT, MPLS_TTL_PROP_ENABLED, @@ -108,16 +121,16 @@ enum mpls_ttl_propagation { * +----------------------+ * | mpls_nh 0 | * +----------------------+ - * | ... | - * +----------------------+ - * | mpls_nh n-1 | - * +----------------------+ - * | alignment padding | + * | alignment padding | 4 bytes for odd number of labels * +----------------------+ * | via[rt_max_alen] 0 | * +----------------------+ + * | alignment padding | via's aligned on sizeof(unsigned long) + * +----------------------+ * | ... | * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ * | via[rt_max_alen] n-1 | * +----------------------+ */ @@ -128,26 +141,28 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_max_alen; u8 rt_ttl_propagate; u8 rt_nhn; - /* rt_nhn_alive is accessed under RCU in the packet path; it * is modified handling netdev events with rtnl lock held */ u8 rt_nhn_alive; - u16 rt_reserved1; + u8 rt_nh_size; + u8 rt_via_offset; + u8 rt_reserved1; struct mpls_nh rt_nh[0]; }; #define for_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (rt)->rt_nh; \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define change_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \ + __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define endfor_nexthops(rt) } From df1c631648c55bfb247339279f9bc573c7f283f4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:14:02 -0700 Subject: [PATCH 4/6] net: mpls: Limit memory allocation for mpls_route Limit memory allocation size for mpls_route to 4096. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 1863b94133e4b..f84c52b6eafca 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -26,6 +26,9 @@ #define MAX_NEW_LABELS 2 +/* max memory we will use for mpls_route */ +#define MAX_MPLS_ROUTE_MEM 4096 + /* Maximum number of labels to look ahead at when selecting a path of * a multipath route */ @@ -477,14 +480,20 @@ static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) { u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen); struct mpls_route *rt; + size_t size; - rt = kzalloc(sizeof(*rt) + num_nh * nh_size, GFP_KERNEL); - if (rt) { - rt->rt_nhn = num_nh; - rt->rt_nhn_alive = num_nh; - rt->rt_nh_size = nh_size; - rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); - } + size = sizeof(*rt) + num_nh * nh_size; + if (size > MAX_MPLS_ROUTE_MEM) + return ERR_PTR(-EINVAL); + + rt = kzalloc(size, GFP_KERNEL); + if (!rt) + return ERR_PTR(-ENOMEM); + + rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; + rt->rt_nh_size = nh_size; + rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); return rt; } @@ -898,8 +907,10 @@ static int mpls_route_add(struct mpls_route_config *cfg) err = -ENOMEM; rt = mpls_rt_alloc(nhs, max_via_alen, MAX_NEW_LABELS); - if (!rt) + if (IS_ERR(rt)) { + err = PTR_ERR(rt); goto errout; + } rt->rt_protocol = cfg->rc_protocol; rt->rt_payload_type = cfg->rc_payload_type; @@ -1970,7 +1981,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; rt0 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); - if (!rt0) + if (IS_ERR(rt0)) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; @@ -1984,7 +1995,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; rt2 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); - if (!rt2) + if (IS_ERR(rt2)) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; From a4ac8c986d3f72ccbaf6d6782511fb645e568306 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:14:03 -0700 Subject: [PATCH 5/6] net: mpls: bump maximum number of labels Allow users to push down more labels per MPLS route. With the previous patches, no memory allocations are based on MAX_NEW_LABELS; the limit is only used to keep userspace in check. At this point MAX_NEW_LABELS is only used for mpls_route_config (copying route data from userspace) and processing nexthops looking for the max number of labels across the route spec. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 103 ++++++++++++++++++++++++++++++-------------- net/mpls/internal.h | 2 +- 2 files changed, 71 insertions(+), 34 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index f84c52b6eafca..10daefd7f9388 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,7 +24,10 @@ #include #include "internal.h" -#define MAX_NEW_LABELS 2 +/* put a reasonable limit on the number of labels + * we will accept from userspace + */ +#define MAX_NEW_LABELS 30 /* max memory we will use for mpls_route */ #define MAX_MPLS_ROUTE_MEM 4096 @@ -698,9 +701,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, return -ENOMEM; err = -EINVAL; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; nh->nh_labels = cfg->rc_output_labels; for (i = 0; i < nh->nh_labels; i++) @@ -725,7 +725,7 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, static int mpls_nh_build(struct net *net, struct mpls_route *rt, struct mpls_nh *nh, int oif, struct nlattr *via, - struct nlattr *newdst) + struct nlattr *newdst, u8 max_labels) { int err = -ENOMEM; @@ -733,7 +733,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; if (newdst) { - err = nla_get_labels(newdst, MAX_NEW_LABELS, + err = nla_get_labels(newdst, max_labels, &nh->nh_labels, nh->nh_label); if (err) goto errout; @@ -759,21 +759,19 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, } static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, - u8 cfg_via_alen, u8 *max_via_alen) + u8 cfg_via_alen, u8 *max_via_alen, + u8 *max_labels) { int remaining = len; u8 nhs = 0; - if (!rtnh) { - *max_via_alen = cfg_via_alen; - return 1; - } - *max_via_alen = 0; + *max_labels = 0; while (rtnh_ok(rtnh, remaining)) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); int attrlen; + u8 n_labels = 0; attrlen = rtnh_attrlen(rtnh); nla = nla_find(attrs, attrlen, RTA_VIA); @@ -787,6 +785,13 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, via_alen); } + nla = nla_find(attrs, attrlen, RTA_NEWDST); + if (nla && + nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0) + return 0; + + *max_labels = max_t(u8, *max_labels, n_labels); + /* number of nexthops is tracked by a u8. * Check for overflow. */ @@ -802,7 +807,7 @@ static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, } static int mpls_nh_build_multi(struct mpls_route_config *cfg, - struct mpls_route *rt) + struct mpls_route *rt, u8 max_labels) { struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; @@ -835,7 +840,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, } err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst, + max_labels); if (err) goto errout; @@ -862,6 +868,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) int err = -EINVAL; u8 max_via_alen; unsigned index; + u8 max_labels; u8 nhs; index = cfg->rc_label; @@ -900,13 +907,21 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EINVAL; - nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, - cfg->rc_via_alen, &max_via_alen); + if (cfg->rc_mp) { + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen, + &max_labels); + } else { + max_via_alen = cfg->rc_via_alen; + max_labels = cfg->rc_output_labels; + nhs = 1; + } + if (nhs == 0) goto errout; err = -ENOMEM; - rt = mpls_rt_alloc(nhs, max_via_alen, MAX_NEW_LABELS); + rt = mpls_rt_alloc(nhs, max_via_alen, max_labels); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto errout; @@ -917,7 +932,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) - err = mpls_nh_build_multi(cfg, rt); + err = mpls_nh_build_multi(cfg, rt, max_labels); else err = mpls_nh_build_from_cfg(cfg, rt); if (err) @@ -1531,16 +1546,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u8 *labels, u32 label[]) + u8 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); - unsigned nla_labels; struct mpls_shim_hdr *nla_label; + u8 nla_labels; bool bos; int i; - /* len needs to be an even multiple of 4 (the label size) */ - if (len & 3) + /* len needs to be an even multiple of 4 (the label size). Number + * of labels is a u8 so check for overflow. + */ + if (len & 3 || len / 4 > 255) return -EINVAL; /* Limit the number of new labels allowed */ @@ -1548,6 +1565,10 @@ int nla_get_labels(const struct nlattr *nla, if (nla_labels > max_labels) return -EINVAL; + /* when label == NULL, caller wants number of labels */ + if (!label) + goto out; + nla_label = nla_data(nla); bos = true; for (i = nla_labels - 1; i >= 0; i--, bos = false) { @@ -1571,6 +1592,7 @@ int nla_get_labels(const struct nlattr *nla, label[i] = dec.label; } +out: *labels = nla_labels; return 0; } @@ -1632,7 +1654,6 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); if (rtm->rtm_family != AF_MPLS) goto errout; @@ -1731,27 +1752,43 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; - return mpls_route_del(&cfg); + err = mpls_route_del(cfg); +out: + kfree(cfg); + + return err; } static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; - return mpls_route_add(&cfg); + err = mpls_route_add(cfg); +out: + kfree(cfg); + + return err; } static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, @@ -1980,7 +2017,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); + rt0 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt0)) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); @@ -1994,7 +2031,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(1, lo->addr_len, MAX_NEW_LABELS); + rt2 = mpls_rt_alloc(1, lo->addr_len, 0); if (IS_ERR(rt2)) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index cc324c0220495..c5d2f5bc37ec5 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -197,7 +197,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, +int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, u32 label[]); int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, u8 via[]); From 1511009cd68015c2e04135bfefa4bf5020baa8d9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 31 Mar 2017 07:14:04 -0700 Subject: [PATCH 6/6] net: mpls: Increase max number of labels for lwt encap Alow users to push down more labels per MPLS encap. Similar to LSR case, move label array to the end of mpls_iptunnel_encap and allocate based on the number of labels for the route. For consistency with the LSR case, re-use the same maximum number of labels. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/mpls_iptunnel.h | 5 ++--- net/mpls/af_mpls.c | 5 ----- net/mpls/internal.h | 5 +++++ net/mpls/mpls_iptunnel.c | 13 ++++++++++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index a18af6a16eb52..9d22bf67ac866 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -14,13 +14,12 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 -#define MAX_NEW_LABELS 2 - struct mpls_iptunnel_encap { - u32 label[MAX_NEW_LABELS]; u8 labels; u8 ttl_propagate; u8 default_ttl; + u8 reserved1; + u32 label[0]; }; static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 10daefd7f9388..5928d22ba9c86 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,11 +24,6 @@ #include #include "internal.h" -/* put a reasonable limit on the number of labels - * we will accept from userspace - */ -#define MAX_NEW_LABELS 30 - /* max memory we will use for mpls_route */ #define MAX_MPLS_ROUTE_MEM 4096 diff --git a/net/mpls/internal.h b/net/mpls/internal.h index c5d2f5bc37ec5..4db6a59713220 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -2,6 +2,11 @@ #define MPLS_INTERNAL_H #include +/* put a reasonable limit on the number of labels + * we will accept from userspace + */ +#define MAX_NEW_LABELS 30 + struct mpls_entry_decoded { u32 label; u8 ttl; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 22f71fce0bfb8..fe00e98667cf6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -164,6 +164,7 @@ static int mpls_build_state(struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; + u8 n_labels; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, @@ -175,12 +176,18 @@ static int mpls_build_state(struct nlattr *nla, return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); + /* determine number of labels */ + if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], + MAX_NEW_LABELS, &n_labels, NULL)) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + + n_labels * sizeof(u32)); if (!newts) return -ENOMEM; tun_encap_info = mpls_lwtunnel_encap(newts); - ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels, &tun_encap_info->labels, tun_encap_info->label); if (ret) goto errout; @@ -257,7 +264,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) a_hdr->default_ttl != b_hdr->default_ttl) return 1; - for (l = 0; l < MAX_NEW_LABELS; l++) + for (l = 0; l < a_hdr->labels; l++) if (a_hdr->label[l] != b_hdr->label[l]) return 1; return 0;