Skip to content

Commit

Permalink
Merge branch 'net-dst_entry-shrink'
Browse files Browse the repository at this point in the history
David Miller says:

====================
net: Significantly shrink the size of routes.

Through a combination of several things, our route structures are
larger than they need to be.

Mostly this stems from having members in dst_entry which are only used
by one class of routes.  So the majority of the work in this series is
about "un-commoning" these members and pushing them into the type
specific structures.

Unfortunately, IPSEC needed the most surgery.  The majority of the
changes here had to do with bundle creation and management.

The other issue is the refcount alignment in dst_entry.  Once we get
rid of the not-so-common members, it really opens the door to removing
that alignment entirely.

I think the new layout looks really nice, so I'll reproduce it here:

	struct net_device       *dev;
	struct  dst_ops	        *ops;
	unsigned long		_metrics;
	unsigned long           expires;
	struct xfrm_state	*xfrm;
	int			(*input)(struct sk_buff *);
	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
	unsigned short		flags;
	short			obsolete;
	unsigned short		header_len;
	unsigned short		trailer_len;
	atomic_t		__refcnt;
	int			__use;
	unsigned long		lastuse;
	struct lwtunnel_state   *lwtstate;
	struct rcu_head		rcu_head;
	short			error;
	short			__pad;
	__u32			tclassid;

(This is for 64-bit; on 32-bit the __refcnt comes at the very end.)

So, the good news:

1) struct dst_entry shrinks from 160 to 112 bytes.

2) struct rtable shrinks from 216 to 168 bytes.

3) struct rt6_info shrinks from 384 to 320 bytes.

Enjoy.

v2:
	Collapse some patches logically based upon feedback.
	Fix the strange patch #7.

v3:	xfrm_dst_path() needs inline keyword
	Properly align __refcnt on 32-bit.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Nov 30, 2017
2 parents b78a6aa + 7149f81 commit 3d8068c
Show file tree
Hide file tree
Showing 20 changed files with 204 additions and 183 deletions.
1 change: 1 addition & 0 deletions include/net/dn_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev,
*/
struct dn_route {
struct dst_entry dst;
struct dn_route __rcu *dn_next;

struct neighbour *n;

Expand Down
39 changes: 11 additions & 28 deletions include/net/dst.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,9 @@ struct sk_buff;

struct dst_entry {
struct net_device *dev;
struct rcu_head rcu_head;
struct dst_entry *child;
struct dst_ops *ops;
unsigned long _metrics;
unsigned long expires;
struct dst_entry *path;
struct dst_entry *from;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
Expand All @@ -59,8 +55,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE 0x0040
#define DST_METADATA 0x0080

short error;

/* A non-zero value of dst->obsolete forces by-hand validation
* of the route entry. Positive values are set by the generic
* dst layer to indicate that the entry has been forcefully
Expand All @@ -76,35 +70,24 @@ struct dst_entry {
#define DST_OBSOLETE_KILL -2
unsigned short header_len; /* more space at head required */
unsigned short trailer_len; /* space to reserve at tail */
unsigned short __pad3;

#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 tclassid;
#else
__u32 __pad2;
#endif

#ifdef CONFIG_64BIT
/*
* Align __refcnt to a 64 bytes alignment
* (L1_CACHE_SIZE would be too much)
*/
long __pad_to_align_refcnt[2];
#endif
/*
* __refcnt wants to be on a different cache line from
* input/output/ops or performance tanks badly
*/
atomic_t __refcnt; /* client references */
#ifdef CONFIG_64BIT
atomic_t __refcnt; /* 64-bit offset 64 */
#endif
int __use;
unsigned long lastuse;
struct lwtunnel_state *lwtstate;
union {
struct dst_entry *next;
struct rtable __rcu *rt_next;
struct rt6_info __rcu *rt6_next;
struct dn_route __rcu *dn_next;
};
struct rcu_head rcu_head;
short error;
short __pad;
__u32 tclassid;
#ifndef CONFIG_64BIT
atomic_t __refcnt; /* 32-bit offset 64 */
#endif
};

struct dst_metrics {
Expand Down Expand Up @@ -250,7 +233,7 @@ static inline void dst_hold(struct dst_entry *dst)
{
/*
* If your kernel compilation stops here, please check
* __pad_to_align_refcnt declaration in struct dst_entry
* the placement of __refcnt in struct dst_entry
*/
BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63);
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
Expand Down
14 changes: 7 additions & 7 deletions include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ struct rt6_exception {

struct rt6_info {
struct dst_entry dst;
struct rt6_info __rcu *rt6_next;
struct rt6_info *from;

/*
* Tail elements of dst_entry (__refcnt etc.)
Expand Down Expand Up @@ -176,11 +178,11 @@ struct rt6_info {

#define for_each_fib6_node_rt_rcu(fn) \
for (rt = rcu_dereference((fn)->leaf); rt; \
rt = rcu_dereference(rt->dst.rt6_next))
rt = rcu_dereference(rt->rt6_next))

#define for_each_fib6_walker_rt(w) \
for (rt = (w)->leaf; rt; \
rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
rt = rcu_dereference_protected(rt->rt6_next, 1))

static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
Expand All @@ -203,11 +205,9 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
struct rt6_info *rt;

for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES);
rt = (struct rt6_info *)rt->dst.from);
for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from);
if (rt && rt != rt0)
rt0->dst.expires = rt->dst.expires;

dst_set_expires(&rt0->dst, timeout);
rt0->rt6i_flags |= RTF_EXPIRES;
}
Expand Down Expand Up @@ -242,8 +242,8 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
u32 cookie = 0;

if (rt->rt6i_flags & RTF_PCPU ||
(unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
rt = (struct rt6_info *)(rt->dst.from);
(unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
rt = rt->from;

rt6_get_cookie_safe(rt, &cookie);

Expand Down
38 changes: 35 additions & 3 deletions include/net/xfrm.h
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,7 @@ static inline bool xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_c

/* A struct encoding bundle of transformations to apply to some set of flow.
*
* dst->child points to the next element of bundle.
* xdst->child points to the next element of bundle.
* dst->xfrm points to an instance of transformer.
*
* Due to unfortunate limitations of current routing cache, which we
Expand All @@ -984,6 +984,8 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
struct dst_entry *child;
struct dst_entry *path;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
Expand All @@ -994,7 +996,35 @@ struct xfrm_dst {
u32 path_cookie;
};

/*
 * Resolve the path entry for @dst.
 *
 * With CONFIG_XFRM enabled and a transformer attached (dst->xfrm set),
 * @dst is reinterpreted as a struct xfrm_dst and its ->path pointer is
 * returned.  Otherwise @dst is handed back unchanged, with the const
 * qualifier dropped to match the return type.
 */
static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
	if (dst->xfrm)
		return ((const struct xfrm_dst *) dst)->path;
#endif
	return (struct dst_entry *) dst;
}

/*
 * Return the next element of the bundle that @dst belongs to.
 *
 * When CONFIG_XFRM is enabled and dst->xfrm is set, @dst is treated as a
 * struct xfrm_dst and its ->child pointer is returned.  In every other
 * case (no transformer attached, or CONFIG_XFRM disabled) the result is
 * NULL.
 */
static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
	if (dst->xfrm) {
		/* Keep the const qualifier through the cast, as xfrm_dst_path() does. */
		const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;

		return xdst->child;
	}
#endif
	return NULL;
}

#ifdef CONFIG_XFRM
/*
 * Store @child as the ->child pointer of the bundle entry @xdst.
 */
static inline void xfrm_dst_set_child(struct xfrm_dst *xdst,
				      struct dst_entry *child)
{
	xdst->child = child;
}

static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
Expand Down Expand Up @@ -1866,12 +1896,14 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
struct xfrm_dst *xdst;

if (!x || !x->type_offload)
return false;

if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
!dst->child->xfrm)
xdst = (struct xfrm_dst *) dst;
if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
!xdst->child->xfrm)
return true;

return false;
Expand Down
1 change: 0 additions & 1 deletion net/bridge/br_nf_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ void br_netfilter_rtable_init(struct net_bridge *br)

atomic_set(&rt->dst.__refcnt, 1);
rt->dst.dev = br->dev;
rt->dst.path = &rt->dst;
dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
Expand Down
14 changes: 8 additions & 6 deletions net/core/dst.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/prefetch.h>
#include <net/lwtunnel.h>
#include <net/xfrm.h>

#include <net/dst.h>
#include <net/dst_metadata.h>
Expand Down Expand Up @@ -62,15 +63,12 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_ref, int initial_obsolete,
unsigned short flags)
{
dst->child = NULL;
dst->dev = dev;
if (dev)
dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
Expand All @@ -88,7 +86,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
}
Expand Down Expand Up @@ -116,12 +113,17 @@ EXPORT_SYMBOL(dst_alloc);

struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
struct dst_entry *child = NULL;

smp_rmb();

child = dst->child;
#ifdef CONFIG_XFRM
if (dst->xfrm) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;

child = xdst->child;
}
#endif
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);

Expand Down
12 changes: 6 additions & 6 deletions net/core/pktgen.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ struct pktgen_dev {
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
__u32 spi;
struct dst_entry dst;
struct xfrm_dst xdst;
struct dst_ops dstops;
#endif
char result[512];
Expand Down Expand Up @@ -2609,7 +2609,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
* supports both transport/tunnel mode + ESP/AH type.
*/
if ((x->props.mode == XFRM_MODE_TUNNEL) && (pkt_dev->spi != 0))
skb->_skb_refdst = (unsigned long)&pkt_dev->dst | SKB_DST_NOREF;
skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;

rcu_read_lock_bh();
err = x->outer_mode->output(x, skb);
Expand Down Expand Up @@ -3742,10 +3742,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
* performance under such circumstance.
*/
pkt_dev->dstops.family = AF_INET;
pkt_dev->dst.dev = pkt_dev->odev;
dst_init_metrics(&pkt_dev->dst, pktgen_dst_metrics, false);
pkt_dev->dst.child = &pkt_dev->dst;
pkt_dev->dst.ops = &pkt_dev->dstops;
pkt_dev->xdst.u.dst.dev = pkt_dev->odev;
dst_init_metrics(&pkt_dev->xdst.u.dst, pktgen_dst_metrics, false);
pkt_dev->xdst.child = &pkt_dev->xdst.u.dst;
pkt_dev->xdst.u.dst.ops = &pkt_dev->dstops;
#endif

return add_dev_to_thread(t, pkt_dev);
Expand Down
Loading

0 comments on commit 3d8068c

Please sign in to comment.