Skip to content

Commit

Permalink
Merge branch 'net-Kernel-side-filtering-for-route-dumps'
Browse files Browse the repository at this point in the history
David Ahern says:

====================
net: Kernel side filtering for route dumps

Implement kernel side filtering of route dumps by protocol (e.g., which
routing daemon installed the route), route type (e.g., unicast), table
id and nexthop device.

iproute2 has been doing this filtering in userspace for years; pushing
the filters to the kernel side reduces the amount of data the kernel
sends and reduces wasted cycles on both sides processing unwanted data.
These initial options provide a huge improvement for efficiently
examining routes on large scale systems.

v2
- better handling of requests for a specific table. Rather than walking
  the hash of all tables, lookup the specific table and dump it
- refactor mr_rtm_dumproute moving the loop over the table into a
  helper that can be invoked directly
- add hook to return NLM_F_DUMP_FILTERED in DONE message to ensure
  it is returned even when the dump returns nothing
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 16, 2018
2 parents e856795 + e4e92fb commit 2c59f06
Show file tree
Hide file tree
Showing 13 changed files with 387 additions and 95 deletions.
11 changes: 9 additions & 2 deletions include/linux/mroute_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_notifier.h>
#include <net/ip_fib.h>

/**
* struct vif_device - interface representor for multicast routing
Expand Down Expand Up @@ -283,14 +284,20 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);

int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm);
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
struct netlink_callback *cb,
int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock, struct fib_dump_filter *filter);
int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct mr_table *(*iter)(struct net *net,
struct mr_table *mrt),
int (*fill)(struct mr_table *mrt,
struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock);
spinlock_t *lock, struct fib_dump_filter *filter);

int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
int (*rules_dump)(struct net *net,
Expand Down Expand Up @@ -340,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
struct sk_buff *skb,
u32 portid, u32 seq, struct mr_mfc *c,
int cmd, int flags),
spinlock_t *lock)
spinlock_t *lock, struct fib_dump_filter *filter)
{
return -EINVAL;
}
Expand Down
1 change: 1 addition & 0 deletions include/linux/netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ struct netlink_callback {
u16 family;
u16 min_dump_alloc;
bool strict_check;
u16 answer_flags;
unsigned int prev_seq, seq;
long args[6];
};
Expand Down
1 change: 1 addition & 0 deletions include/net/ip6_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg {
struct sk_buff *skb;
struct netlink_callback *cb;
struct net *net;
struct fib_dump_filter filter;
};

int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
Expand Down
17 changes: 14 additions & 3 deletions include/net/ip_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,14 +222,24 @@ struct fib_table {
unsigned long __data[0];
};

/*
 * struct fib_dump_filter - selection criteria for a route dump request
 *
 * Populated by ip_valid_fib_dump_req() from the rtmsg header and the
 * netlink attributes of the request, then handed to the table dump
 * routines (e.g. fib_table_dump()). The dump code treats a zero / NULL
 * member as "no restriction on this field".
 */
struct fib_dump_filter {
/* table to dump: rtm_table from the header, replaced by RTA_TABLE if given */
u32 table_id;
/*
 * Set when at least one of flags, protocol, rt_type, table_id or dev is
 * non-zero. Lets the per-entry dump loop skip all filter comparisons
 * with a single test, and selects NLM_F_DUMP_FILTERED in the reply.
 */
bool filter_set;
/* rtm_protocol from the header; compared against the route's fib_protocol */
unsigned char protocol;
/* rtm_type from the header; compared against the route's fa_type */
unsigned char rt_type;
/* rtm_flags from the header (RTM_F_* bits) */
unsigned int flags;
/* device looked up from the RTA_OIF ifindex; matched against route nexthops */
struct net_device *dev;
};

int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct fib_result *res, int fib_flags);
int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
struct netlink_ext_ack *extack);
int fib_table_delete(struct net *, struct fib_table *, struct fib_config *,
struct netlink_ext_ack *extack);
int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
struct netlink_callback *cb);
struct netlink_callback *cb, struct fib_dump_filter *filter);
int fib_table_flush(struct net *net, struct fib_table *table);
struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
void fib_table_flush_external(struct fib_table *table);
Expand Down Expand Up @@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net)

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);

int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb);
#endif /* _NET_FIB_H */
76 changes: 65 additions & 11 deletions net/ipv4/fib_frontend.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,10 +802,16 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}

int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct fib_dump_filter *filter,
struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
struct nlattr *tb[RTA_MAX + 1];
struct rtmsg *rtm;
int err, i;

ASSERT_RTNL();

if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
Expand All @@ -814,8 +820,7 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,

rtm = nlmsg_data(nlh);
if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
rtm->rtm_type) {
rtm->rtm_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
return -EINVAL;
}
Expand All @@ -824,9 +829,42 @@ int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
return -EINVAL;
}

if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request");
return -EINVAL;
filter->flags = rtm->rtm_flags;
filter->protocol = rtm->rtm_protocol;
filter->rt_type = rtm->rtm_type;
filter->table_id = rtm->rtm_table;

err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv4_policy, extack);
if (err < 0)
return err;

for (i = 0; i <= RTA_MAX; ++i) {
int ifindex;

if (!tb[i])
continue;

switch (i) {
case RTA_TABLE:
filter->table_id = nla_get_u32(tb[i]);
break;
case RTA_OIF:
ifindex = nla_get_u32(tb[i]);
filter->dev = __dev_get_by_index(net, ifindex);
if (!filter->dev)
return -ENODEV;
break;
default:
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
return -EINVAL;
}
}

if (filter->flags || filter->protocol || filter->rt_type ||
filter->table_id || filter->dev) {
filter->filter_set = 1;
cb->answer_flags = NLM_F_DUMP_FILTERED;
}

return 0;
Expand All @@ -837,22 +875,38 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct fib_dump_filter filter = {};
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
int dumped = 0, err;

if (cb->strict_check) {
err = ip_valid_fib_dump_req(nlh, cb->extack);
err = ip_valid_fib_dump_req(net, nlh, &filter, cb);
if (err < 0)
return err;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);

filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED);
}

if (nlmsg_len(nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED)
/* fib entries are never clones and ipv4 does not use prefix flag */
if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED))
return skb->len;

if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
return -ENOENT;
}

err = fib_table_dump(tb, skb, cb, &filter);
return skb->len ? : err;
}

s_h = cb->args[0];
s_e = cb->args[1];

Expand All @@ -867,7 +921,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
err = fib_table_dump(tb, skb, cb);
err = fib_table_dump(tb, skb, cb, &filter);
if (err < 0) {
if (likely(skb->len))
goto out;
Expand Down
37 changes: 26 additions & 11 deletions net/ipv4/fib_trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -2003,38 +2003,53 @@ void fib_free_table(struct fib_table *tb)
}

static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *skb, struct netlink_callback *cb,
struct fib_dump_filter *filter)
{
unsigned int flags = NLM_F_MULTI;
__be32 xkey = htonl(l->key);
struct fib_alias *fa;
int i, s_i;

if (filter->filter_set)
flags |= NLM_F_DUMP_FILTERED;

s_i = cb->args[4];
i = 0;

/* rcu_read_lock is held by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
int err;

if (i < s_i) {
i++;
continue;
}
if (i < s_i)
goto next;

if (tb->tb_id != fa->tb_id) {
i++;
continue;
if (tb->tb_id != fa->tb_id)
goto next;

if (filter->filter_set) {
if (filter->rt_type && fa->fa_type != filter->rt_type)
goto next;

if ((filter->protocol &&
fa->fa_info->fib_protocol != filter->protocol))
goto next;

if (filter->dev &&
!fib_info_nh_uses_dev(fa->fa_info, filter->dev))
goto next;
}

err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
tb->tb_id, fa->fa_type,
xkey, KEYLENGTH - fa->fa_slen,
fa->fa_tos, fa->fa_info, NLM_F_MULTI);
fa->fa_tos, fa->fa_info, flags);
if (err < 0) {
cb->args[4] = i;
return err;
}
next:
i++;
}

Expand All @@ -2044,7 +2059,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,

/* rcu_read_lock needs to be held by caller from readside */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
struct netlink_callback *cb, struct fib_dump_filter *filter)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
Expand All @@ -2057,7 +2072,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
int err;

err = fn_trie_dump_leaf(l, tb, skb, cb);
err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
Expand Down
22 changes: 19 additions & 3 deletions net/ipv4/ipmr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2527,15 +2527,31 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
if (cb->strict_check) {
int err = ip_valid_fib_dump_req(cb->nlh, cb->extack);
struct fib_dump_filter filter = {};
int err;

if (cb->strict_check) {
err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
&filter, cb);
if (err < 0)
return err;
}

if (filter.table_id) {
struct mr_table *mrt;

mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
return -ENOENT;
}
err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
&mfc_unres_lock, &filter);
return skb->len ? : err;
}

return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
_ipmr_fill_mroute, &mfc_unres_lock);
_ipmr_fill_mroute, &mfc_unres_lock, &filter);
}

static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
Expand Down
Loading

0 comments on commit 2c59f06

Please sign in to comment.