Skip to content

Commit

Permalink
Merge branch 'nf-ingress'
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter ingress support (v4)

This is the v4 round of patches to add the Netfilter ingress hook. It basically
comes in two steps:

1) Add the CONFIG_NET_INGRESS switch to wrap the ingress static key around it.
   The idea is to use the same global static key to avoid adding more code to
   the hot path.

2) Add the Netfilter ingress hook after the tc ingress hook, under the global
   ingress_needed static key. As I said, the netfilter ingress hook also has
   its own static key, that is nested under the global static key. Please, see
   patch 5/5 for performance numbers and more information.

I originally started this next round, as it was suggested, exploring the
independent static key for netfilter ingress just after tc ingress, but the
results that I gathered from that patch are not good for non-users:

Result: OK: 6425927(c6425843+d83) usec, 100000000 (60byte,0frags)
  15561955pps 7469Mb/sec (7469738400bps) errors: 100000000

this roughly means 500Kpps less performance wrt. the base numbers, so that's
the reason why I discarded that approach and I focused on this.

The idea of this patchset is to open the window to nf_tables, which comes with
features that will work out-of-the-box (once the boilerplate code to support
the 'netdev' table family is in place). To avoid repeating myself [1], the most
relevant features are:

1) Multi-dimensional key dictionary lookups.
2) Arbitrary stateful flow tables.
3) Transactions and good support for dynamic updates.

But there are also interesting aspects to consider from userspace, such as the
ability to support new layer 2 protocols without kernel updates, a well-defined
netlink interface, userspace libraries and utilities for third party
applications, among others.

I hope we can be happy with this approach.

Please, apply. Thanks.

[1] http://marc.info/?l=netfilter-devel&m=143033337020328&w=2
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 14, 2015
2 parents a104a6b + e687ad6 commit 5a99e7f
Show file tree
Hide file tree
Showing 10 changed files with 159 additions and 23 deletions.
3 changes: 3 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,9 @@ struct net_device {
struct tcf_proto __rcu *ingress_cl_list;
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
struct list_head nf_hooks_ingress;
#endif

unsigned char broadcast[MAX_ADDR_LEN];
#ifdef CONFIG_RFS_ACCEL
Expand Down
39 changes: 25 additions & 14 deletions include/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ struct nf_hook_state {
struct net_device *in;
struct net_device *out;
struct sock *sk;
struct list_head *hook_list;
int (*okfn)(struct sock *, struct sk_buff *);
};

static inline void nf_hook_state_init(struct nf_hook_state *p,
struct list_head *hook_list,
unsigned int hook,
int thresh, u_int8_t pf,
struct net_device *indev,
Expand All @@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
p->in = indev;
p->out = outdev;
p->sk = sk;
p->hook_list = hook_list;
p->okfn = okfn;
}

Expand All @@ -79,16 +82,17 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
const struct nf_hook_state *state);

struct nf_hook_ops {
struct list_head list;
struct list_head list;

/* User fills in from here down. */
nf_hookfn *hook;
struct module *owner;
void *priv;
u_int8_t pf;
unsigned int hooknum;
nf_hookfn *hook;
struct net_device *dev;
struct module *owner;
void *priv;
u_int8_t pf;
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
int priority;
};

struct nf_sockopt_ops {
Expand Down Expand Up @@ -131,26 +135,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];

static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
u_int8_t pf, unsigned int hook)
{
if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook))
return static_key_false(&nf_hooks_needed[pf][hook]);

return !list_empty(&nf_hooks[pf][hook]);
return !list_empty(nf_hook_list);
}
#else
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
u_int8_t pf, unsigned int hook)
{
return !list_empty(&nf_hooks[pf][hook]);
return !list_empty(nf_hook_list);
}
#endif

/*
 * Query the global hook table entry for protocol family @pf and hook
 * number @hook by delegating to nf_hook_list_active().
 */
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
	struct list_head *global_list = &nf_hooks[pf][hook];

	return nf_hook_list_active(global_list, pf, hook);
}

int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);

/**
* nf_hook_thresh - call a netfilter hook
*
*
* Returns 1 if the hook has allowed the packet to pass. The function
* okfn must be invoked by the caller in this case. Any other return
* value indicates the packet has been consumed by the hook.
Expand All @@ -166,8 +177,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
if (nf_hooks_active(pf, hook)) {
struct nf_hook_state state;

nf_hook_state_init(&state, hook, thresh, pf,
indev, outdev, sk, okfn);
nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
pf, indev, outdev, sk, okfn);
return nf_hook_slow(skb, &state);
}
return 1;
Expand Down
41 changes: 41 additions & 0 deletions include/linux/netfilter_ingress.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef _NETFILTER_INGRESS_H_
#define _NETFILTER_INGRESS_H_

#include <linux/netfilter.h>
#include <linux/netdevice.h>

#ifdef CONFIG_NETFILTER_INGRESS
/*
 * Ask nf_hook_list_active() about the ingress hook list that belongs to
 * the device @skb was received on (skb->dev), for the
 * NFPROTO_NETDEV / NF_NETDEV_INGRESS hook.
 */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
	struct list_head *ingress_hooks = &skb->dev->nf_hooks_ingress;

	return nf_hook_list_active(ingress_hooks, NFPROTO_NETDEV,
				   NF_NETDEV_INGRESS);
}

/**
 * nf_hook_ingress - run the per-device Netfilter ingress hooks on a packet
 * @skb: packet being received; skb->dev supplies the hook list and is
 *       reported to the hooks as the input device
 *
 * Builds a hook state for NFPROTO_NETDEV / NF_NETDEV_INGRESS with a
 * threshold of INT_MIN, no output device, no socket and no okfn, then
 * hands the packet to nf_hook_slow().
 *
 * Returns whatever nf_hook_slow() returns.
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
	struct nf_hook_state state;

	/*
	 * nf_hook_state_init() takes the input device before the output
	 * device. On ingress the receiving device is the input interface,
	 * so skb->dev goes in the indev slot and outdev stays NULL;
	 * passing them the other way round leaves state.in unset for
	 * every hook that inspects the incoming interface.
	 */
	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
			   skb->dev, NULL, NULL, NULL);
	return nf_hook_slow(skb, &state);
}

/*
 * Set up the ingress hook list head embedded in @dev
 * (dev->nf_hooks_ingress) via INIT_LIST_HEAD().
 */
static inline void nf_hook_ingress_init(struct net_device *dev)
{
INIT_LIST_HEAD(&dev->nf_hooks_ingress);
}
#else /* CONFIG_NETFILTER_INGRESS */
/*
 * Stub for builds without CONFIG_NETFILTER_INGRESS: always returns 0,
 * i.e. no ingress hooks are ever reported as active.
 */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
{
return 0;
}

/*
 * Stub for builds without CONFIG_NETFILTER_INGRESS: no hooks are run and
 * 0 is returned unconditionally.
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
return 0;
}

/* Stub for builds without CONFIG_NETFILTER_INGRESS: nothing to initialise. */
static inline void nf_hook_ingress_init(struct net_device *dev) {}
#endif /* CONFIG_NETFILTER_INGRESS */
#endif /* _NETFILTER_INGRESS_H_ */
2 changes: 1 addition & 1 deletion include/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)

struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);

#ifdef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NET_INGRESS
void net_inc_ingress_queue(void);
void net_dec_ingress_queue(void);
#endif
Expand Down
6 changes: 6 additions & 0 deletions include/uapi/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,17 @@ enum nf_inet_hooks {
NF_INET_NUMHOOKS
};

/* Hook numbers used together with the NFPROTO_NETDEV family. */
enum nf_dev_hooks {
	NF_NETDEV_INGRESS = 0,	/* the single hook defined here */
	NF_NETDEV_NUMHOOKS	/* count of the entries above */
};

enum {
NFPROTO_UNSPEC = 0,
NFPROTO_INET = 1,
NFPROTO_IPV4 = 2,
NFPROTO_ARP = 3,
NFPROTO_NETDEV = 5,
NFPROTO_BRIDGE = 7,
NFPROTO_IPV6 = 10,
NFPROTO_DECNET = 12,
Expand Down
3 changes: 3 additions & 0 deletions net/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
Newly written code should NEVER need this option but do
compat-independent messages instead!

config NET_INGRESS
bool

menu "Networking options"

source "net/packet/Kconfig"
Expand Down
43 changes: 40 additions & 3 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>

#include "net-sysfs.h"

Expand Down Expand Up @@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);

#ifdef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;

void net_inc_ingress_queue(void)
Expand Down Expand Up @@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,

return skb;
}
#else
/*
 * Fallback definition from the #else branch: performs no ingress
 * processing and hands @skb back unchanged. @pt_prev, @ret and @orig_dev
 * are accepted for signature compatibility and are not touched.
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
return skb;
}
#endif

/**
Expand Down Expand Up @@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
}
}

#ifdef CONFIG_NETFILTER_INGRESS
/*
 * Run the Netfilter ingress hooks for @skb, if any are active.
 *
 * When hooks are active and a packet handler is still pending in
 * *@pt_prev, the packet is first delivered to it through deliver_skb()
 * (result stored in *@ret) and *@pt_prev is cleared. The hooks are then
 * run via nf_hook_ingress() and its return value is passed back.
 *
 * Returns 0 without touching *@pt_prev or *@ret when no ingress hooks
 * are active.
 */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
			     int *ret, struct net_device *orig_dev)
{
	struct packet_type *pending;

	if (!nf_hook_ingress_active(skb))
		return 0;

	pending = *pt_prev;
	if (pending) {
		*ret = deliver_skb(skb, pending, orig_dev);
		*pt_prev = NULL;
	}

	return nf_hook_ingress(skb);
}
#else
/*
 * Variant used when CONFIG_NETFILTER_INGRESS is not set: no hooks exist,
 * so this returns 0 and leaves all arguments untouched.
 */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
return 0;
}
#endif

static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
struct packet_type *ptype, *pt_prev;
Expand Down Expand Up @@ -3798,13 +3828,17 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto unlock;
}

if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto unlock;
}
#endif
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
ncls:
#endif
Expand Down Expand Up @@ -6967,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;

nf_hook_ingress_init(dev);

return dev;

free_all:
Expand Down
7 changes: 7 additions & 0 deletions net/netfilter/Kconfig
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
menu "Core Netfilter Configuration"
depends on NET && INET && NETFILTER

config NETFILTER_INGRESS
bool "Netfilter ingress support"
select NET_INGRESS
help
This allows you to classify packets from ingress using the Netfilter
infrastructure.

config NETFILTER_NETLINK
tristate

Expand Down
37 changes: 32 additions & 5 deletions net/netfilter/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);

int nf_register_hook(struct nf_hook_ops *reg)
{
struct list_head *nf_hook_list;
struct nf_hook_ops *elem;

mutex_lock(&nf_hook_mutex);
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
switch (reg->pf) {
case NFPROTO_NETDEV:
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) {
BUG_ON(reg->dev == NULL);
nf_hook_list = &reg->dev->nf_hooks_ingress;
net_inc_ingress_queue();
break;
}
#endif
/* Fall through. */
default:
nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
break;
}

list_for_each_entry(elem, nf_hook_list, list) {
if (reg->priority < elem->priority)
break;
}
Expand All @@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
switch (reg->pf) {
case NFPROTO_NETDEV:
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) {
net_dec_ingress_queue();
break;
}
break;
#endif
default:
break;
}
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
Expand Down Expand Up @@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();

elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
struct nf_hook_ops, list);
elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
next_hook:
verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
&elem);
verdict = nf_iterate(state->hook_list, skb, state, &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
Expand Down
1 change: 1 addition & 0 deletions net/sched/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ config NET_SCH_PIE
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
select NET_INGRESS
---help---
Say Y here if you want to use classifiers for incoming packets.
If unsure, say Y.
Expand Down

0 comments on commit 5a99e7f

Please sign in to comment.