Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS for net-next:

1) Add new run_estimation toggle to IPVS to stop the estimation_timer
   logic, from Dust Li.

2) Relax superfluous dynset check on NFT_SET_TIMEOUT.

3) Add egress hook, from Lukas Wunner.

4) Nowadays, almost all hook functions in x_table land just call the hook
   evaluation loop. Remove remaining hook wrappers from iptables and IPVS.
   From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Oct 18, 2021
2 parents c87350c + ffdd33d commit 7adaf56
Show file tree
Hide file tree
Showing 40 changed files with 389 additions and 349 deletions.
11 changes: 11 additions & 0 deletions Documentation/networking/ipvs-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,14 @@ sync_version - INTEGER

Kernels with this sync_version entry are able to receive messages
of both version 1 and version 2 of the synchronisation protocol.

run_estimation - BOOLEAN
0 - disabled
not 0 - enabled (default)

If disabled, the estimation will be stopped, and you can't see
any update on speed estimation data.

You can always re-enable estimation by setting this value to 1.
But be careful, the first estimation after re-enabling is not
accurate.
3 changes: 3 additions & 0 deletions drivers/net/ifb.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_netdev.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>

Expand Down Expand Up @@ -75,8 +76,10 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
}

while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
/* Skip tc and netfilter to prevent redirection loop. */
skb->redirected = 0;
skb->tc_skip_classify = 1;
nf_skip_egress(skb, true);

u64_stats_update_begin(&txp->tsync);
txp->tx_packets++;
Expand Down
4 changes: 4 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type {
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
* egress processing
* @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
Expand Down Expand Up @@ -2160,6 +2161,9 @@ struct net_device {
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
#endif
#ifdef CONFIG_NETFILTER_EGRESS
struct nf_hook_entries __rcu *nf_hooks_egress;
#endif

#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
Expand Down
5 changes: 2 additions & 3 deletions include/linux/netfilter_arp/arp_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table,
const struct nf_hook_ops *ops);
void arpt_unregister_table(struct net *net, const char *name);
void arpt_unregister_table_pre_exit(struct net *net, const char *name);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
Expand Down
5 changes: 2 additions & 3 deletions include/linux/netfilter_bridge/ebtables.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,8 @@ extern int ebt_register_table(struct net *net,
const struct nf_hook_ops *ops);
extern void ebt_unregister_table(struct net *net, const char *tablename);
void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
extern unsigned int ebt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct ebt_table *table);
extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);

/* True if the hook mask denotes that the rule is in a base chain,
* used in the check() functions */
Expand Down
58 changes: 0 additions & 58 deletions include/linux/netfilter_ingress.h

This file was deleted.

6 changes: 3 additions & 3 deletions include/linux/netfilter_ipv4/ip_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ struct ipt_error {
}

extern void *ipt_alloc_initial_table(const struct xt_table *);
extern unsigned int ipt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
extern unsigned int ipt_do_table(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state);

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
Expand Down
5 changes: 2 additions & 3 deletions include/linux/netfilter_ipv6/ip6_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct nf_hook_ops *ops);
void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
void ip6t_unregister_table_exit(struct net *net, const char *name);
extern unsigned int ip6t_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
Expand Down
146 changes: 146 additions & 0 deletions include/linux/netfilter_netdev.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NETFILTER_NETDEV_H_
#define _NETFILTER_NETDEV_H_

#include <linux/netfilter.h>
#include <linux/netdevice.h>

#ifdef CONFIG_NETFILTER_INGRESS
/*
 * Cheap pre-check used before calling nf_hook_ingress(): tells whether the
 * receiving device of @skb currently has an ingress hook list attached.
 */
static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
{
#ifdef CONFIG_JUMP_LABEL
	/* Static key reports false: answer without looking at the device. */
	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
		return false;
#endif
	/* Only the pointer value is tested here, it is never dereferenced. */
	return rcu_access_pointer(skb->dev->nf_hooks_ingress) != NULL;
}

/*
 * Run the ingress hooks of skb->dev on @skb.
 *
 * Returns 0 when no hook list is attached, -1 when nf_hook_slow() returned 0,
 * and the unmodified nf_hook_slow() result otherwise.
 *
 * Caller must hold rcu_read_lock.
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
	struct nf_hook_entries *hooks = rcu_dereference(skb->dev->nf_hooks_ingress);
	struct nf_hook_state state;
	int verdict;

	/* The hook head may have become NULL after nf_hook_ingress_active()
	 * evaluated to true, so it has to be rechecked here.
	 */
	if (unlikely(!hooks))
		return 0;

	nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV,
			   skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
	verdict = nf_hook_slow(skb, &state, hooks, 0);

	/* A zero result is reported to the caller as -1. */
	return verdict == 0 ? -1 : verdict;
}

#else /* CONFIG_NETFILTER_INGRESS */
/*
 * Stub used when CONFIG_NETFILTER_INGRESS is not set: there are never any
 * ingress hooks, so the answer is always false.
 *
 * The signature (bool return, const skb) matches the real implementation in
 * the CONFIG_NETFILTER_INGRESS branch so both configurations type-check the
 * same way at call sites.
 */
static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
{
	return false;
}

/*
 * Stub used when CONFIG_NETFILTER_INGRESS is not set: no hooks are run and
 * 0 is returned, the same value the real implementation returns when no
 * hook list is attached to the device.
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
return 0;
}
#endif /* CONFIG_NETFILTER_INGRESS */

#ifdef CONFIG_NETFILTER_EGRESS
/*
 * Tells whether the egress hook path needs to be taken at all. With jump
 * labels the answer comes from the NFPROTO_NETDEV/NF_NETDEV_EGRESS static
 * key; without them it is unconditionally true.
 */
static inline bool nf_hook_egress_active(void)
{
#ifdef CONFIG_JUMP_LABEL
	return static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS]);
#else
	return true;
#endif
}

/**
 * nf_hook_egress - run the netfilter egress hooks on a packet before transmit
 * @skb: packet to be classified
 * @rc: receives the code __dev_queue_xmit() shall return; written only when
 *	%NULL is returned (%NET_XMIT_DROP for a negative verdict,
 *	%NET_XMIT_SUCCESS otherwise)
 * @dev: netdev whose egress hooks shall be applied to @skb
 *
 * Returns @skb when transmission shall proceed, or %NULL when the packet was
 * consumed or filtered.  Caller must hold rcu_read_lock.
 *
 * Ingress classifies with tc first and netfilter second; egress mirrors that
 * order for symmetry.  Think of the two as layers, with netfilter sitting on
 * top of tc: a packet which tc redirects to another interface stays on the
 * tc layer and is therefore not passed through netfilter.
 *
 * Whether netfilter runs on egress is governed by the nf_skip_egress flag,
 * which __netif_receive_skb_core() and __dev_queue_xmit() update as the
 * packet travels through tc and netfilter.  Tunnel drivers such as vxlan may
 * call __dev_queue_xmit() recursively, so the flag is reverted to false
 * after sch_handle_egress().  That way netfilter is applied both on the
 * overlay and on the underlying network.
 */
static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
					     struct net_device *dev)
{
	struct nf_hook_entries *hooks;
	struct nf_hook_state state;
	int verdict;

#ifdef CONFIG_NETFILTER_SKIP_EGRESS
	if (skb->nf_skip_egress)
		return skb;
#endif

	hooks = rcu_dereference(dev->nf_hooks_egress);
	if (!hooks)
		return skb;

	nf_hook_state_init(&state, NF_NETDEV_EGRESS, NFPROTO_NETDEV,
			   dev, NULL, NULL, dev_net(dev), NULL);
	verdict = nf_hook_slow(skb, &state, hooks, 0);
	if (verdict == 1)
		return skb;

	/* Not handing the skb back: tell the caller what to report instead. */
	*rc = verdict < 0 ? NET_XMIT_DROP : NET_XMIT_SUCCESS;
	return NULL;
}
#else /* CONFIG_NETFILTER_EGRESS */
/*
 * Stub used when CONFIG_NETFILTER_EGRESS is not set: the egress hook path
 * is never active.
 */
static inline bool nf_hook_egress_active(void)
{
return false;
}

/*
 * Stub used when CONFIG_NETFILTER_EGRESS is not set: the packet is handed
 * back untouched and neither @rc nor @dev is used.
 */
static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
struct net_device *dev)
{
return skb;
}
#endif /* CONFIG_NETFILTER_EGRESS */

/*
 * Set (@skip true) or clear (@skip false) the nf_skip_egress flag of @skb.
 * Compiles to an empty function when CONFIG_NETFILTER_SKIP_EGRESS is not
 * set, in which case the flag does not exist in struct sk_buff.
 */
static inline void nf_skip_egress(struct sk_buff *skb, bool skip)
{
#ifdef CONFIG_NETFILTER_SKIP_EGRESS
skb->nf_skip_egress = skip;
#endif
}

/*
 * Reset the per-device netfilter hook heads of @dev to NULL. Only the heads
 * whose config option is enabled exist, so each one is handled under its
 * own guard; with neither option set this function does nothing.
 */
static inline void nf_hook_netdev_init(struct net_device *dev)
{
#ifdef CONFIG_NETFILTER_EGRESS
	RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
#endif
#ifdef CONFIG_NETFILTER_INGRESS
	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
#endif
}

#endif /* _NETFILTER_NETDEV_H_ */
4 changes: 4 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t;
* @tc_at_ingress: used within tc_classify to distinguish in/egress
* @redirected: packet was redirected by packet classifier
* @from_ingress: packet was redirected from the ingress path
* @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
Expand Down Expand Up @@ -868,6 +869,9 @@ struct sk_buff {
#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
#ifdef CONFIG_NETFILTER_SKIP_EGRESS
__u8 nf_skip_egress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
#endif
Expand Down
11 changes: 11 additions & 0 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,7 @@ struct netns_ipvs {
int sysctl_conn_reuse_mode;
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
int sysctl_run_estimation;

/* ip_vs_lblc */
int sysctl_lblc_expiration;
Expand Down Expand Up @@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
return ipvs->sysctl_cache_bypass;
}

/*
 * Read the per-netns run_estimation setting: 0 means estimation is
 * disabled, any other value means it is enabled.
 */
static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_run_estimation;
}

#else

static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
Expand Down Expand Up @@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
return 0;
}

/*
 * Fallback variant in the #else branch (the controlling condition is not
 * visible here; presumably CONFIG_SYSCTL, to be confirmed): @ipvs is not
 * consulted and estimation is always reported as enabled.
 */
static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
{
return 1;
}

#endif

/* IPVS core functions
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ enum nf_inet_hooks {

/* Hook points of the NFPROTO_NETDEV family. */
enum nf_dev_hooks {
/* hook run on packets received by a device */
NF_NETDEV_INGRESS,
/* hook run on packets about to be transmitted by a device */
NF_NETDEV_EGRESS,
/* number of hook points above; must stay the last entry */
NF_NETDEV_NUMHOOKS
};

Expand Down
2 changes: 1 addition & 1 deletion net/bridge/netfilter/ebtable_broute.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
NFPROTO_BRIDGE, s->in, NULL, NULL,
s->net, NULL);

ret = ebt_do_table(skb, &state, priv);
ret = ebt_do_table(priv, skb, &state);
if (ret != NF_DROP)
return ret;

Expand Down
Loading

0 comments on commit 7adaf56

Please sign in to comment.