Skip to content

Commit

Permalink
Merge tag 'nf-next-23-04-22' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

1) Reduce jumpstack footprint: Stash chain in last rule marker in blob for
   tracing. Remove last rule and chain from jumpstack. From Florian Westphal.

2) nf_tables validates all tables before committing the new rules.
   Unfortunately, this has two drawbacks:

   - Since addition of the transaction mutex pernet state gets written to
     outside of the locked section from the cleanup callback, this is
     wrong so do this cleanup directly after table has passed all checks.

   - Revalidate tables that saw no changes. This can be avoided by
     keeping the validation state per table, not per netns.

   From Florian Westphal.

3) Get rid of a few redundant pointers in the traceinfo structure.
   The three removed pointers are used in the expression evaluation loop,
   so gcc keeps them in registers. Passing them to the (inlined) helpers
   thus doesn't increase nft_do_chain text size, while stack is reduced
   by another 24 bytes on 64bit arches. From Florian Westphal.

4) IPVS cleanups in several ways without implementing any functional
   changes, aside from removing some debugging output:

   - Update width of source for ip_vs_sync_conn_options
     The operation is safe, use an annotation to describe it properly.

   - Consistently use array_size() in ip_vs_conn_init()
     It seems better to use helpers consistently.

   - Remove {Enter,Leave}Function. These seem to be well past their
     use-by date.

   - Correct spelling in comments.

   From Simon Horman.

5) Extended netlink error report for netdevice in flowtables and
   netdev/chains. Allow for incrementally add/delete devices to netdev
   basechain. Allow to create netdev chain without device.

* tag 'nf-next-23-04-22' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_tables: allow to create netdev chain without device
  netfilter: nf_tables: support for deleting devices in an existing netdev chain
  netfilter: nf_tables: support for adding new devices to an existing netdev chain
  netfilter: nf_tables: rename function to destroy hook list
  netfilter: nf_tables: do not send complete notification of deletions
  netfilter: nf_tables: extended netlink error reporting for netdevice
  ipvs: Correct spelling in comments
  ipvs: Remove {Enter,Leave}Function
  ipvs: Consistently use array_size() in ip_vs_conn_init()
  ipvs: Update width of source for ip_vs_sync_conn_options
  netfilter: nf_tables: do not store rule in traceinfo structure
  netfilter: nf_tables: do not store verdict in traceinfo structure
  netfilter: nf_tables: do not store pktinfo in traceinfo structure
  netfilter: nf_tables: remove unneeded conditional
  netfilter: nf_tables: make validation state per table
  netfilter: nf_tables: don't write table validation state without mutex
  netfilter: nf_tables: don't store chain address on jump
  netfilter: nf_tables: don't store address of last rule on jump
  netfilter: nf_tables: merge nft_rules_old structure and end of ruleblob marker
====================

Link: https://lore.kernel.org/r/20230421235021.216950-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Apr 24, 2023
2 parents 2efb07b + 207296f commit ffcddca
Show file tree
Hide file tree
Showing 12 changed files with 463 additions and 382 deletions.
1 change: 0 additions & 1 deletion include/linux/netfilter/nfnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ struct nfnetlink_subsystem {
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action);
void (*cleanup)(struct net *net);
bool (*valid_genid)(struct net *net, u32 genid);
};

Expand Down
32 changes: 7 additions & 25 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,26 +265,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)

#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level) \
do { \
if (level <= ip_vs_get_debug_level()) \
printk(KERN_DEBUG \
pr_fmt("Enter: %s, %s line %i\n"), \
__func__, __FILE__, __LINE__); \
} while (0)
#define LeaveFunction(level) \
do { \
if (level <= ip_vs_get_debug_level()) \
printk(KERN_DEBUG \
pr_fmt("Leave: %s, %s line %i\n"), \
__func__, __FILE__, __LINE__); \
} while (0)
#else
#define EnterFunction(level) do {} while (0)
#define LeaveFunction(level) do {} while (0)
#endif

/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
Expand Down Expand Up @@ -604,7 +584,7 @@ struct ip_vs_conn {
spinlock_t lock; /* lock for state transition */
volatile __u16 state; /* state info */
volatile __u16 old_state; /* old state, to be used for
* state transition triggerd
* state transition triggered
* synchronization
*/
__u32 fwmark; /* Fire wall mark from skb */
Expand All @@ -630,8 +610,10 @@ struct ip_vs_conn {
*/
struct ip_vs_app *app; /* bound ip_vs_app object */
void *app_data; /* Application private data */
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
struct_group(sync_conn_opt,
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
);

const struct ip_vs_pe *pe;
char *pe_data;
Expand All @@ -653,7 +635,7 @@ struct ip_vs_service_user_kern {
u16 protocol;
union nf_inet_addr addr; /* virtual ip address */
__be16 port;
u32 fwmark; /* firwall mark of service */
u32 fwmark; /* firewall mark of service */

/* virtual service options */
char *sched_name;
Expand Down Expand Up @@ -1054,7 +1036,7 @@ struct netns_ipvs {
struct ipvs_sync_daemon_cfg bcfg; /* Backup Configuration */
/* net name space ptr */
struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed becaus heterogeneous
/* Number of heterogeneous destinations, needed because heterogeneous
* are not supported when synchronization is enabled.
*/
unsigned int mixed_address_family_dests;
Expand Down
35 changes: 24 additions & 11 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,18 @@ struct nft_rule_dp {
__attribute__((aligned(__alignof__(struct nft_expr))));
};

struct nft_rule_dp_last {
struct nft_rule_dp end; /* end of nft_rule_blob marker */
struct rcu_head h; /* call_rcu head */
struct nft_rule_blob *blob; /* ptr to free via call_rcu */
const struct nft_chain *chain; /* for nftables tracing */
};

static inline const struct nft_rule_dp *nft_rule_next(const struct nft_rule_dp *rule)
{
return (void *)rule + sizeof(*rule) + rule->dlen;
}

struct nft_rule_blob {
unsigned long size;
unsigned char data[]
Expand Down Expand Up @@ -1197,6 +1209,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @genmask: generation mask
* @afinfo: address family info
* @name: name of the table
* @validate_state: internal, set when transaction adds jumps
*/
struct nft_table {
struct list_head list;
Expand All @@ -1215,6 +1228,7 @@ struct nft_table {
char *name;
u16 udlen;
u8 *udata;
u8 validate_state;
};

static inline bool nft_table_has_owner(const struct nft_table *table)
Expand Down Expand Up @@ -1394,30 +1408,24 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
* @type: event type (enum nft_trace_types)
* @skbid: hash of skb to be used as trace id
* @packet_dumped: packet headers sent in a previous traceinfo message
* @pkt: pktinfo currently processed
* @basechain: base chain currently processed
* @chain: chain currently processed
* @rule: rule that was evaluated
* @verdict: verdict given by rule
*/
struct nft_traceinfo {
bool trace;
bool nf_trace;
bool packet_dumped;
enum nft_trace_types type:8;
u32 skbid;
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
const struct nft_rule_dp *rule;
const struct nft_verdict *verdict;
};

void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_chain *basechain);

void nft_trace_notify(struct nft_traceinfo *info);
void nft_trace_notify(const struct nft_pktinfo *pkt,
const struct nft_verdict *verdict,
const struct nft_rule_dp *rule,
struct nft_traceinfo *info);

#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
Expand Down Expand Up @@ -1601,6 +1609,8 @@ struct nft_trans_chain {
struct nft_stats __percpu *stats;
u8 policy;
u32 chain_id;
struct nft_base_chain *basechain;
struct list_head hook_list;
};

#define nft_trans_chain_update(trans) \
Expand All @@ -1613,6 +1623,10 @@ struct nft_trans_chain {
(((struct nft_trans_chain *)trans->data)->policy)
#define nft_trans_chain_id(trans) \
(((struct nft_trans_chain *)trans->data)->chain_id)
#define nft_trans_basechain(trans) \
(((struct nft_trans_chain *)trans->data)->basechain)
#define nft_trans_chain_hooks(trans) \
(((struct nft_trans_chain *)trans->data)->hook_list)

struct nft_trans_table {
bool update;
Expand Down Expand Up @@ -1688,7 +1702,6 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
u8 validate_state;
};

extern unsigned int nf_tables_net_id;
Expand Down
12 changes: 6 additions & 6 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,7 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)

int __init ip_vs_conn_init(void)
{
size_t tab_array_size;
int idx;

/* Compute size and mask */
Expand All @@ -1494,8 +1495,9 @@ int __init ip_vs_conn_init(void)
/*
* Allocate the connection hash table and initialize its list heads
*/
ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
sizeof(*ip_vs_conn_tab)));
tab_array_size = array_size(ip_vs_conn_tab_size,
sizeof(*ip_vs_conn_tab));
ip_vs_conn_tab = vmalloc(tab_array_size);
if (!ip_vs_conn_tab)
return -ENOMEM;

Expand All @@ -1508,10 +1510,8 @@ int __init ip_vs_conn_init(void)
return -ENOMEM;
}

pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
(long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n",
ip_vs_conn_tab_size, tab_array_size / 1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));

Expand Down
8 changes: 0 additions & 8 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
__be16 vport;
unsigned int flags;

EnterFunction(12);
vaddr = &svc->addr;
vport = svc->port;
daddr = &iph->saddr;
Expand Down Expand Up @@ -1208,7 +1207,6 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
cp->flags, refcount_read(&cp->refcnt));
LeaveFunction(12);
return cp;
}

Expand Down Expand Up @@ -1316,13 +1314,11 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
ip_vs_update_conntrack(skb, cp, 0);
ip_vs_conn_put(cp);

LeaveFunction(11);
return NF_ACCEPT;

drop:
ip_vs_conn_put(cp);
kfree_skb(skb);
LeaveFunction(11);
return NF_STOLEN;
}

Expand All @@ -1341,8 +1337,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
int af = state->pf;
struct sock *sk;

EnterFunction(11);

/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return NF_ACCEPT;
Expand Down Expand Up @@ -2365,7 +2359,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
struct netns_ipvs *ipvs;
struct net *net;

EnterFunction(2);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
ip_vs_unregister_hooks(ipvs, AF_INET);
Expand All @@ -2374,7 +2367,6 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
LeaveFunction(2);
}

static struct pernet_operations ipvs_core_ops = {
Expand Down
26 changes: 1 addition & 25 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1061,8 +1061,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
unsigned int atype;
int ret;

EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
Expand Down Expand Up @@ -1111,7 +1109,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
spin_lock_init(&dest->dst_lock);
__ip_vs_update_dest(svc, dest, udest, 1);

LeaveFunction(2);
return 0;

err_stats:
Expand All @@ -1134,8 +1131,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
__be16 dport = udest->port;
int ret;

EnterFunction(2);

if (udest->weight < 0) {
pr_err("%s(): server weight less than zero\n", __func__);
return -ERANGE;
Expand Down Expand Up @@ -1183,7 +1178,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)

ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
if (ret < 0)
goto err;
return ret;
__ip_vs_update_dest(svc, dest, udest, 1);
} else {
/*
Expand All @@ -1192,9 +1187,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ret = ip_vs_new_dest(svc, udest);
}

err:
LeaveFunction(2);

return ret;
}

Expand All @@ -1209,8 +1201,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
union nf_inet_addr daddr;
__be16 dport = udest->port;

EnterFunction(2);

if (udest->weight < 0) {
pr_err("%s(): server weight less than zero\n", __func__);
return -ERANGE;
Expand Down Expand Up @@ -1242,7 +1232,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
}

__ip_vs_update_dest(svc, dest, udest, 0);
LeaveFunction(2);

return 0;
}
Expand Down Expand Up @@ -1317,8 +1306,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
struct ip_vs_dest *dest;
__be16 dport = udest->port;

EnterFunction(2);

/* We use function that requires RCU lock */
rcu_read_lock();
dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
Expand All @@ -1339,8 +1326,6 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
*/
__ip_vs_del_dest(svc->ipvs, dest, false);

LeaveFunction(2);

return 0;
}

Expand Down Expand Up @@ -1746,15 +1731,13 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list)
struct netns_ipvs *ipvs;
struct net *net;

EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
ip_vs_flush(ipvs, true);
}
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}

/* Put all references for device (dst_cache) */
Expand Down Expand Up @@ -1792,7 +1775,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
EnterFunction(2);
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
Expand Down Expand Up @@ -1821,7 +1803,6 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
}
spin_unlock_bh(&ipvs->dest_trash_lock);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
return NOTIFY_DONE;
}

Expand Down Expand Up @@ -4537,8 +4518,6 @@ int __init ip_vs_control_init(void)
int idx;
int ret;

EnterFunction(2);

/* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
Expand All @@ -4551,15 +4530,12 @@ int __init ip_vs_control_init(void)
if (ret < 0)
return ret;

LeaveFunction(2);
return 0;
}


void ip_vs_control_cleanup(void)
{
EnterFunction(2);
unregister_netdevice_notifier(&ip_vs_dst_notifier);
/* relying on common rcu_barrier() in ip_vs_cleanup() */
LeaveFunction(2);
}
Loading

0 comments on commit ffcddca

Please sign in to comment.