Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

This a bit large (and late) patchset that contains Netfilter updates for
net-next. Most relevantly br_netfilter fixes, ipset RCU support, removal of
x_tables percpu ruleset copy and rework of the nf_tables netdev support. More
specifically, they are:

1) Warn the user when there is a better protocol conntracker available, from
   Marcelo Ricardo Leitner.

2) Fix forwarding of IPv6 fragmented traffic in br_netfilter, from Bernhard
   Thaler. This comes with several patches to prepare the change in first place.

3) Get rid of special mtu handling of PPPoE/VLAN frames for br_netfilter. This
   is not needed anymore since now we use the largest fragment size to
   refragment, from Florian Westphal.

4) Restore vlan tag when refragmenting in br_netfilter, also from Florian.

5) Get rid of the percpu ruleset copy in x_tables, from Florian. Plus another
   follow up patch to refine it from Eric Dumazet.

6) Several ipset cleanups, fixes and finally RCU support, from Jozsef Kadlecsik.

7) Get rid of parens in Netfilter Kconfig files.

8) Attach the net_device to the basechain as opposed to the initial per table
   approach in the nf_tables netdev family.

9) Subscribe to netdev events to detect the removal and registration of a
   device that is referenced by a basechain.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jun 15, 2015
2 parents 758f0d4 + 835b803 commit ada6c1d
Show file tree
Hide file tree
Showing 45 changed files with 1,972 additions and 1,718 deletions.
29 changes: 17 additions & 12 deletions include/linux/netfilter/ipset/ip_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets;
};

struct ip_set_comment_rcu {
struct rcu_head rcu;
char str[0];
};

struct ip_set_comment {
char *str;
struct ip_set_comment_rcu __rcu *c;
};

struct ip_set_skbinfo {
Expand Down Expand Up @@ -176,6 +181,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
/* Keep listing private when resizing runs parallel */
void (*uref)(struct ip_set *set, struct netlink_callback *cb,
bool start);

/* Return true if "b" set is the same as "a"
* according to the create set parameters */
Expand Down Expand Up @@ -223,7 +231,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
rwlock_t lock;
spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */
Expand Down Expand Up @@ -341,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask))) ||
(skbinfo->skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));

}

static inline void
Expand Down Expand Up @@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,

/* Netlink CB args */
enum {
IPSET_CB_NET = 0,
IPSET_CB_DUMP,
IPSET_CB_INDEX,
IPSET_CB_ARG0,
IPSET_CB_NET = 0, /* net namespace */
IPSET_CB_DUMP, /* dump single set/all sets */
IPSET_CB_INDEX, /* set index */
IPSET_CB_PRIVATE, /* set private data */
IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
IPSET_CB_ARG2,
};

/* register and unregister set references */
Expand Down Expand Up @@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \
.timeout = (set)->timeout }

#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))

#define IPSET_CONCAT(a, b) a##b
#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b)

Expand Down
38 changes: 27 additions & 11 deletions include/linux/netfilter/ipset/ip_set_comment.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb);
}

/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;

if (unlikely(comment->str)) {
kfree(comment->str);
comment->str = NULL;
if (unlikely(c)) {
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
comment->str = kzalloc(len + 1, GFP_ATOMIC);
if (unlikely(!comment->str))
c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
strlcpy(comment->str, ext->comment, len + 1);
strlcpy(c->str, ext->comment, len + 1);
rcu_assign_pointer(comment->c, c);
}

/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
if (!comment->str)
struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);

if (!c)
return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}

/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
if (unlikely(!comment->str))
struct ip_set_comment_rcu *c;

c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
kfree(comment->str);
comment->str = NULL;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}

#endif
Expand Down
27 changes: 11 additions & 16 deletions include/linux/netfilter/ipset/ip_set_timeout.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}

static inline bool
ip_set_timeout_test(unsigned long timeout)
ip_set_timeout_expired(unsigned long *t)
{
return timeout == IPSET_ELEM_PERMANENT ||
time_is_after_jiffies(timeout);
}

static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
return *timeout != IPSET_ELEM_PERMANENT &&
time_is_before_jiffies(*timeout);
return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}

static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t)
ip_set_timeout_set(unsigned long *timeout, u32 value)
{
if (!t) {
unsigned long t;

if (!value) {
*timeout = IPSET_ELEM_PERMANENT;
return;
}

*timeout = msecs_to_jiffies(t * 1000) + jiffies;
if (*timeout == IPSET_ELEM_PERMANENT)
t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
(*timeout)--;
t--;
*timeout = t;
}

static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
return *timeout == IPSET_ELEM_PERMANENT ? 0 :
jiffies_to_msecs(*timeout - jiffies)/1000;
jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}

#endif /* __KERNEL__ */
Expand Down
56 changes: 51 additions & 5 deletions include/linux/netfilter/x_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,10 @@ struct xt_table_info {
unsigned int stacksize;
unsigned int __percpu *stackptr;
void ***jumpstack;
/* ipt_entry tables: one per CPU */
/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
void *entries[1];

unsigned char entries[0] __aligned(8);
};

#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
+ nr_cpu_ids * sizeof(char *))
int xt_register_target(struct xt_target *target);
void xt_unregister_target(struct xt_target *target);
int xt_register_targets(struct xt_target *target, unsigned int n);
Expand Down Expand Up @@ -353,6 +350,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
return ret;
}


/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
* real (percpu) counter. On !SMP, its just the packet count,
* so nothing needs to be done there.
*
* xt_percpu_counter_alloc returns the address of the percpu
* counter, or 0 on !SMP.
*
* Hence caller must use IS_ERR_VALUE to check for error, this
* allows us to return 0 for single core systems without forcing
* callers to deal with SMP vs. NONSMP issues.
*/
static inline u64 xt_percpu_counter_alloc(void)
{
if (nr_cpu_ids > 1) {
void __percpu *res = alloc_percpu(struct xt_counters);

if (res == NULL)
return (u64) -ENOMEM;

return (__force u64) res;
}

return 0;
}
static inline void xt_percpu_counter_free(u64 pcnt)
{
if (nr_cpu_ids > 1)
free_percpu((void __percpu *) pcnt);
}

static inline struct xt_counters *
xt_get_this_cpu_counter(struct xt_counters *cnt)
{
if (nr_cpu_ids > 1)
return this_cpu_ptr((void __percpu *) cnt->pcnt);

return cnt;
}

static inline struct xt_counters *
xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
{
if (nr_cpu_ids > 1)
return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);

return cnt;
}

struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);

Expand Down
7 changes: 0 additions & 7 deletions include/linux/netfilter_bridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_NF_BRIDGE_PREROUTING 0x08

static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}

int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);

static inline void br_drop_fake_rtable(struct sk_buff *skb)
Expand Down
3 changes: 3 additions & 0 deletions include/linux/netfilter_ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ void ipv6_netfilter_fini(void);
struct nf_ipv6_ops {
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
void (*route_input)(struct sk_buff *skb);
int (*fragment)(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
};

extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
Expand Down
7 changes: 6 additions & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <linux/sched.h>
#include <net/flow_dissector.h>
#include <linux/splice.h>
#include <linux/in6.h>

/* A. Checksumming of received packets by device.
*
Expand Down Expand Up @@ -173,13 +174,17 @@ struct nf_bridge_info {
BRNF_PROTO_PPPOE
} orig_proto:8;
bool pkt_otherhost;
__u16 frag_max_size;
unsigned int mask;
struct net_device *physindev;
union {
struct net_device *physoutdev;
char neigh_header[8];
};
__be32 ipv4_daddr;
union {
__be32 ipv4_daddr;
struct in6_addr ipv6_daddr;
};
};
#endif

Expand Down
11 changes: 9 additions & 2 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ struct nft_stats {
};

#define NFT_HOOK_OPS_MAX 2
#define NFT_BASECHAIN_DISABLED (1 << 0)

/**
* struct nft_base_chain - nf_tables base chain
Expand All @@ -791,21 +792,29 @@ struct nft_stats {
* @policy: default policy
* @stats: per-cpu chain stats
* @chain: the chain
* @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
struct nft_stats __percpu *stats;
struct nft_chain chain;
char dev_name[IFNAMSIZ];
};

static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
{
return container_of(chain, struct nft_base_chain, chain);
}

int nft_register_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops);
void nft_unregister_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops);

unsigned int nft_do_chain(struct nft_pktinfo *pkt,
const struct nf_hook_ops *ops);

Expand All @@ -819,7 +828,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @name: name of the table
* @dev: this table is bound to this device (if any)
*/
struct nft_table {
struct list_head list;
Expand All @@ -829,7 +837,6 @@ struct nft_table {
u32 use;
u16 flags;
char name[NFT_TABLE_MAXNAMELEN];
struct net_device *dev;
};

enum nft_af_flags {
Expand Down
6 changes: 3 additions & 3 deletions include/uapi/linux/netfilter/ipset/ip_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
/* The protocol version */
#define IPSET_PROTOCOL 6

/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE 255

/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32

/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE 255

/* Message types and commands */
enum ipset_cmd {
IPSET_CMD_NONE,
Expand Down
Loading

0 comments on commit ada6c1d

Please sign in to comment.