Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter/IPVS/OVS fixes for net

The following patchset contains a rather large batch of Netfilter, IPVS
and OVS fixes for your net tree. This includes fixes for ctnetlink, the
userspace conntrack helper infrastructure, conntrack OVS support,
ebtables DNAT target, several leaks in error path among other. More
specifically, they are:

1) Fix reference count leak in the CT target error path, from Gao Feng.

2) Remove conntrack entry clashing with a matching expectation, patch
   from Jarno Rajahalme.

3) Fix bogus EEXIST when registering two different userspace helpers,
   from Liping Zhang.

4) Don't leak dummy elements in the new bitmap set type in nf_tables,
   from Liping Zhang.

5) Get rid of module autoload from conntrack update path in ctnetlink,
   we don't need autoload at this late stage and it is happening with
   rcu read lock held which is not good. From Liping Zhang.

6) Fix deadlock due to double-acquire of the expect_lock from conntrack
   update path, this fixes a bug that was introduced when the central
   spinlock got removed. Again from Liping Zhang.

7) Safe ct->status update from ctnetlink path, from Liping. The expect_lock
   protection that was selected when the central spinlock was removed was
   not really protecting anything at all.

8) Protect sequence adjustment under ct->lock.

9) Missing socket match with IPv6, from Peter Tirsek.

10) Adjust skb->pkt_type of DNAT'ed frames from ebtables, from
    Linus Luessing.

11) Don't give up on evaluating the expression on new entries added via
    dynset expression in nf_tables, from Liping Zhang.

12) Use skb_checksum() when mangling icmpv6 in IPv6 NAT as this deals
    with non-linear skbuffs.

13) Don't allow IPv6 service in IPVS if no IPv6 support is available,
    from Paolo Abeni.

14) Missing mutex release in error path of xt_find_table_lock(), from
    Dan Carpenter.

15) Update maintainers files, Netfilter section. Add Florian to the
    file, refer to nftables.org and change project status from Supported
    to Maintained.

16) Bail out on mismatching extensions in element updates in nf_tables.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 3, 2017
2 parents ab71632 + 9744a6f commit 4d89ac2
Show file tree
Hide file tree
Showing 14 changed files with 174 additions and 64 deletions.
4 changes: 3 additions & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -8747,14 +8747,16 @@ F: drivers/net/ethernet/neterion/
NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
M: Florian Westphal <fw@strlen.de>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
W: http://www.netfilter.org/
W: http://www.iptables.org/
W: http://www.nftables.org/
Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
S: Supported
S: Maintained
F: include/linux/netfilter*
F: include/linux/netfilter/
F: include/net/netfilter/
Expand Down
13 changes: 9 additions & 4 deletions include/uapi/linux/netfilter/nf_conntrack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ enum ip_conntrack_status {
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),

/* Bits that cannot be altered from userland. */
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),

/* Connection has fixed timeout. */
IPS_FIXED_TIMEOUT_BIT = 10,
IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
Expand All @@ -103,6 +99,15 @@ enum ip_conntrack_status {
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),

/* Be careful here, modifying these bits can make things messy,
* so don't let users modify them directly.
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
IPS_SEQ_ADJUST | IPS_TEMPLATE),

__IPS_MAX_BIT = 14,
};

/* Connection tracking event types */
Expand Down
20 changes: 20 additions & 0 deletions net/bridge/netfilter/ebt_dnat.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/
#include <linux/module.h>
#include <net/sock.h>
#include "../br_private.h"
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
Expand All @@ -18,11 +19,30 @@ static unsigned int
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
struct net_device *dev;

if (!skb_make_writable(skb, 0))
return EBT_DROP;

ether_addr_copy(eth_hdr(skb)->h_dest, info->mac);

if (is_multicast_ether_addr(info->mac)) {
if (is_broadcast_ether_addr(info->mac))
skb->pkt_type = PACKET_BROADCAST;
else
skb->pkt_type = PACKET_MULTICAST;
} else {
if (xt_hooknum(par) != NF_BR_BROUTING)
dev = br_port_get_rcu(xt_in(par))->br->dev;
else
dev = xt_in(par);

if (ether_addr_equal(info->mac, dev->dev_addr))
skb->pkt_type = PACKET_HOST;
else
skb->pkt_type = PACKET_OTHERHOST;
}

return info->target;
}

Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
inside->icmp6.icmp6_cksum =
csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
skb->len - hdrlen, IPPROTO_ICMPV6,
csum_partial(&inside->icmp6,
skb_checksum(skb, hdrlen,
skb->len - hdrlen, 0));
}

Expand Down
22 changes: 17 additions & 5 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -3078,6 +3078,17 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
return skb->len;
}

static bool ip_vs_is_af_valid(int af)
{
if (af == AF_INET)
return true;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6 && ipv6_mod_enabled())
return true;
#endif
return false;
}

static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
Expand Down Expand Up @@ -3105,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
memset(usvc, 0, sizeof(*usvc));

usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
if (usvc->af != AF_INET)
#endif
if (!ip_vs_is_af_valid(usvc->af))
return -EAFNOSUPPORT;

if (nla_fwmark) {
Expand Down Expand Up @@ -3612,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (udest.af == 0)
udest.af = svc->af;

if (!ip_vs_is_af_valid(udest.af)) {
ret = -EAFNOSUPPORT;
goto out;
}

if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
/* The synchronization protocol is incompatible
* with mixed family services
Expand Down
26 changes: 21 additions & 5 deletions net/netfilter/nf_conntrack_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
unsigned int h = helper_hash(&me->tuple);
struct nf_conntrack_helper *cur;
int ret = 0;
int ret = 0, i;

BUG_ON(me->expect_policy == NULL);
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
Expand All @@ -395,10 +395,26 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
return -EINVAL;

mutex_lock(&nf_ct_helper_mutex);
hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) {
ret = -EEXIST;
goto out;
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) {
if (!strcmp(cur->name, me->name) &&
(cur->tuple.src.l3num == NFPROTO_UNSPEC ||
cur->tuple.src.l3num == me->tuple.src.l3num) &&
cur->tuple.dst.protonum == me->tuple.dst.protonum) {
ret = -EEXIST;
goto out;
}
}
}

/* avoid unpredictable behaviour for auto_assign_helper */
if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) {
hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple,
&mask)) {
ret = -EEXIST;
goto out;
}
}
}
hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
Expand Down
89 changes: 49 additions & 40 deletions net/netfilter/nf_conntrack_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,24 +417,28 @@ dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
return -1;
}

static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
const struct nf_conn *ct)
static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *seq;

if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;

spin_lock_bh(&ct->lock);
seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
return -1;
goto err;

seq = &seqadj->seq[IP_CT_DIR_REPLY];
if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
return -1;
goto err;

spin_unlock_bh(&ct->lock);
return 0;
err:
spin_unlock_bh(&ct->lock);
return -1;
}

static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
Expand Down Expand Up @@ -1417,6 +1421,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
}
#endif

static void
__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
unsigned long off)
{
unsigned int bit;

/* Ignore these unchangable bits */
on &= ~IPS_UNCHANGEABLE_MASK;
off &= ~IPS_UNCHANGEABLE_MASK;

for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
if (on & (1 << bit))
set_bit(bit, &ct->status);
else if (off & (1 << bit))
clear_bit(bit, &ct->status);
}
}

static int
ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
{
Expand All @@ -1436,10 +1458,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* ASSURED bit can only be set */
return -EBUSY;

/* Be careful here, modifying NAT bits can screw up things,
* so don't let users modify them directly if they don't pass
* nf_nat_range. */
ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
__ctnetlink_change_status(ct, status, 0);
return 0;
}

Expand Down Expand Up @@ -1508,23 +1527,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return 0;
}

rcu_read_lock();
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
spin_unlock_bh(&nf_conntrack_expect_lock);

if (request_module("nfct-helper-%s", helpname) < 0) {
spin_lock_bh(&nf_conntrack_expect_lock);
return -EOPNOTSUPP;
}

spin_lock_bh(&nf_conntrack_expect_lock);
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper)
return -EAGAIN;
#endif
rcu_read_unlock();
return -EOPNOTSUPP;
}

Expand All @@ -1533,13 +1540,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
/* update private helper data if allowed. */
if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
return 0;
err = 0;
} else
return -EBUSY;
err = -EBUSY;
} else {
/* we cannot set a helper for an existing conntrack */
err = -EOPNOTSUPP;
}

/* we cannot set a helper for an existing conntrack */
return -EOPNOTSUPP;
rcu_read_unlock();
return err;
}

static int ctnetlink_change_timeout(struct nf_conn *ct,
Expand Down Expand Up @@ -1630,25 +1640,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
if (!seqadj)
return 0;

spin_lock_bh(&ct->lock);
if (cda[CTA_SEQ_ADJ_ORIG]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
return ret;
goto err;

ct->status |= IPS_SEQ_ADJUST;
set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}

if (cda[CTA_SEQ_ADJ_REPLY]) {
ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
return ret;
goto err;

ct->status |= IPS_SEQ_ADJUST;
set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
}

spin_unlock_bh(&ct->lock);
return 0;
err:
spin_unlock_bh(&ct->lock);
return ret;
}

static int
Expand Down Expand Up @@ -1959,9 +1974,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
spin_lock_bh(&nf_conntrack_expect_lock);
err = ctnetlink_change_conntrack(ct, cda);
spin_unlock_bh(&nf_conntrack_expect_lock);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
(1 << IPCT_ASSURED) |
Expand Down Expand Up @@ -2294,10 +2307,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
/* This check is less strict than ctnetlink_change_status()
* because callers often flip IPS_EXPECTED bits when sending
* an NFQA_CT attribute to the kernel. So ignore the
* unchangeable bits but do not error out.
* unchangeable bits but do not error out. Also user programs
* are allowed to clear the bits that they are allowed to change.
*/
ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
(ct->status & IPS_UNCHANGEABLE_MASK);
__ctnetlink_change_status(ct, status, ~status);
return 0;
}

Expand Down Expand Up @@ -2351,11 +2364,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
if (ret < 0)
return ret;

spin_lock_bh(&nf_conntrack_expect_lock);
ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
spin_unlock_bh(&nf_conntrack_expect_lock);

return ret;
return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
}

static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
Expand Down
5 changes: 5 additions & 0 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -3778,6 +3778,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = set->ops->insert(ctx->net, set, &elem, &ext2);
if (err) {
if (err == -EEXIST) {
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
return -EBUSY;
if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
memcmp(nft_set_ext_data(ext),
Expand Down
5 changes: 2 additions & 3 deletions net/netfilter/nft_dynset.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
} else if (sexpr == NULL)
goto out;
}

if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
Expand All @@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
return;
}
out:

if (!priv->invert)
regs->verdict.code = NFT_BREAK;
}
Expand Down
Loading

0 comments on commit 4d89ac2

Please sign in to comment.