Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains a larger than usual batch of Netfilter
fixes for your net tree. This series contains a mixture of fixes for old
bugs and for recently introduced bugs. They are:

1) Fix a crash when using nft_dynset with nft_set_rbtree, which doesn't
   support the set element updates from the packet path. From Liping
   Zhang.

2) Fix leak when nft_expr_clone() fails, from Liping Zhang.

3) Fix a race when inserting new elements to the set hash from the
   packet path, also from Liping.

4) Handle segmented TCP SIP packets properly: perform stricter SIP
   message parsing so that an INVITE appearing in the Allow header does
   not create bogus expectations, from Ulrich Weber.

5) nft_parse_u32_check() should return a signed integer for errors, from
   John Linville.

6) Fix wrong allocation size for connlabels: allocate 16 instead of
   32 bytes, from Florian Westphal.

7) Fix compilation breakage when building the ip_vs_sync code with
   CONFIG_OPTIMIZE_INLINING on x86, from Arnd Bergmann.

8) Destroy the new set if the transaction object cannot be allocated,
   also from Liping Zhang.

9) Use the device to route duplicated packets via nft_dup only when it
   is set by the user; otherwise packets may not follow the right route,
   again from Liping.

10) Fix wrong maximum genetlink attribute definition in IPVS, from
    WANG Cong.

11) Ignore untracked conntrack objects from xt_connmark, from Florian
    Westphal.

12) Allow conntrack helpers that are registered as NFPROTO_UNSPEC to be
    used via the CT target; otherwise the h.245 helper cannot be used,
    from Florian.

13) Revisit garbage collection heuristic in the new workqueue-based
    timer approach for conntrack to evict objects earlier, again from
    Florian.

14) Fix crash in nf_tables when inserting an element into a verdict map,
    from Liping Zhang.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Nov 10, 2016
2 parents f567e95 + 58c78e1 commit 9fa684e
Show file tree
Hide file tree
Showing 14 changed files with 114 additions and 45 deletions.
3 changes: 1 addition & 2 deletions include/net/netfilter/nf_conntrack_labels.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
if (net->ct.labels_used == 0)
return NULL;

return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
sizeof(struct nf_conn_labels), GFP_ATOMIC);
return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
#else
return NULL;
#endif
Expand Down
8 changes: 5 additions & 3 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
}

unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
unsigned int nft_parse_register(const struct nlattr *attr);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);

Expand Down Expand Up @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
u64 timeout, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);

/**
* struct nft_set_gc_batch_head - nf_tables set garbage collection batch
Expand Down Expand Up @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
int err;

__module_get(src->ops->type->owner);
if (src->ops->clone) {
dst->ops = src->ops;
err = src->ops->clone(dst, src);
Expand All @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
} else {
memcpy(dst, src, src->ops->size);
}

__module_get(src->ops->type->owner);
return 0;
}

Expand Down
6 changes: 4 additions & 2 deletions net/ipv4/netfilter/nft_dup_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
struct in_addr gw = {
.s_addr = (__force __be32)regs->data[priv->sreg_addr],
};
int oif = regs->data[priv->sreg_dev];
int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;

nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
Expand Down Expand Up @@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv4 *priv = nft_expr_priv(expr);

if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
goto nla_put_failure;
if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;

Expand Down
6 changes: 4 additions & 2 deletions net/ipv6/netfilter/nft_dup_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
int oif = regs->data[priv->sreg_dev];
int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;

nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
Expand Down Expand Up @@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_ipv6 *priv = nft_expr_priv(expr);

if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
goto nla_put_failure;
if (priv->sreg_dev &&
nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
goto nla_put_failure;

Expand Down
2 changes: 1 addition & 1 deletion net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = {
.hdrsize = 0,
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
.maxattr = IPVS_CMD_MAX,
.maxattr = IPVS_CMD_ATTR_MAX,
.netnsok = true, /* Make ipvsadm to work on netns */
};

Expand Down
7 changes: 5 additions & 2 deletions net/netfilter/ipvs/ip_vs_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
*/
static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
{
memset(ho, 0, sizeof(*ho));
ho->init_seq = get_unaligned_be32(&no->init_seq);
ho->delta = get_unaligned_be32(&no->delta);
ho->previous_delta = get_unaligned_be32(&no->previous_delta);
Expand Down Expand Up @@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
kfree(param->pe_data);
}

if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
if (opt) {
cp->in_seq = opt->in_seq;
cp->out_seq = opt->out_seq;
}
atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
Expand Down
49 changes: 41 additions & 8 deletions net/netfilter/nf_conntrack_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,19 @@ struct conntrack_gc_work {
struct delayed_work dwork;
u32 last_bucket;
bool exiting;
long next_gc_run;
};

static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;

/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV 64u
#define GC_MAX_BUCKETS 8192u
#define GC_INTERVAL (5 * HZ)
/* upper bound of scan intervals */
#define GC_INTERVAL_MAX (2 * HZ)
/* maximum conntracks to evict per gc run */
#define GC_MAX_EVICTS 256u

static struct conntrack_gc_work conntrack_gc_work;
Expand Down Expand Up @@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
unsigned int i, goal, buckets = 0, expired_count = 0;
unsigned long next_run = GC_INTERVAL;
unsigned int ratio, scanned = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
unsigned long next_run;

gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
i = gc_work->last_bucket;

do {
Expand Down Expand Up @@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
if (gc_work->exiting)
return;

/*
* Eviction will normally happen from the packet path, and not
* from this gc worker.
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
*
* The heuristics below are supposed to balance conflicting goals:
*
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
* Normally, expired_count will be 0, this increases the next_run time
* to prioritize 2) above.
*
* As soon as a timed-out entry is found, move towards 1) and increase
* the scan frequency.
* In case we have lots of evictions next scan is done immediately.
*/
ratio = scanned ? expired_count * 100 / scanned : 0;
if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
gc_work->next_gc_run = 0;
next_run = 0;
} else if (expired_count) {
gc_work->next_gc_run /= 2U;
next_run = msecs_to_jiffies(1);
} else {
if (gc_work->next_gc_run < GC_INTERVAL_MAX)
gc_work->next_gc_run += msecs_to_jiffies(1);

next_run = gc_work->next_gc_run;
}

gc_work->last_bucket = i;
schedule_delayed_work(&gc_work->dwork, next_run);
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}

static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = GC_INTERVAL_MAX;
gc_work->exiting = false;
}

Expand Down Expand Up @@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);

conntrack_gc_work_init(&conntrack_gc_work);
schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);

return 0;

Expand Down
11 changes: 8 additions & 3 deletions net/netfilter/nf_conntrack_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)

for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
if (!strcmp(h->name, name) &&
h->tuple.src.l3num == l3num &&
h->tuple.dst.protonum == protonum)
if (strcmp(h->name, name))
continue;

if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
h->tuple.src.l3num != l3num)
continue;

if (h->tuple.dst.protonum == protonum)
return h;
}
}
Expand Down
5 changes: 4 additions & 1 deletion net/netfilter/nf_conntrack_sip.c
Original file line number Diff line number Diff line change
Expand Up @@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
if (*datalen < handler->len ||
if (*datalen < handler->len + 2 ||
strncasecmp(*dptr, handler->method, handler->len))
continue;
if ((*dptr)[handler->len] != ' ' ||
!isalpha((*dptr)[handler->len+1]))
continue;

if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
&matchoff, &matchlen) <= 0) {
Expand Down
18 changes: 11 additions & 7 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,

err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err2;
goto err3;

list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;

err3:
ops->destroy(set);
err2:
kfree(set);
err1:
Expand Down Expand Up @@ -3452,14 +3454,15 @@ void *nft_set_elem_init(const struct nft_set *set,
return elem;
}

void nft_set_elem_destroy(const struct nft_set *set, void *elem)
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);

nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_uninit(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));

kfree(elem);
Expand Down Expand Up @@ -3565,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
.net = ctx->net,
.afi = ctx->afi,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
Expand Down Expand Up @@ -3812,7 +3816,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)

gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
for (i = 0; i < gcb->head.cnt; i++)
nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
Expand Down Expand Up @@ -4030,7 +4034,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
Expand Down Expand Up @@ -4171,7 +4175,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
nft_trans_elem(trans).priv, true);
break;
}
kfree(trans);
Expand Down Expand Up @@ -4421,7 +4425,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
* Otherwise a 0 is returned and the attribute value is stored in the
* destination variable.
*/
unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
{
u32 val;

Expand Down
19 changes: 13 additions & 6 deletions net/netfilter/nft_dynset.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
&regs->data[priv->sreg_key],
&regs->data[priv->sreg_data],
timeout, GFP_ATOMIC);
if (elem == NULL) {
if (set->size)
atomic_dec(&set->nelems);
return NULL;
}
if (elem == NULL)
goto err1;

ext = nft_set_elem_ext(set, elem);
if (priv->expr != NULL &&
nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
return NULL;
goto err2;

return elem;

err2:
nft_set_elem_destroy(set, elem, false);
err1:
if (set->size)
atomic_dec(&set->nelems);
return NULL;
}

static void nft_dynset_eval(const struct nft_expr *expr,
Expand Down Expand Up @@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}

if (set->ops->update == NULL)
return -EOPNOTSUPP;

if (set->flags & NFT_SET_CONSTANT)
return -EBUSY;

Expand Down
Loading

0 comments on commit 9fa684e

Please sign in to comment.