Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables

Pablo Neira Ayuso <pablo@netfilter.org> says:

====================
nftables updates for net-next

The following patchset contains nftables updates for your net-next tree;
they are:

* Add a set operation to the meta expression by means of the select_ops()
  infrastructure; this allows us to set the packet mark, among other things
  (an illustrative select_ops() sketch follows the changed-files summary
  below). From Arturo Borrero Gonzalez.

* Fix a wrong sscanf() format string in nf_tables_set_alloc_name(), from
  Daniel Borkmann.

* Add a new queue expression to nf_tables. It comes with two preparation
  patches: one to add a mask in nf_tables_core so the queue verdict is
  evaluated appropriately, and another to refactor common code with
  xt_NFQUEUE. From Eric Leblond.

* Do not hide nftables from Kconfig if nfnetlink is not enabled, also from
  Eric Leblond.

* Add the reject expression to nf_tables; this adds the missing TCP RST
  support. It comes with an initial patch to refactor common code with the
  ipt_REJECT and ip6t_REJECT targets, again from Eric Leblond.

* Remove an unused variable assignment in nf_tables_dump_set(), from Michal
  Nazarewicz.

* Remove the nft_meta_target code, now that Arturo added the set operation
  to the meta expression, from me.

* Add help information for nf_tables to Kconfig, also from me.

* Allow dumping all sets by specifying NFPROTO_UNSPEC; a similar feature is
  available for other nf_tables objects. Requested by Arturo, from me.

* Expose the table usage counter, so we can know how many chains use a given
  table without dumping the list of chains, from Tomasz Bursztyka.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Jan 6, 2014
2 parents 6a8c479 + c9c8e48 commit 9aa28f2
Showing 18 changed files with 872 additions and 534 deletions.
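
As background for the meta set change above: the select_ops() callback on
struct nft_expr_type lets a single expression type return different
nft_expr_ops depending on which netlink attributes userspace supplied, so one
"meta" type can serve both get and set. The following is only a rough sketch
of that pattern; the attribute and ops names (NFTA_META_DREG, NFTA_META_SREG,
nft_meta_get_ops, nft_meta_set_ops) are assumed for illustration and this is
not the exact code merged here.

/* Illustrative sketch of a select_ops() callback, not the merged code.
 * Picks the "get" or "set" flavour of the meta expression based on
 * whether a destination or a source register attribute was given.
 */
static const struct nft_expr_ops *
nft_meta_select_ops(const struct nft_ctx *ctx,
                    const struct nlattr * const tb[])
{
        /* A destination and a source register at the same time is invalid. */
        if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
                return ERR_PTR(-EINVAL);

        if (tb[NFTA_META_DREG])
                return &nft_meta_get_ops;       /* load meta data into a register */
        if (tb[NFTA_META_SREG])
                return &nft_meta_set_ops;       /* set meta data, e.g. skb->mark */

        return ERR_PTR(-EINVAL);
}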
128 changes: 128 additions & 0 deletions include/net/netfilter/ipv4/nf_reject.h
@@ -0,0 +1,128 @@
#ifndef _IPV4_NF_REJECT_H
#define _IPV4_NF_REJECT_H

#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>

static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
{
        icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}

/* Send RST reply */
static void nf_send_reset(struct sk_buff *oldskb, int hook)
{
        struct sk_buff *nskb;
        const struct iphdr *oiph;
        struct iphdr *niph;
        const struct tcphdr *oth;
        struct tcphdr _otcph, *tcph;

        /* IP header checks: fragment. */
        if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
                return;

        oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
                                 sizeof(_otcph), &_otcph);
        if (oth == NULL)
                return;

        /* No RST for RST. */
        if (oth->rst)
                return;

        if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
                return;

        /* Check checksum */
        if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
                return;
        oiph = ip_hdr(oldskb);

        nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
                         LL_MAX_HEADER, GFP_ATOMIC);
        if (!nskb)
                return;

        skb_reserve(nskb, LL_MAX_HEADER);

        skb_reset_network_header(nskb);
        niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
        niph->version = 4;
        niph->ihl = sizeof(struct iphdr) / 4;
        niph->tos = 0;
        niph->id = 0;
        niph->frag_off = htons(IP_DF);
        niph->protocol = IPPROTO_TCP;
        niph->check = 0;
        niph->saddr = oiph->daddr;
        niph->daddr = oiph->saddr;

        skb_reset_transport_header(nskb);
        tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
        memset(tcph, 0, sizeof(*tcph));
        tcph->source = oth->dest;
        tcph->dest = oth->source;
        tcph->doff = sizeof(struct tcphdr) / 4;

        if (oth->ack)
                tcph->seq = oth->ack_seq;
        else {
                tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
                                      oldskb->len - ip_hdrlen(oldskb) -
                                      (oth->doff << 2));
                tcph->ack = 1;
        }

        tcph->rst = 1;
        tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
                                    niph->daddr, 0);
        nskb->ip_summed = CHECKSUM_PARTIAL;
        nskb->csum_start = (unsigned char *)tcph - nskb->head;
        nskb->csum_offset = offsetof(struct tcphdr, check);

        /* ip_route_me_harder expects skb->dst to be set */
        skb_dst_set_noref(nskb, skb_dst(oldskb));

        nskb->protocol = htons(ETH_P_IP);
        if (ip_route_me_harder(nskb, RTN_UNSPEC))
                goto free_nskb;

        niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));

        /* "Never happens" */
        if (nskb->len > dst_mtu(skb_dst(nskb)))
                goto free_nskb;

        nf_ct_attach(nskb, oldskb);

#ifdef CONFIG_BRIDGE_NETFILTER
        /* If we use ip_local_out for bridged traffic, the MAC source on
         * the RST will be ours, instead of the destination's. This confuses
         * some routers/firewalls, and they drop the packet. So we need to
         * build the eth header using the original destination's MAC as the
         * source, and send the RST packet directly.
         */
        if (oldskb->nf_bridge) {
                struct ethhdr *oeth = eth_hdr(oldskb);
                nskb->dev = oldskb->nf_bridge->physindev;
                niph->tot_len = htons(nskb->len);
                ip_send_check(niph);
                if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
                                    oeth->h_source, oeth->h_dest, nskb->len) < 0)
                        goto free_nskb;
                dev_queue_xmit(nskb);
        } else
#endif
                ip_local_out(nskb);

        return;

free_nskb:
        kfree_skb(nskb);
}


#endif /* _IPV4_NF_REJECT_H */
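
To make the intended use of this new header concrete, here is a purely
hypothetical caller: a netfilter hook function that answers TCP packets with
a RST and everything else with an ICMP port unreachable, then drops the
original. reject_demo_hook() is not part of this commit; only
nf_send_unreach() and nf_send_reset() above are.

/* Hypothetical example only; not code from this series. */
static unsigned int reject_demo_hook(const struct nf_hook_ops *ops,
                                     struct sk_buff *skb,
                                     const struct net_device *in,
                                     const struct net_device *out,
                                     int (*okfn)(struct sk_buff *))
{
        if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                nf_send_reset(skb, ops->hooknum);        /* TCP RST back to the sender */
        else
                nf_send_unreach(skb, ICMP_PORT_UNREACH); /* ICMP destination unreachable */

        return NF_DROP;                                  /* drop the original packet */
}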
171 changes: 171 additions & 0 deletions include/net/netfilter/ipv6/nf_reject.h
@@ -0,0 +1,171 @@
#ifndef _IPV6_NF_REJECT_H
#define _IPV6_NF_REJECT_H

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
#include <net/ip6_checksum.h>
#include <linux/netfilter_ipv6.h>

static inline void
nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
                 unsigned int hooknum)
{
        if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
                skb_in->dev = net->loopback_dev;

        icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
}

/* Send RST reply */
static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
        struct sk_buff *nskb;
        struct tcphdr otcph, *tcph;
        unsigned int otcplen, hh_len;
        int tcphoff, needs_ack;
        const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
        struct ipv6hdr *ip6h;
#define DEFAULT_TOS_VALUE 0x0U
        const __u8 tclass = DEFAULT_TOS_VALUE;
        struct dst_entry *dst = NULL;
        u8 proto;
        __be16 frag_off;
        struct flowi6 fl6;

        if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
            (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
                pr_debug("addr is not unicast.\n");
                return;
        }

        proto = oip6h->nexthdr;
        tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off);

        if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
                pr_debug("Cannot get TCP header.\n");
                return;
        }

        otcplen = oldskb->len - tcphoff;

        /* IP header checks: fragment, too short. */
        if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
                pr_debug("proto(%d) != IPPROTO_TCP, "
                         "or too short. otcplen = %d\n",
                         proto, otcplen);
                return;
        }

        if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
                BUG();

        /* No RST for RST. */
        if (otcph.rst) {
                pr_debug("RST is set\n");
                return;
        }

        /* Check checksum. */
        if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
                pr_debug("TCP checksum is invalid\n");
                return;
        }

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.saddr = oip6h->daddr;
        fl6.daddr = oip6h->saddr;
        fl6.fl6_sport = otcph.dest;
        fl6.fl6_dport = otcph.source;
        security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
        dst = ip6_route_output(net, NULL, &fl6);
        if (dst == NULL || dst->error) {
                dst_release(dst);
                return;
        }
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
        if (IS_ERR(dst))
                return;

        hh_len = (dst->dev->hard_header_len + 15)&~15;
        nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
                         + sizeof(struct tcphdr) + dst->trailer_len,
                         GFP_ATOMIC);

        if (!nskb) {
                net_dbg_ratelimited("cannot alloc skb\n");
                dst_release(dst);
                return;
        }

        skb_dst_set(nskb, dst);

        skb_reserve(nskb, hh_len + dst->header_len);

        skb_put(nskb, sizeof(struct ipv6hdr));
        skb_reset_network_header(nskb);
        ip6h = ipv6_hdr(nskb);
        ip6_flow_hdr(ip6h, tclass, 0);
        ip6h->hop_limit = ip6_dst_hoplimit(dst);
        ip6h->nexthdr = IPPROTO_TCP;
        ip6h->saddr = oip6h->daddr;
        ip6h->daddr = oip6h->saddr;

        skb_reset_transport_header(nskb);
        tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
        /* Truncate to length (no data) */
        tcph->doff = sizeof(struct tcphdr)/4;
        tcph->source = otcph.dest;
        tcph->dest = otcph.source;

        if (otcph.ack) {
                needs_ack = 0;
                tcph->seq = otcph.ack_seq;
                tcph->ack_seq = 0;
        } else {
                needs_ack = 1;
                tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
                                      + otcplen - (otcph.doff<<2));
                tcph->seq = 0;
        }

        /* Reset flags */
        ((u_int8_t *)tcph)[13] = 0;
        tcph->rst = 1;
        tcph->ack = needs_ack;
        tcph->window = 0;
        tcph->urg_ptr = 0;
        tcph->check = 0;

        /* Adjust TCP checksum */
        tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
                                      &ipv6_hdr(nskb)->daddr,
                                      sizeof(struct tcphdr), IPPROTO_TCP,
                                      csum_partial(tcph,
                                                   sizeof(struct tcphdr), 0));

        nf_ct_attach(nskb, oldskb);

#ifdef CONFIG_BRIDGE_NETFILTER
        /* If we use ip6_local_out for bridged traffic, the MAC source on
         * the RST will be ours, instead of the destination's. This confuses
         * some routers/firewalls, and they drop the packet. So we need to
         * build the eth header using the original destination's MAC as the
         * source, and send the RST packet directly.
         */
        if (oldskb->nf_bridge) {
                struct ethhdr *oeth = eth_hdr(oldskb);
                nskb->dev = oldskb->nf_bridge->physindev;
                nskb->protocol = htons(ETH_P_IPV6);
                ip6h->payload_len = htons(sizeof(struct tcphdr));
                if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
                                    oeth->h_source, oeth->h_dest, nskb->len) < 0)
                        return;
                dev_queue_xmit(nskb);
        } else
#endif
                ip6_local_out(nskb);
}

#endif /* _IPV6_NF_REJECT_H */
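
The IPv6 helpers mirror their IPv4 counterparts but additionally take the
struct net and the hook number. Again purely as an illustration
(reject6_demo_hook() is hypothetical and not part of this commit), a caller
could look like the sketch below; note that the real helpers walk extension
headers themselves, so the simple nexthdr check here is only for the sketch.

/* Hypothetical example only; not code from this series. */
static unsigned int reject6_demo_hook(const struct nf_hook_ops *ops,
                                      struct sk_buff *skb,
                                      const struct net_device *in,
                                      const struct net_device *out,
                                      int (*okfn)(struct sk_buff *))
{
        struct net *net = dev_net(in ? in : out);

        if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                nf_send_reset6(net, skb, ops->hooknum);
        else
                nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, ops->hooknum);

        return NF_DROP;
}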
62 changes: 62 additions & 0 deletions include/net/netfilter/nf_queue.h
@@ -1,6 +1,10 @@
#ifndef _NF_QUEUE_H
#define _NF_QUEUE_H

#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>

/* Each queued (to userspace) skbuff has one of these. */
struct nf_queue_entry {
struct list_head list;
@@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);

static inline void init_hashrandom(u32 *jhash_initval)
{
        while (*jhash_initval == 0)
                *jhash_initval = prandom_u32();
}

static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
{
        const struct iphdr *iph = ip_hdr(skb);

        /* packets in either direction go into same queue */
        if ((__force u32)iph->saddr < (__force u32)iph->daddr)
                return jhash_3words((__force u32)iph->saddr,
                                    (__force u32)iph->daddr, iph->protocol, jhash_initval);

        return jhash_3words((__force u32)iph->daddr,
                            (__force u32)iph->saddr, iph->protocol, jhash_initval);
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
{
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        u32 a, b, c;

        if ((__force u32)ip6h->saddr.s6_addr32[3] <
            (__force u32)ip6h->daddr.s6_addr32[3]) {
                a = (__force u32) ip6h->saddr.s6_addr32[3];
                b = (__force u32) ip6h->daddr.s6_addr32[3];
        } else {
                b = (__force u32) ip6h->saddr.s6_addr32[3];
                a = (__force u32) ip6h->daddr.s6_addr32[3];
        }

        if ((__force u32)ip6h->saddr.s6_addr32[1] <
            (__force u32)ip6h->daddr.s6_addr32[1])
                c = (__force u32) ip6h->saddr.s6_addr32[1];
        else
                c = (__force u32) ip6h->daddr.s6_addr32[1];

        return jhash_3words(a, b, c, jhash_initval);
}
#endif

static inline u32
nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
             u32 jhash_initval)
{
        if (family == NFPROTO_IPV4)
                queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        else if (family == NFPROTO_IPV6)
                queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
#endif

        return queue;
}

#endif /* _NF_QUEUE_H */
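
These helpers, factored out of xt_NFQUEUE so the new nft queue expression can
share them, spread flows across a queue range: the 32-bit flow hash is scaled
into [0, queues_total) and added to the base queue number, and both
directions of a flow hash to the same queue. A minimal, hypothetical caller
(pick_queue_demo() is not part of this commit) might look like:

/* Hypothetical example only; not code from this series. */
static u32 demo_jhash_initval __read_mostly;

static u16 pick_queue_demo(const struct sk_buff *skb, u8 family)
{
        const u16 queue_base = 0;       /* first queue of the range */
        const u16 queues_total = 16;    /* balance over 16 queues */

        /* Seed the hash once; subsequent calls are no-ops. */
        init_hashrandom(&demo_jhash_initval);

        /* Addresses and protocol hash identically in both directions,
         * so replies land in the same queue as the original packets.
         */
        return nfqueue_hash(skb, queue_base, queues_total, family,
                            demo_jhash_initval);
}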