Skip to content

Commit

Permalink
Merge branch 'net-sched-Introduce-tc-connection-tracking'
Browse files Browse the repository at this point in the history
Paul Blakey says:

====================
net/sched: Introduce tc connection tracking

This patch series add connection tracking capabilities in tc sw datapath.
It does so via a new tc action, called act_ct, and new tc flower classifier matching
on conntrack state, mark and label.

Usage is as follows:
$ tc qdisc add dev ens1f0_0 ingress
$ tc qdisc add dev ens1f0_1 ingress

$ tc filter add dev ens1f0_0 ingress \
  prio 1 chain 0 proto ip \
  flower ip_proto tcp ct_state -trk \
  action ct zone 2 pipe \
  action goto chain 2
$ tc filter add dev ens1f0_0 ingress \
  prio 1 chain 2 proto ip \
  flower ct_state +trk+new \
  action ct zone 2 commit mark 0xbb nat src addr 5.5.5.7 pipe \
  action mirred egress redirect dev ens1f0_1
$ tc filter add dev ens1f0_0 ingress \
  prio 1 chain 2 proto ip \
  flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \
  action ct nat pipe \
  action mirred egress redirect dev ens1f0_1

$ tc filter add dev ens1f0_1 ingress \
  prio 1 chain 0 proto ip \
  flower ip_proto tcp ct_state -trk \
  action ct zone 2 pipe \
  action goto chain 1
$ tc filter add dev ens1f0_1 ingress \
  prio 1 chain 1 proto ip \
  flower ct_zone 2 ct_mark 0xbb ct_state +trk+est \
  action ct nat pipe \
  action mirred egress redirect dev ens1f0_0

The pattern used in the design here closely resembles OvS, as the plan is to also offload
OvS conntrack rules to tc. OvS datapath rules uses it's recirculation mechanism to send
specific packets to conntrack, and return with the new conntrack state (ct_state) on some other recirc_id
to be matched again (we use goto chain for this).

This results in the following OvS datapath rules:

recirc_id(0),in_port(ens1f0_0),ct_state(-trk),... actions:ct(zone=2),recirc(2)
recirc_id(2),in_port(ens1f0_0),ct_state(+new+trk),ct_mark(0xbb),... actions:ct(commit,zone=2,nat(src=5.5.5.7),mark=0xbb),ens1f0_1
recirc_id(2),in_port(ens1f0_0),ct_state(+est+trk),ct_mark(0xbb),... actions:ct(zone=2,nat),ens1f0_1

recirc_id(1),in_port(ens1f0_1),ct_state(-trk),... actions:ct(zone=2),recirc(1)
recirc_id(1),in_port(ens1f0_1),ct_state(+est+trk),... actions:ct(zone=2,nat),ens1f0_0

Changelog:
	See individual patches.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jul 9, 2019
2 parents f108c88 + 6e52fca commit 216dcb0
Show file tree
Hide file tree
Showing 13 changed files with 1,632 additions and 5 deletions.
10 changes: 10 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,16 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container);

/* Gets a skb connection tracking info, ctinfo map should be a
* a map of mapsize to translate enum ip_conntrack_info states
* to user states.
*/
void
skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
u16 *ctinfo_map,
size_t mapsize);
void
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
Expand Down
15 changes: 15 additions & 0 deletions include/net/flow_dissector.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,20 @@ struct flow_dissector_key_meta {
int ingress_ifindex;
};

/**
* struct flow_dissector_key_ct:
* @ct_state: conntrack state after converting with map
* @ct_mark: conttrack mark
* @ct_zone: conntrack zone
* @ct_labels: conntrack labels
*/
struct flow_dissector_key_ct {
u16 ct_state;
u16 ct_zone;
u32 ct_mark;
u32 ct_labels[4];
};

enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
Expand All @@ -234,6 +248,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */
FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */

FLOW_DISSECTOR_KEY_MAX,
};
Expand Down
5 changes: 5 additions & 0 deletions include/net/flow_offload.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ enum flow_action_id {
FLOW_ACTION_QUEUE,
FLOW_ACTION_SAMPLE,
FLOW_ACTION_POLICE,
FLOW_ACTION_CT,
};

/* This is mirroring enum pedit_header_type definition for easy mapping between
Expand Down Expand Up @@ -178,6 +179,10 @@ struct flow_action_entry {
s64 burst;
u64 rate_bytes_ps;
} police;
struct { /* FLOW_ACTION_CT */
int action;
u16 zone;
} ct;
};
};

Expand Down
63 changes: 63 additions & 0 deletions include/net/tc_act/tc_ct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_TC_CT_H
#define __NET_TC_CT_H

#include <net/act_api.h>
#include <uapi/linux/tc_act/tc_ct.h>

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_labels.h>

struct tcf_ct_params {
struct nf_conn *tmpl;
u16 zone;

u32 mark;
u32 mark_mask;

u32 labels[NF_CT_LABELS_MAX_SIZE / sizeof(u32)];
u32 labels_mask[NF_CT_LABELS_MAX_SIZE / sizeof(u32)];

struct nf_nat_range2 range;
bool ipv4_range;

u16 ct_action;

struct rcu_head rcu;
};

struct tcf_ct {
struct tc_action common;
struct tcf_ct_params __rcu *params;
};

#define to_ct(a) ((struct tcf_ct *)a)
#define to_ct_params(a) ((struct tcf_ct_params *) \
rtnl_dereference((to_ct(a)->params)))

static inline uint16_t tcf_ct_zone(const struct tc_action *a)
{
return to_ct_params(a)->zone;
}

static inline int tcf_ct_action(const struct tc_action *a)
{
return to_ct_params(a)->ct_action;
}

#else
static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
#endif /* CONFIG_NF_CONNTRACK */

static inline bool is_tcf_ct(const struct tc_action *a)
{
#if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK)
if (a->ops && a->ops->id == TCA_ID_CT)
return true;
#endif
return false;
}

#endif /* __NET_TC_CT_H */
17 changes: 17 additions & 0 deletions include/uapi/linux/pkt_cls.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ enum tca_id {
TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
TCA_ID_CTINFO,
TCA_ID_MPLS,
TCA_ID_CT,
/* other actions go here */
__TCA_ID_MAX = 255
};
Expand Down Expand Up @@ -536,11 +537,27 @@ enum {
TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */
TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */

TCA_FLOWER_KEY_CT_STATE, /* u16 */
TCA_FLOWER_KEY_CT_STATE_MASK, /* u16 */
TCA_FLOWER_KEY_CT_ZONE, /* u16 */
TCA_FLOWER_KEY_CT_ZONE_MASK, /* u16 */
TCA_FLOWER_KEY_CT_MARK, /* u32 */
TCA_FLOWER_KEY_CT_MARK_MASK, /* u32 */
TCA_FLOWER_KEY_CT_LABELS, /* u128 */
TCA_FLOWER_KEY_CT_LABELS_MASK, /* u128 */

__TCA_FLOWER_MAX,
};

#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)

enum {
TCA_FLOWER_KEY_CT_FLAGS_NEW = 1 << 0, /* Beginning of a new connection. */
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */
TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
};

enum {
TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
Expand Down
41 changes: 41 additions & 0 deletions include/uapi/linux/tc_act/tc_ct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __UAPI_TC_CT_H
#define __UAPI_TC_CT_H

#include <linux/types.h>
#include <linux/pkt_cls.h>

enum {
TCA_CT_UNSPEC,
TCA_CT_PARMS,
TCA_CT_TM,
TCA_CT_ACTION, /* u16 */
TCA_CT_ZONE, /* u16 */
TCA_CT_MARK, /* u32 */
TCA_CT_MARK_MASK, /* u32 */
TCA_CT_LABELS, /* u128 */
TCA_CT_LABELS_MASK, /* u128 */
TCA_CT_NAT_IPV4_MIN, /* be32 */
TCA_CT_NAT_IPV4_MAX, /* be32 */
TCA_CT_NAT_IPV6_MIN, /* struct in6_addr */
TCA_CT_NAT_IPV6_MAX, /* struct in6_addr */
TCA_CT_NAT_PORT_MIN, /* be16 */
TCA_CT_NAT_PORT_MAX, /* be16 */
TCA_CT_PAD,
__TCA_CT_MAX
};

#define TCA_CT_MAX (__TCA_CT_MAX - 1)

#define TCA_CT_ACT_COMMIT (1 << 0)
#define TCA_CT_ACT_FORCE (1 << 1)
#define TCA_CT_ACT_CLEAR (1 << 2)
#define TCA_CT_ACT_NAT (1 << 3)
#define TCA_CT_ACT_NAT_SRC (1 << 4)
#define TCA_CT_ACT_NAT_DST (1 << 5)

struct tc_ct {
tc_gen;
};

#endif /* __UAPI_TC_CT_H */
44 changes: 44 additions & 0 deletions net/core/flow_dissector.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
#include <linux/bpf.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_labels.h>
#endif

static DEFINE_MUTEX(flow_dissector_mutex);

Expand Down Expand Up @@ -231,6 +235,46 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
ctrl->addr_type = type;
}

void
skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
u16 *ctinfo_map,
size_t mapsize)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *cl;
struct nf_conn *ct;

if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT))
return;

ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return;

key = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_CT,
target_container);

if (ctinfo < mapsize)
key->ct_state = ctinfo_map[ctinfo];
#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
key->ct_zone = ct->zone.id;
#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
key->ct_mark = ct->mark;
#endif

cl = nf_ct_labels_find(ct);
if (cl)
memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels));
#endif /* CONFIG_NF_CONNTRACK */
}
EXPORT_SYMBOL(skb_flow_dissect_ct);

void
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
Expand Down
11 changes: 11 additions & 0 deletions net/sched/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,17 @@ config NET_ACT_TUNNEL_KEY
To compile this code as a module, choose M here: the
module will be called act_tunnel_key.

config NET_ACT_CT
tristate "connection tracking tc action"
depends on NET_CLS_ACT && NF_CONNTRACK
help
Say Y here to allow sending the packets to conntrack module.

If unsure, say N.

To compile this code as a module, choose M here: the
module will be called act_ct.

config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
Expand Down
1 change: 1 addition & 0 deletions net/sched/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
Expand Down
Loading

0 comments on commit 216dcb0

Please sign in to comment.