From 5e17da634a21b1200853fe82ba67d6571f2beabe Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:52 -0800 Subject: [PATCH 01/10] openvswitch: Fix comments for skb->_nfct Fix comments referring to skb 'nfct' and 'nfctinfo' fields now that they are combined into '_nfct'. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fbffe0ea4c4f7..5de6d12b3a739 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -157,7 +157,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, ovs_ct_get_labels(ct, &key->ct.labels); } -/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has +/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has * previously sent the packet to conntrack via the ct action. If * 'keep_nat_flags' is true, the existing NAT flags retained, else they are * initialized from the connection status. @@ -421,12 +421,12 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) /* Find an existing connection which this packet belongs to without * re-attributing statistics or modifying the connection state. This allows an - * skb->nfct lost due to an upcall to be recovered during actions execution. + * skb->_nfct lost due to an upcall to be recovered during actions execution. * * Must be called with rcu_read_lock. * - * On success, populates skb->nfct and skb->nfctinfo, and returns the - * connection. Returns NULL if there is no existing entry. + * On success, populates skb->_nfct and returns the connection. Returns NULL + * if there is no existing entry. 
*/ static struct nf_conn * ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, @@ -464,7 +464,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, return ct; } -/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ +/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ static bool skb_nfct_cached(struct net *net, const struct sw_flow_key *key, const struct ovs_conntrack_info *info, @@ -475,7 +475,7 @@ static bool skb_nfct_cached(struct net *net, ct = nf_ct_get(skb, &ctinfo); /* If no ct, check if we have evidence that an existing conntrack entry - * might be found for this skb. This happens when we lose a skb->nfct + * might be found for this skb. This happens when we lose a skb->_nfct * due to an upcall. If the connection was not confirmed, it is not * cached and needs to be run through conntrack again. */ @@ -699,7 +699,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if * not done already. Update key with new CT state after passing the packet * through conntrack. - * Note that if the packet is deemed invalid by conntrack, skb->nfct will be + * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be * set to NULL and 0 will be returned. */ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, From 9ff464db50e437eef131f719cc2e9902eea9c607 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:53 -0800 Subject: [PATCH 02/10] openvswitch: Use inverted tuple in ovs_ct_find_existing() if NATted. The conntrack lookup for existing connections fails to invert the packet 5-tuple for NATted packets, and therefore fails to find the existing conntrack entry. Conntrack only stores 5-tuples for incoming packets, and there are various situations where a lookup on a packet that has already been transformed by NAT needs to be made. 
Looking up an existing conntrack entry upon executing packet received from the userspace is one of them. This patch fixes ovs_ct_find_existing() to invert the packet 5-tuple for the conntrack lookup whenever the packet has already been transformed by conntrack from its input form as evidenced by one of the NAT flags being set in the conntrack state metadata. Fixes: 05752523e565 ("openvswitch: Interface with NAT.") Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 5de6d12b3a739..4df9a5449c952 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -430,7 +430,7 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) */ static struct nf_conn * ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, - u8 l3num, struct sk_buff *skb) + u8 l3num, struct sk_buff *skb, bool natted) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -453,6 +453,17 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, return NULL; } + /* Must invert the tuple if skb has been transformed by NAT. */ + if (natted) { + struct nf_conntrack_tuple inverse; + + if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { + pr_debug("ovs_ct_find_existing: Inversion failed!\n"); + return NULL; + } + tuple = inverse; + } + /* look for tuple match */ h = nf_conntrack_find_get(net, zone, &tuple); if (!h) @@ -460,6 +471,13 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); + /* Inverted packet tuple matches the reverse direction conntrack tuple, + * select the other tuplehash to get the right 'ctinfo' bits for this + * packet. 
+ */ + if (natted) + h = &ct->tuplehash[!h->tuple.dst.dir]; + nf_ct_set(skb, ct, ovs_ct_get_info(h)); return ct; } @@ -482,7 +500,9 @@ static bool skb_nfct_cached(struct net *net, if (!ct && key->ct.state & OVS_CS_F_TRACKED && !(key->ct.state & OVS_CS_F_INVALID) && key->ct.zone == info->zone.id) - ct = ovs_ct_find_existing(net, &info->zone, info->family, skb); + ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, + !!(key->ct.state + & OVS_CS_F_NAT_MASK)); if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) From 193e30967897f3a8b6f9f137ac30571d832c2c5c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:54 -0800 Subject: [PATCH 03/10] openvswitch: Do not trigger events for unconfirmed connections. Receiving change events before the 'new' event for the connection has been received can be confusing. Avoid triggering change events for setting conntrack mark or labels before the conntrack entry has been confirmed. Fixes: 182e3042e15d ("openvswitch: Allow matching on conntrack mark") Fixes: c2ac66735870 ("openvswitch: Allow matching on conntrack label") Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4df9a5449c952..a6ff374d57d3a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -245,7 +245,8 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, new_mark = ct_mark | (ct->mark & ~(mask)); if (ct->mark != new_mark) { ct->mark = new_mark; - nf_conntrack_event_cache(IPCT_MARK, ct); + if (nf_ct_is_confirmed(ct)) + nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; } @@ -262,7 +263,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; struct nf_conn *ct; - int err; /* The connection could be invalid, in which case set_label is no-op.*/ ct = nf_ct_get(skb, &ctinfo); @@ -277,10 +277,26 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) return -ENOSPC; - err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, - OVS_CT_LABELS_LEN / sizeof(u32)); - if (err) - return err; + if (nf_ct_is_confirmed(ct)) { + /* Triggers a change event, which makes sense only for + * confirmed connections. + */ + int err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, + OVS_CT_LABELS_LEN / sizeof(u32)); + if (err) + return err; + } else { + u32 *dst = (u32 *)cl->bits; + const u32 *msk = (const u32 *)mask->ct_labels; + const u32 *lbl = (const u32 *)labels->ct_labels; + int i; + + /* No-one else has access to the non-confirmed entry, copy + * labels over, keeping any bits we are not explicitly setting. 
+ */ + for (i = 0; i < OVS_CT_LABELS_LEN / sizeof(u32); i++) + dst[i] = (dst[i] & ~msk[i]) | (lbl[i] & msk[i]); + } ovs_ct_get_labels(ct, &key->ct.labels); return 0; From cb80d58fae76d8ea93555149b2b16e19b89a1f4f Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:55 -0800 Subject: [PATCH 04/10] openvswitch: Unionize ovs_key_ct_label with a u32 array. Make the array of labels in struct ovs_key_ct_labels a union, adding a u32 array of the same byte size as the existing u8 array. It is faster to loop through the labels 32 bits at a time, which is also the alignment of netlink attributes. Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++++-- net/openvswitch/conntrack.c | 15 ++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 375d812fea36f..96aee34ef55f5 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -446,9 +446,13 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; -#define OVS_CT_LABELS_LEN 16 +#define OVS_CT_LABELS_LEN_32 4 +#define OVS_CT_LABELS_LEN (OVS_CT_LABELS_LEN_32 * sizeof(__u32)) struct ovs_key_ct_labels { - __u8 ct_labels[OVS_CT_LABELS_LEN]; + union { + __u8 ct_labels[OVS_CT_LABELS_LEN]; + __u32 ct_labels_32[OVS_CT_LABELS_LEN_32]; + }; }; /* OVS_KEY_ATTR_CT_STATE flags */ diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a6ff374d57d3a..f23934ccce201 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -281,20 +281,21 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, /* Triggers a change event, which makes sense only for * confirmed connections. 
*/ - int err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, - OVS_CT_LABELS_LEN / sizeof(u32)); + int err = nf_connlabels_replace(ct, labels->ct_labels_32, + mask->ct_labels_32, + OVS_CT_LABELS_LEN_32); if (err) return err; } else { u32 *dst = (u32 *)cl->bits; - const u32 *msk = (const u32 *)mask->ct_labels; - const u32 *lbl = (const u32 *)labels->ct_labels; + const u32 *msk = mask->ct_labels_32; + const u32 *lbl = labels->ct_labels_32; int i; /* No-one else has access to the non-confirmed entry, copy * labels over, keeping any bits we are not explicitly setting. */ - for (i = 0; i < OVS_CT_LABELS_LEN / sizeof(u32); i++) + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) dst[i] = (dst[i] & ~msk[i]) | (lbl[i] & msk[i]); } @@ -866,8 +867,8 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) { size_t i; - for (i = 0; i < sizeof(*labels); i++) - if (labels->ct_labels[i]) + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) + if (labels->ct_labels_32[i]) return true; return false; From b87cec3814ccc7f6afb0a1378ee7e5110d07cdd3 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:56 -0800 Subject: [PATCH 05/10] openvswitch: Simplify labels length logic. Since 23014011ba42 ("netfilter: conntrack: support a fixed size of 128 distinct labels"), the size of the conntrack labels extension has been fixed at 128 bits, so we do not need to check for labels sizes shorter than 128 at run-time. This patch simplifies labels length logic accordingly, but allows the conntrack labels size to be increased in the future without breaking the build. In the event of conntrack labels increasing in size, OVS would still be able to deal with the first 128 label bits. Suggested-by: Joe Stringer Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Acked-by: Joe Stringer Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index f23934ccce201..fe2a410ce70ad 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -129,22 +129,20 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) #endif } +/* Guard against conntrack labels max size shrinking below 128 bits. */ +#if NF_CT_LABELS_MAX_SIZE < 16 +#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes +#endif + static void ovs_ct_get_labels(const struct nf_conn *ct, struct ovs_key_ct_labels *labels) { struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; - if (cl) { - size_t len = sizeof(cl->bits); - - if (len > OVS_CT_LABELS_LEN) - len = OVS_CT_LABELS_LEN; - else if (len < OVS_CT_LABELS_LEN) - memset(labels, 0, OVS_CT_LABELS_LEN); - memcpy(labels, cl->bits, len); - } else { + if (cl) + memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); + else memset(labels, 0, OVS_CT_LABELS_LEN); - } } static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, @@ -274,7 +272,7 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) + if (!cl) return -ENOSPC; if (nf_ct_is_confirmed(ct)) { From 6ffcea79957df43caeaa6d1de5062556a5afc262 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:57 -0800 Subject: [PATCH 06/10] openvswitch: Refactor labels initialization. Refactoring conntrack labels initialization makes changes in later patches easier to review. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Acked-by: Joe Stringer Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 104 +++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 42 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fe2a410ce70ad..7c5bb98c22c61 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -227,19 +227,12 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) return 0; } -static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, +static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, u32 ct_mark, u32 mask) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; u32 new_mark; - /* The connection could be invalid, in which case set_mark is no-op. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return 0; - new_mark = ct_mark | (ct->mark & ~(mask)); if (ct->mark != new_mark) { ct->mark = new_mark; @@ -254,50 +247,66 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, #endif } -static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ct_labels *labels, - const struct ovs_key_ct_labels *mask) +static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) { - enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; - struct nf_conn *ct; - - /* The connection could be invalid, in which case set_label is no-op.*/ - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return 0; cl = nf_ct_labels_find(ct); if (!cl) { nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } + + return cl; +} + +/* Initialize labels for a new, yet to be committed conntrack entry. Note that + * since the new connection is not yet confirmed, and thus no-one else has + * access to it's labels, we simply write them over. Also, we refrain from + * triggering events, as receiving change events before the create event would + * be confusing. 
+ */ +static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) +{ + struct nf_conn_labels *cl; + u32 *dst; + int i; + + cl = ovs_ct_get_conn_labels(ct); if (!cl) return -ENOSPC; - if (nf_ct_is_confirmed(ct)) { - /* Triggers a change event, which makes sense only for - * confirmed connections. - */ - int err = nf_connlabels_replace(ct, labels->ct_labels_32, - mask->ct_labels_32, - OVS_CT_LABELS_LEN_32); - if (err) - return err; - } else { - u32 *dst = (u32 *)cl->bits; - const u32 *msk = mask->ct_labels_32; - const u32 *lbl = labels->ct_labels_32; - int i; + dst = (u32 *)cl->bits; + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) + dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | + (labels->ct_labels_32[i] & mask->ct_labels_32[i]); - /* No-one else has access to the non-confirmed entry, copy - * labels over, keeping any bits we are not explicitly setting. - */ - for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) - dst[i] = (dst[i] & ~msk[i]) | (lbl[i] & msk[i]); - } + memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); + + return 0; +} + +static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) +{ + struct nf_conn_labels *cl; + int err; + + cl = ovs_ct_get_conn_labels(ct); + if (!cl) + return -ENOSPC; + + err = nf_connlabels_replace(ct, labels->ct_labels_32, + mask->ct_labels_32, + OVS_CT_LABELS_LEN_32); + if (err) + return err; + + memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); - ovs_ct_get_labels(ct, &key->ct.labels); return 0; } @@ -877,25 +886,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; int err; err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; + /* The connection could be invalid, in which case this is a no-op.*/ + 
ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + /* Apply changes before confirming the connection so that the initial * conntrack NEW netlink event carries the values given in the CT * action. */ if (info->mark.mask) { - err = ovs_ct_set_mark(skb, key, info->mark.value, + err = ovs_ct_set_mark(ct, key, info->mark.value, info->mark.mask); if (err) return err; } if (labels_nonzero(&info->labels.mask)) { - err = ovs_ct_set_labels(skb, key, &info->labels.value, - &info->labels.mask); + if (!nf_ct_is_confirmed(ct)) + err = ovs_ct_init_labels(ct, key, &info->labels.value, + &info->labels.mask); + else + err = ovs_ct_set_labels(ct, key, &info->labels.value, + &info->labels.mask); if (err) return err; } From 09aa98ad496d6b11a698b258bc64d7f64c55d682 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:58 -0800 Subject: [PATCH 07/10] openvswitch: Inherit master's labels. We avoid calling into nf_conntrack_in() for expected connections, as that would remove the expectation that we want to stick around until we are ready to commit the connection. Instead, we do a lookup in the expectation table directly. However, after a successful expectation lookup we have set the flow key label field from the master connection, whereas nf_conntrack_in() does not do this. This leads to master's labels being inherited after an expectation lookup, but those labels not being inherited after the corresponding conntrack action with a commit flag. This patch resolves the problem by changing the commit code path to also inherit the master's labels to the expected connection. Resolving this conflict in favor of inheriting the labels allows more information to be passed from the master connection to related connections, which would otherwise be much harder if the 32 bits in the connmark are not enough. Labels can still be set explicitly, so this change only affects the default values of the labels in the presence of a master connection. 
Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 45 +++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7c5bb98c22c61..f989ccf38eab4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -73,6 +73,8 @@ struct ovs_conntrack_info { #endif }; +static bool labels_nonzero(const struct ovs_key_ct_labels *labels); + static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -270,18 +272,32 @@ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, const struct ovs_key_ct_labels *labels, const struct ovs_key_ct_labels *mask) { - struct nf_conn_labels *cl; - u32 *dst; - int i; + struct nf_conn_labels *cl, *master_cl; + bool have_mask = labels_nonzero(mask); + + /* Inherit master's labels to the related connection? */ + master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; + + if (!master_cl && !have_mask) + return 0; /* Nothing to do. */ cl = ovs_ct_get_conn_labels(ct); if (!cl) return -ENOSPC; - dst = (u32 *)cl->bits; - for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) - dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | - (labels->ct_labels_32[i] & mask->ct_labels_32[i]); + /* Inherit the master's labels, if any. 
*/ + if (master_cl) + *cl = *master_cl; + + if (have_mask) { + u32 *dst = (u32 *)cl->bits; + int i; + + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) + dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | + (labels->ct_labels_32[i] + & mask->ct_labels_32[i]); + } memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); @@ -909,13 +925,14 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (err) return err; } - if (labels_nonzero(&info->labels.mask)) { - if (!nf_ct_is_confirmed(ct)) - err = ovs_ct_init_labels(ct, key, &info->labels.value, - &info->labels.mask); - else - err = ovs_ct_set_labels(ct, key, &info->labels.value, - &info->labels.mask); + if (!nf_ct_is_confirmed(ct)) { + err = ovs_ct_init_labels(ct, key, &info->labels.value, + &info->labels.mask); + if (err) + return err; + } else if (labels_nonzero(&info->labels.mask)) { + err = ovs_ct_set_labels(ct, key, &info->labels.value, + &info->labels.mask); if (err) return err; } From 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:21:59 -0800 Subject: [PATCH 08/10] openvswitch: Add original direction conntrack tuple to sw_flow_key. Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. 
The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections' reply packets (as well as original direction packets), and packets in both directions of any related connections, may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple, the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. While existence of a conntrack entry is required for admission of the return or related packets, policy changes can cause connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using original direction 5-tuple matching, the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same as or different from that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in the master's conntrack mark or labels, which are automatically inherited by the expected related connections. 
The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 20 +++++++- net/openvswitch/actions.c | 2 + net/openvswitch/conntrack.c | 86 +++++++++++++++++++++++++++++--- net/openvswitch/conntrack.h | 10 +++- net/openvswitch/flow.c | 34 +++++++++++-- net/openvswitch/flow.h | 49 ++++++++++++++---- net/openvswitch/flow_netlink.c | 85 +++++++++++++++++++++++-------- net/openvswitch/flow_netlink.h | 7 ++- 8 files changed, 246 insertions(+), 47 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 96aee34ef55f5..90af8b8e10f8f 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -331,6 +331,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. 
*/ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ + OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -472,6 +474,22 @@ struct ovs_key_ct_labels { #define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) +struct ovs_key_ct_tuple_ipv4 { + __be32 ipv4_src; + __be32 ipv4_dst; + __be16 src_port; + __be16 dst_port; + __u8 ipv4_proto; +}; + +struct ovs_key_ct_tuple_ipv6 { + __be32 ipv6_src[4]; + __be32 ipv6_dst[4]; + __be16 src_port; + __be16 dst_port; + __u8 ipv6_proto; +}; + /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index efa9a8858cc6a..b1beb2b94ec76 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index f989ccf38eab4..bfd7606c8be1f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -147,6 +147,20 @@ static void ovs_ct_get_labels(const struct nf_conn *ct, memset(labels, 0, OVS_CT_LABELS_LEN); } +static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, + const struct nf_conntrack_tuple *orig, + u8 icmp_proto) +{ + key->ct.orig_proto = orig->dst.protonum; + if (orig->dst.protonum == icmp_proto) { + key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); + key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); + } else { + key->ct.orig_tp.src = orig->src.u.all; + key->ct.orig_tp.dst = 
orig->dst.u.all; + } +} + static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, const struct nf_conntrack_zone *zone, const struct nf_conn *ct) @@ -155,6 +169,35 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, key->ct.zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); ovs_ct_get_labels(ct, &key->ct.labels); + + if (ct) { + const struct nf_conntrack_tuple *orig; + + /* Use the master if we have one. */ + if (ct->master) + ct = ct->master; + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + + /* IP version must match with the master connection. */ + if (key->eth.type == htons(ETH_P_IP) && + nf_ct_l3num(ct) == NFPROTO_IPV4) { + key->ipv4.ct_orig.src = orig->src.u3.ip; + key->ipv4.ct_orig.dst = orig->dst.u3.ip; + __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); + return; + } else if (key->eth.type == htons(ETH_P_IPV6) && + !sw_flow_key_is_nd(key) && + nf_ct_l3num(ct) == NFPROTO_IPV6) { + key->ipv6.ct_orig.src = orig->src.u3.in6; + key->ipv6.ct_orig.dst = orig->dst.u3.in6; + __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); + return; + } + } + /* Clear 'ct.orig_proto' to mark the non-existence of conntrack + * original direction key fields. + */ + key->ct.orig_proto = 0; } /* Update 'key' based on skb->_nfct. 
If 'post_ct' is true, then OVS has @@ -208,24 +251,55 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) ovs_ct_update_key(skb, NULL, key, false, false); } -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) +#define IN6_ADDR_INITIALIZER(ADDR) \ + { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ + (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } + +int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) { - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && - nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), - &key->ct.labels)) + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), + &output->ct.labels)) return -EMSGSIZE; + if (swkey->ct.orig_proto) { + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ct_tuple_ipv4 orig = { + output->ipv4.ct_orig.src, + output->ipv4.ct_orig.dst, + output->ct.orig_tp.src, + output->ct.orig_tp.dst, + output->ct.orig_proto, + }; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, + sizeof(orig), &orig)) + return -EMSGSIZE; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ct_tuple_ipv6 orig = { + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), + output->ct.orig_tp.src, + output->ct.orig_tp.dst, + output->ct.orig_proto, + }; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, + sizeof(orig), &orig)) + return -EMSGSIZE; + } + } + return 0; } diff --git 
a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 8f6230bd61833..9e92445dc0924 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, const struct ovs_conntrack_info *); void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); +int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ @@ -79,9 +80,14 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, key->ct.zone = 0; key->ct.mark = 0; memset(&key->ct.labels, 0, sizeof(key->ct.labels)); + /* Clear 'ct.orig_proto' to mark the non-existence of original + * direction key fields. + */ + key->ct.orig_proto = 0; } -static inline int ovs_ct_put_key(const struct sw_flow_key *key, +static inline int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) { return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2c0a00f7f1b7d..9d4bb8eb63f25 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { - int res; + int res, err; /* Extract metadata from packet. 
*/ if (tun_info) { @@ -792,7 +792,6 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; - ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; res = key_extract_mac_proto(skb); if (res < 0) @@ -800,17 +799,26 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->mac_proto = res; key->recirc_id = 0; - return key_extract(skb, key); + err = key_extract(skb, key); + if (!err) + ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */ + return err; } int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log) { + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; int err; + err = parse_flow_nlattrs(attr, a, &attrs, log); + if (err) + return -EINVAL; + /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(net, attr, key, log); + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log); if (err) return err; @@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, */ skb->protocol = key->eth.type; - return key_extract(skb, key); + err = key_extract(skb, key); + if (err) + return err; + + /* Check that we have conntrack original direction tuple metadata only + * for packets for which it makes sense. Otherwise the key may be + * corrupted due to overlapping key fields. + */ + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) && + key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) && + (key->eth.type != htons(ETH_P_IPV6) || + sw_flow_key_is_nd(key))) + return -EINVAL; + + return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index f61cae7f9030d..76e05b25f0303 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. 
+ * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -107,10 +107,16 @@ struct sw_flow_key { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; + union { + struct { + __be32 src; + __be32 dst; + } ct_orig; /* Conntrack original direction fields. */ + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; + }; } ipv4; struct { struct { @@ -118,23 +124,44 @@ struct sw_flow_key { struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. */ - struct { - struct in6_addr target; /* ND target address. */ - u8 sll[ETH_ALEN]; /* ND source link layer address. */ - u8 tll[ETH_ALEN]; /* ND target link layer address. */ - } nd; + union { + struct { + struct in6_addr src; + struct in6_addr dst; + } ct_orig; /* Conntrack original direction fields. */ + struct { + struct in6_addr target; /* ND target address. */ + u8 sll[ETH_ALEN]; /* ND source link layer address. */ + u8 tll[ETH_ALEN]; /* ND target link layer address. */ + } nd; + }; } ipv6; }; struct { /* Connection tracking fields. */ + u8 state; + u8 orig_proto; /* CT orig tuple IP protocol. */ u16 zone; u32 mark; - u8 state; + struct { + __be16 src; /* CT orig tuple tp src port. */ + __be16 dst; /* CT orig tuple tp dst port. */ + } orig_tp; + struct ovs_key_ct_labels labels; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. 
*/ +static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key) +{ + return key->eth.type == htons(ETH_P_IPV6) && + key->ip.proto == NEXTHDR_ICMP && + key->tp.dst == 0 && + (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)); +} + struct sw_flow_key_range { unsigned short int start; unsigned short int end; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c87d359b9b37a..989f38f120bb4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -129,7 +129,9 @@ static bool match_validate(const struct sw_flow_match *match, /* The following mask attributes allowed only if they * pass the validation tests. */ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) + | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) | (1 << OVS_KEY_ATTR_IPV6) + | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) | (1 << OVS_KEY_ATTR_TCP) | (1 << OVS_KEY_ATTR_TCP_FLAGS) | (1 << OVS_KEY_ATTR_UDP) @@ -161,8 +163,10 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; + mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; + } if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { @@ -196,8 +200,10 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_IPV6)) { key_expected |= 1 << OVS_KEY_ATTR_IPV6; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; + mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; + } if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { @@ -230,6 +236,12 @@ static 
bool match_validate(const struct sw_flow_match *match, htons(NDISC_NEIGHBOUR_SOLICITATION) || match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; + /* Original direction conntrack tuple + * uses the same space as the ND fields + * in the key, so both are not allowed + * at the same time. + */ + mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } @@ -282,7 +294,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -295,6 +307,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -355,6 +368,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { + .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { + .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -430,9 +447,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr, return __parse_flow_nlattrs(attr, a, attrsp, log, true); } -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp, - 
bool log) +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], + u64 *attrsp, bool log) { return __parse_flow_nlattrs(attr, a, attrsp, log, false); } @@ -1082,6 +1098,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { + const struct ovs_key_ct_tuple_ipv4 *ct; + + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); + + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv4_proto, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); + } + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { + const struct ovs_key_ct_tuple_ipv6 *ct; + + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); + + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, + sizeof(match->key->ipv6.ct_orig.src), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, + sizeof(match->key->ipv6.ct_orig.dst), + is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv6_proto, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); + } /* For layer 3 packets the Ethernet type is provided * and treated as metadata but no MAC addresses are provided. @@ -1493,9 +1537,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. - * @key: Receives extracted in_port, priority, tun_key and skb_mark. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. + * @net: Network namespace. 
+ * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack + * metadata. + * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink + * attributes. + * @attrs: Bit mask for the netlink attributes included in @a. * @log: Boolean to allow kernel error logging. Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. @@ -1504,25 +1551,23 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) * take the same form accepted by flow_from_nlattrs(), but only enough of it to * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. + * + * This must be called before the packet key fields are filled in 'key'. */ -int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, - struct sw_flow_key *key, - bool log) +int ovs_nla_get_flow_metadata(struct net *net, + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], + u64 attrs, struct sw_flow_key *key, bool log) { - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct sw_flow_match match; - u64 attrs = 0; - int err; - - err = parse_flow_nlattrs(attr, a, &attrs, log); - if (err) - return -EINVAL; memset(&match, 0, sizeof(match)); match.key = key; memset(&key->ct, 0, sizeof(key->ct)); + memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); + memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); + key->phy.in_port = DP_MAX_PORTS; return metadata_from_nlattrs(net, &match, &attrs, a, false, log); @@ -1584,7 +1629,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; - if (ovs_ct_put_key(output, skb)) + if (ovs_ct_put_key(swkey, output, skb)) goto nla_put_failure; if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 45f9769e5aacc..929c665ac3aa9 100644 --- 
a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 45f9769e5aacc..929c665ac3aa9 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -46,8 +46,11 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); -int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, - struct sw_flow_key *, bool log); +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], + u64 *attrsp, bool log); +int ovs_nla_get_flow_metadata(struct net *net, + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], + u64 attrs, struct sw_flow_key *key, bool log); int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); From dd41d33f0b033885211a5d6f3ee19e73238aa9ee Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:22:00 -0800 Subject: [PATCH 09/10] openvswitch: Add force commit. Stateful network admission policy may allow connections to one direction and reject connections initiated in the other direction. After policy change it is possible that for a new connection an overlapping conntrack entry already exists, where the original direction of the existing connection is opposed to the new connection's initial packet. Most importantly, conntrack state relating to the current packet gets the "reply" designation based on whether the original direction tuple or the reply direction tuple matched. If this "directionality" is wrong w.r.t. the stateful network admission policy it may happen that packets in neither direction are correctly admitted. This patch adds a new "force commit" option to the OVS conntrack action that checks the original direction of an existing conntrack entry. If that direction is opposed to the current packet, the existing conntrack entry is deleted and a new one is subsequently created in the correct direction.
Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Acked-by: Joe Stringer Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 5 +++++ net/openvswitch/conntrack.c | 26 ++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 90af8b8e10f8f..7f41f7d0000f9 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -674,6 +674,10 @@ struct ovs_action_hash { * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. * @OVS_CT_ATTR_NAT: Nested OVS_NAT_ATTR_* for performing L3 network address * translation (NAT) on the packet. + * @OVS_CT_ATTR_FORCE_COMMIT: Like %OVS_CT_ATTR_COMMIT, but instead of doing + * nothing if the connection is already committed will check that the current + * packet is in conntrack entry's original direction. If directionality does + * not match, will delete the existing conntrack entry and commit a new one. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -684,6 +688,7 @@ enum ovs_ct_attr { OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of related connections. 
*/ OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ + OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ __OVS_CT_ATTR_MAX }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bfd7606c8be1f..8b15bab705838 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -65,6 +65,7 @@ struct ovs_conntrack_info { struct nf_conn *ct; u8 commit : 1; u8 nat : 3; /* enum ovs_ct_nat */ + u8 force : 1; u16 family; struct md_mark mark; struct md_labels labels; @@ -613,10 +614,13 @@ static bool skb_nfct_cached(struct net *net, */ if (!ct && key->ct.state & OVS_CS_F_TRACKED && !(key->ct.state & OVS_CS_F_INVALID) && - key->ct.zone == info->zone.id) + key->ct.zone == info->zone.id) { ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, !!(key->ct.state & OVS_CS_F_NAT_MASK)); + if (ct) + nf_ct_get(skb, &ctinfo); + } if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) @@ -630,6 +634,18 @@ static bool skb_nfct_cached(struct net *net, if (help && rcu_access_pointer(help->helper) != info->helper) return false; } + /* Force conntrack entry direction to the current packet? */ + if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { + /* Delete the conntrack entry if confirmed, else just release + * the reference. 
+ */ + if (nf_ct_is_confirmed(ct)) + nf_ct_delete(ct, 0, 0); + else + nf_conntrack_put(&ct->ct_general); + nf_ct_set(skb, NULL, 0); + return false; + } return true; } @@ -1207,6 +1223,7 @@ static int parse_nat(const struct nlattr *attr, static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, + [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), @@ -1246,6 +1263,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } switch (type) { + case OVS_CT_ATTR_FORCE_COMMIT: + info->force = true; + /* fall through. */ case OVS_CT_ATTR_COMMIT: info->commit = true; break; @@ -1472,7 +1492,9 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (!start) return -EMSGSIZE; - if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) + if (ct_info->commit && nla_put_flag(skb, ct_info->force + ? OVS_CT_ATTR_FORCE_COMMIT + : OVS_CT_ATTR_COMMIT)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) From 316d4d78cf9b6795b83f97c45368748741df418c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 9 Feb 2017 11:22:01 -0800 Subject: [PATCH 10/10] openvswitch: Pack struct sw_flow_key. struct sw_flow_key has two 16-bit holes. Move the most matched conntrack match fields there. In some typical cases this reduces the size of the key that needs to be hashed into half and into one cache line. Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 40 +++++++++++++++++----------------- net/openvswitch/conntrack.h | 8 +++---- net/openvswitch/flow.h | 14 +++++++----- net/openvswitch/flow_netlink.c | 11 ++++++---- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 8b15bab705838..c2d452eab0c5d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -152,7 +152,7 @@ static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, const struct nf_conntrack_tuple *orig, u8 icmp_proto) { - key->ct.orig_proto = orig->dst.protonum; + key->ct_orig_proto = orig->dst.protonum; if (orig->dst.protonum == icmp_proto) { key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); @@ -166,8 +166,8 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, const struct nf_conntrack_zone *zone, const struct nf_conn *ct) { - key->ct.state = state; - key->ct.zone = zone->id; + key->ct_state = state; + key->ct_zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); ovs_ct_get_labels(ct, &key->ct.labels); @@ -195,10 +195,10 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, return; } } - /* Clear 'ct.orig_proto' to mark the non-existence of conntrack + /* Clear 'ct_orig_proto' to mark the non-existence of conntrack * original direction key fields. */ - key->ct.orig_proto = 0; + key->ct_orig_proto = 0; } /* Update 'key' based on skb->_nfct. 
If 'post_ct' is true, then OVS has @@ -228,7 +228,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb, if (ct->master) state |= OVS_CS_F_RELATED; if (keep_nat_flags) { - state |= key->ct.state & OVS_CS_F_NAT_MASK; + state |= key->ct_state & OVS_CS_F_NAT_MASK; } else { if (ct->status & IPS_SRC_NAT) state |= OVS_CS_F_SRC_NAT; @@ -259,11 +259,11 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) int ovs_ct_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone)) + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && @@ -275,14 +275,14 @@ int ovs_ct_put_key(const struct sw_flow_key *swkey, &output->ct.labels)) return -EMSGSIZE; - if (swkey->ct.orig_proto) { + if (swkey->ct_orig_proto) { if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ct_tuple_ipv4 orig = { output->ipv4.ct_orig.src, output->ipv4.ct_orig.dst, output->ct.orig_tp.src, output->ct.orig_tp.dst, - output->ct.orig_proto, + output->ct_orig_proto, }; if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, sizeof(orig), &orig)) @@ -293,7 +293,7 @@ int ovs_ct_put_key(const struct sw_flow_key *swkey, IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), output->ct.orig_tp.src, output->ct.orig_tp.dst, - output->ct.orig_proto, + output->ct_orig_proto, }; if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, sizeof(orig), &orig)) @@ -612,11 +612,11 @@ static bool skb_nfct_cached(struct net *net, * due to an upcall. If the connection was not confirmed, it is not * cached and needs to be run through conntrack again. 
*/ - if (!ct && key->ct.state & OVS_CS_F_TRACKED && - !(key->ct.state & OVS_CS_F_INVALID) && - key->ct.zone == info->zone.id) { + if (!ct && key->ct_state & OVS_CS_F_TRACKED && + !(key->ct_state & OVS_CS_F_INVALID) && + key->ct_zone == info->zone.id) { ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, - !!(key->ct.state + !!(key->ct_state & OVS_CS_F_NAT_MASK)); if (ct) nf_ct_get(skb, &ctinfo); @@ -740,7 +740,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, if (maniptype == NF_NAT_MANIP_SRC) { __be16 src; - key->ct.state |= OVS_CS_F_SRC_NAT; + key->ct_state |= OVS_CS_F_SRC_NAT; if (key->eth.type == htons(ETH_P_IP)) key->ipv4.addr.src = ip_hdr(skb)->saddr; else if (key->eth.type == htons(ETH_P_IPV6)) @@ -762,7 +762,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, } else { __be16 dst; - key->ct.state |= OVS_CS_F_DST_NAT; + key->ct_state |= OVS_CS_F_DST_NAT; if (key->eth.type == htons(ETH_P_IP)) key->ipv4.addr.dst = ip_hdr(skb)->daddr; else if (key->eth.type == htons(ETH_P_IPV6)) @@ -886,7 +886,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * NAT after the nf_conntrack_in() call. We can actually clear * the whole state, as it will be re-initialized below. */ - key->ct.state = 0; + key->ct_state = 0; /* Update the key, but keep the NAT flags. */ ovs_ct_update_key(skb, info, key, true, true); @@ -902,9 +902,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * * NAT will be done only if the CT action has NAT, and only * once per packet (per zone), as guarded by the NAT bits in - * the key->ct.state. + * the key->ct_state. 
*/ - if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && + if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && (nf_ct_is_confirmed(ct) || info->commit) && ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { return -EINVAL; diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 9e92445dc0924..bc7efd1867ab4 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -76,14 +76,14 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, static inline void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) { - key->ct.state = 0; - key->ct.zone = 0; + key->ct_state = 0; + key->ct_zone = 0; key->ct.mark = 0; memset(&key->ct.labels, 0, sizeof(key->ct.labels)); - /* Clear 'ct.orig_proto' to mark the non-existence of original + /* Clear 'ct_orig_proto' to mark the non-existence of original * direction key fields. */ - key->ct.orig_proto = 0; + key->ct_orig_proto = 0; } static inline int ovs_ct_put_key(const struct sw_flow_key *swkey, diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 76e05b25f0303..a9bc1c875965c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -85,6 +85,11 @@ struct sw_flow_key { struct vlan_head cvlan; __be16 type; /* Ethernet frame type. */ } eth; + /* Filling a hole of two bytes. */ + u8 ct_state; + u8 ct_orig_proto; /* CT original direction tuple IP + * protocol. + */ union { struct { __be32 top_lse; /* top label stack entry */ @@ -96,6 +101,7 @@ struct sw_flow_key { u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; }; + u16 ct_zone; /* Conntrack zone. */ struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ @@ -138,16 +144,12 @@ struct sw_flow_key { } ipv6; }; struct { - /* Connection tracking fields. */ - u8 state; - u8 orig_proto; /* CT orig tuple IP protocol. */ - u16 zone; - u32 mark; + /* Connection tracking fields not packed above. 
*/ struct { __be16 src; /* CT orig tuple tp src port. */ __be16 dst; /* CT orig tuple tp dst port. */ } orig_tp; - + u32 mark; struct ovs_key_ct_labels labels; } ct; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 989f38f120bb4..6f5fa50f716d0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1072,14 +1072,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return -EINVAL; } - SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); + SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); - SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); + SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && @@ -1107,7 +1107,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); - SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv4_proto, is_mask); + SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); } if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { @@ -1123,7 +1123,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); - SW_FLOW_KEY_PUT(match, ct.orig_proto, ct->ipv6_proto, is_mask); + SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); } @@ -1564,6 +1564,9 @@ int ovs_nla_get_flow_metadata(struct net 
*net, memset(&match, 0, sizeof(match)); match.key = key; + key->ct_state = 0; + key->ct_zone = 0; + key->ct_orig_proto = 0; memset(&key->ct, 0, sizeof(key->ct)); memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));