net: openvswitch: fix to make sure flow_lookup() is not preempted
The flow_lookup() function uses per-CPU variables, which must be accessed
with BH disabled. This is fine in the general NAPI use case, where the
local BH is already disabled, but the function is also called from the
netlink context. This patch makes sure that BH is also disabled in the
netlink path.
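
(Illustrative sketch, not part of the patch: the per-CPU access pattern in
question looks roughly like the fragment below; the example_* names are
hypothetical.)

/* Touching per-CPU data from a preemptible (e.g. netlink) context.
 * NAPI/softirq callers already run with BH disabled; a process-context
 * caller must disable BH itself, which also keeps the task from
 * migrating to another CPU mid-update.
 */
#include <linux/bottom_half.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(u64, example_cntr); /* hypothetical counter */

static void example_inc_from_process_context(void)
{
        local_bh_disable();
        __this_cpu_inc(example_cntr);
        local_bh_enable();
}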

In addition, u64_stats_update_begin() requires a lock to guarantee a
single writer, which was not guaranteed here. Making the stats structure
per-CPU and disabling NAPI (softirq) ensures that there is always only
one writer per syncp.
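
(For reference, a minimal sketch of that single-writer scheme, using the
same u64_stats API the patch uses; the struct and function names here are
hypothetical.)

#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct example_stats {
        struct u64_stats_sync syncp; /* one writer per CPU */
        u64 cnt;
};

static struct example_stats __percpu *example_stats; /* hypothetical */

/* Writer: runs with BH disabled, so the local CPU's syncp has exactly
 * one writer at any time and plain increments are safe.
 */
static void example_writer(void)
{
        struct example_stats *s = this_cpu_ptr(example_stats);

        u64_stats_update_begin(&s->syncp);
        s->cnt++;
        u64_stats_update_end(&s->syncp);
}

/* Reader: sums all per-CPU counters, retrying a CPU's read if a writer
 * was active in the middle of it.
 */
static u64 example_read_sum(void)
{
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu) {
                struct example_stats *s = per_cpu_ptr(example_stats, cpu);
                unsigned int start;
                u64 v;

                do {
                        start = u64_stats_fetch_begin_irq(&s->syncp);
                        v = s->cnt;
                } while (u64_stats_fetch_retry_irq(&s->syncp, start));

                sum += v;
        }
        return sum;
}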

Fixes: eac87c4 ("net: openvswitch: reorder masks array based on usage")
Reported-by: Juri Lelli <jlelli@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://lore.kernel.org/r/160295903253.7789.826736662555102345.stgit@ebuild
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Eelco Chaudron authored and Jakub Kicinski committed Oct 18, 2020
1 parent f355a55 commit f981fc3
Showing 2 changed files with 41 additions and 25 deletions.
58 changes: 35 additions & 23 deletions net/openvswitch/flow_table.c
@@ -175,7 +175,7 @@ static struct table_instance *table_instance_alloc(int new_size)
 
 static void __mask_array_destroy(struct mask_array *ma)
 {
-        free_percpu(ma->masks_usage_cntr);
+        free_percpu(ma->masks_usage_stats);
         kfree(ma);
 }

@@ -199,15 +199,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
                 ma->masks_usage_zero_cntr[i] = 0;

                 for_each_possible_cpu(cpu) {
-                        u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
-                                                          cpu);
+                        struct mask_array_stats *stats;
                         unsigned int start;
                         u64 counter;

+                        stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
                         do {
-                                start = u64_stats_fetch_begin_irq(&ma->syncp);
-                                counter = usage_counters[i];
-                        } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+                                start = u64_stats_fetch_begin_irq(&stats->syncp);
+                                counter = stats->usage_cntrs[i];
+                        } while (u64_stats_fetch_retry_irq(&stats->syncp, start));

                         ma->masks_usage_zero_cntr[i] += counter;
                 }
@@ -230,9 +230,10 @@ static struct mask_array *tbl_mask_array_alloc(int size)
                                              sizeof(struct sw_flow_mask *) *
                                              size);

-        new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
-                                               __alignof__(u64));
-        if (!new->masks_usage_cntr) {
+        new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
+                                                sizeof(u64) * size,
+                                                __alignof__(u64));
+        if (!new->masks_usage_stats) {
                 kfree(new);
                 return NULL;
         }
@@ -722,6 +723,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 
 /* Flow lookup does full lookup on flow table. It starts with
  * mask from index passed in *index.
+ * This function MUST be called with BH disabled due to the use
+ * of CPU specific variables.
  */
 static struct sw_flow *flow_lookup(struct flow_table *tbl,
                                    struct table_instance *ti,
@@ -731,7 +734,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
                                    u32 *n_cache_hit,
                                    u32 *index)
 {
-        u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
+        struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
         struct sw_flow *flow;
         struct sw_flow_mask *mask;
         int i;
@@ -741,9 +744,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
         if (mask) {
                 flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
                 if (flow) {
-                        u64_stats_update_begin(&ma->syncp);
-                        usage_counters[*index]++;
-                        u64_stats_update_end(&ma->syncp);
+                        u64_stats_update_begin(&stats->syncp);
+                        stats->usage_cntrs[*index]++;
+                        u64_stats_update_end(&stats->syncp);
                         (*n_cache_hit)++;
                         return flow;
                 }
@@ -762,9 +765,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
                 flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
                 if (flow) { /* Found */
                         *index = i;
-                        u64_stats_update_begin(&ma->syncp);
-                        usage_counters[*index]++;
-                        u64_stats_update_end(&ma->syncp);
+                        u64_stats_update_begin(&stats->syncp);
+                        stats->usage_cntrs[*index]++;
+                        u64_stats_update_end(&stats->syncp);
                         return flow;
                 }
         }
@@ -850,9 +853,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
         struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
         u32 __always_unused n_mask_hit;
         u32 __always_unused n_cache_hit;
+        struct sw_flow *flow;
         u32 index = 0;
 
-        return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+        /* This function gets called through the netlink interface and
+         * therefore is preemptible. However, flow_lookup() needs to be
+         * called with BH disabled due to CPU specific variables.
+         */
+        local_bh_disable();
+        flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
+        local_bh_enable();
+        return flow;
 }
 
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
@@ -1109,7 +1120,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
 
         for (i = 0; i < ma->max; i++) {
                 struct sw_flow_mask *mask;
-                unsigned int start;
                 int cpu;
 
                 mask = rcu_dereference_ovsl(ma->masks[i]);
@@ -1120,14 +1130,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
                 masks_and_count[i].counter = 0;

                 for_each_possible_cpu(cpu) {
-                        u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
-                                                          cpu);
+                        struct mask_array_stats *stats;
+                        unsigned int start;
                         u64 counter;

+                        stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
                         do {
-                                start = u64_stats_fetch_begin_irq(&ma->syncp);
-                                counter = usage_counters[i];
-                        } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
+                                start = u64_stats_fetch_begin_irq(&stats->syncp);
+                                counter = stats->usage_cntrs[i];
+                        } while (u64_stats_fetch_retry_irq(&stats->syncp,
+                                                           start));

                         masks_and_count[i].counter += counter;
                 }
8 changes: 6 additions & 2 deletions net/openvswitch/flow_table.h
@@ -38,12 +38,16 @@ struct mask_count {
         u64 counter;
 };
 
+struct mask_array_stats {
+        struct u64_stats_sync syncp;
+        u64 usage_cntrs[];
+};
+
 struct mask_array {
         struct rcu_head rcu;
         int count, max;
-        u64 __percpu *masks_usage_cntr;
+        struct mask_array_stats __percpu *masks_usage_stats;
         u64 *masks_usage_zero_cntr;
-        struct u64_stats_sync syncp;
         struct sw_flow_mask __rcu *masks[];
 };

