Skip to content

Commit

Permalink
IB/hfi1: Add sysfs interface for affinity setup
Browse files Browse the repository at this point in the history
Some users want more control over which cpu cores are being used by the
driver. For example, users might want to restrict the driver to some
specified subset of the cores so that they can appropriately partition
processes, irq handlers, and work threads.
To allow the user to fine-tune system affinity settings, new sysfs
attributes are introduced per sdma engine. This patch adds a new
attribute type for sdma engines and a new cpu_list attribute.
When the user writes a cpu range to the cpu_list attribute, the driver
creates an internal cpu->sdma map, which is used later as a
look-up table to choose an optimal engine for user requests.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Reviewed-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
  • Loading branch information
Tadeusz Struk authored and Doug Ledford committed Oct 2, 2016
1 parent 3a6982d commit 0cb2aa6
Show file tree
Hide file tree
Showing 5 changed files with 412 additions and 7 deletions.
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/hfi1/hfi.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>

#include "chip_registers.h"
Expand Down Expand Up @@ -1174,6 +1175,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;

struct hfi1_affinity *affinity;
struct rhashtable sdma_rht;
struct kobject kobj;
};

Expand Down
310 changes: 310 additions & 0 deletions drivers/infiniband/hw/hfi1/sdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,34 @@ u16 sdma_get_descq_cnt(void)
return count;
}

/**
* sdma_engine_get_vl() - return vl for a given sdma engine
* @sde: sdma engine
*
* This function returns the vl mapped to a given engine, or an error if
* the mapping can't be found. The mapping fields are protected by RCU.
*/
int sdma_engine_get_vl(struct sdma_engine *sde)
{
struct hfi1_devdata *dd = sde->dd;
struct sdma_vl_map *m;
u8 vl;

if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
return -EINVAL;

rcu_read_lock();
m = rcu_dereference(dd->sdma_map);
if (unlikely(!m)) {
rcu_read_unlock();
return -EINVAL;
}
vl = m->engine_to_vl[sde->this_idx];
rcu_read_unlock();

return vl;
}

/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
Expand Down Expand Up @@ -788,6 +816,283 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}

/*
 * struct sdma_rht_map_elem - engines serving one <cpu, vl> pair
 * @mask: and-ed with the selector to pick a slot in @sde
 * @ctr: number of distinct engines stored at the front of @sde
 * @sde: engine table; sized by whoever allocates the element (the mapping
 *       code reserves room for TXE_NUM_SDMA_ENGINES pointers)
 */
struct sdma_rht_map_elem {
	u32 mask;
	u8 ctr;
	/* flexible array member instead of the deprecated zero-length array */
	struct sdma_engine *sde[];
};

/*
 * struct sdma_rht_node - hash table entry describing one CPU
 *
 * Holds, per vl, the table of sdma engines a user assigned to that CPU.
 */
struct sdma_rht_node {
/* hash key: the CPU number this entry describes */
unsigned long cpu_id;
/* one engine table per vl; a slot stays NULL until an engine is mapped */
struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
/* linkage into the rhashtable */
struct rhash_head node;
};

/* Sizing hint handed to the hash table as .nelem_hint below */
#define NR_CPUS_HINT 192

/*
 * Parameters of the cpu -> sdma engine hash table: entries are
 * struct sdma_rht_node objects keyed by their cpu_id field.
 */
static const struct rhashtable_params sdma_rht_params = {
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
};

/**
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * User defined sdma engine affinity setting is honored when applicable,
 * otherwise system default sdma engine mapping is used. To ensure correct
 * ordering, the mapping from <selector, vl> to sde must remain unchanged.
 *
 * The user mapping is consulted only when the calling task is allowed to
 * run on exactly one CPU and @vl is a valid index into the per-cpu map.
 *
 * Return: the engine found in the user mapping, or the result of
 * sdma_select_engine_vl() when no user mapping applies.
 */
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	struct sdma_rht_node *rht_node;
	struct sdma_engine *sde = NULL;
	const struct cpumask *current_mask = tsk_cpus_allowed(current);
	unsigned long cpu_id;

	/*
	 * The per-cpu node only has HFI1_MAX_VLS_SUPPORTED map slots; never
	 * index past them, let the default selection deal with such a vl.
	 */
	if (vl >= HFI1_MAX_VLS_SUPPORTED)
		goto out;

	/*
	 * To ensure that always the same sdma engine(s) will be
	 * selected make sure the process is pinned to this CPU only.
	 */
	if (cpumask_weight(current_mask) != 1)
		goto out;

	/* the task may run on a single CPU only, so this id is stable */
	cpu_id = smp_processor_id();
	rcu_read_lock();
	rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
					  sdma_rht_params);

	if (rht_node && rht_node->map[vl]) {
		struct sdma_rht_map_elem *map = rht_node->map[vl];

		/* mask is (power of two - 1), so any selector lands in range */
		sde = map->sde[selector & map->mask];
	}
	rcu_read_unlock();

	if (sde)
		return sde;

out:
	return sdma_select_engine_vl(dd, selector, vl);
}

/*
 * sdma_populate_sde_map() - pad an engine table up to a power of two
 * @map: engine table whose first @map->ctr slots hold the real engines
 *
 * Slots from ctr up to roundup_pow_of_two(ctr) are filled by repeating the
 * leading entries, so that indexing with a (power of two - 1) mask always
 * hits a populated slot.
 */
static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	unsigned long slots = roundup_pow_of_two(map->ctr ? : 1);
	unsigned long dst;

	for (dst = map->ctr; dst < slots; dst++)
		map->sde[dst] = map->sde[dst - map->ctr];
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
struct sdma_engine *sde)
{
unsigned int i, pow;

/* only need to check the first ctr entries for a match */
for (i = 0; i < map->ctr; i++) {
if (map->sde[i] == sde) {
memmove(&map->sde[i], &map->sde[i + 1],
(map->ctr - i - 1) * sizeof(map->sde[0]));
map->ctr--;
pow = roundup_pow_of_two(map->ctr ? : 1);
map->mask = pow - 1;
sdma_populate_sde_map(map);
break;
}
}
}

/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask.
 * sdma_set_cpu_to_sde_map() holds it across its whole update pass, so it
 * also serializes writers of the cpu -> sde hash table.
 */
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
size_t count)
{
struct hfi1_devdata *dd = sde->dd;
cpumask_var_t mask, new_mask;
unsigned long cpu;
int ret, vl, sz;

vl = sdma_engine_get_vl(sde);
if (unlikely(vl < 0))
return -EINVAL;

ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
if (!ret)
return -ENOMEM;

ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
if (!ret) {
free_cpumask_var(mask);
return -ENOMEM;
}
ret = cpulist_parse(buf, mask);
if (ret)
goto out_free;

if (!cpumask_subset(mask, cpu_online_mask)) {
dd_dev_warn(sde->dd, "Invalid CPU mask\n");
ret = -EINVAL;
goto out_free;
}

sz = sizeof(struct sdma_rht_map_elem) +
(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));

mutex_lock(&process_to_sde_mutex);

for_each_cpu(cpu, mask) {
struct sdma_rht_node *rht_node;

/* Check if we have this already mapped */
if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
cpumask_set_cpu(cpu, new_mask);
continue;
}

rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
if (!rht_node) {
ret = -ENOMEM;
goto out;
}

rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
if (!rht_node->map[vl]) {
kfree(rht_node);
ret = -ENOMEM;
goto out;
}
rht_node->cpu_id = cpu;
rht_node->map[vl]->mask = 0;
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;

ret = rhashtable_insert_fast(&dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
kfree(rht_node->map[vl]);
kfree(rht_node);
dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
cpu);
goto out;
}

} else {
int ctr, pow;

/* Add new user mappings */
if (!rht_node->map[vl])
rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);

if (!rht_node->map[vl]) {
ret = -ENOMEM;
goto out;
}

rht_node->map[vl]->ctr++;
ctr = rht_node->map[vl]->ctr;
rht_node->map[vl]->sde[ctr - 1] = sde;
pow = roundup_pow_of_two(ctr);
rht_node->map[vl]->mask = pow - 1;

/* Populate the sde map table */
sdma_populate_sde_map(rht_node->map[vl]);
}
cpumask_set_cpu(cpu, new_mask);
}

/* Clean up old mappings */
for_each_cpu(cpu, cpu_online_mask) {
struct sdma_rht_node *rht_node;

/* Don't cleanup sdes that are set in the new mask */
if (cpumask_test_cpu(cpu, mask))
continue;

rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
int i;

/* Remove mappings for old sde */
for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
if (rht_node->map[i])
sdma_cleanup_sde_map(rht_node->map[i],
sde);

/* Free empty hash table entries */
for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
if (!rht_node->map[i])
continue;

if (rht_node->map[i]->ctr) {
empty = false;
break;
}
}

if (empty) {
ret = rhashtable_remove_fast(&dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);

for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
kfree(rht_node->map[i]);

kfree(rht_node);
}
}
}

cpumask_copy(&sde->cpu_mask, new_mask);
out:
mutex_unlock(&process_to_sde_mutex);
out_free:
free_cpumask_var(mask);
free_cpumask_var(new_mask);
return ret ? : strnlen(buf, PAGE_SIZE);
}

/*
 * sdma_get_cpu_to_sde_map() - print the CPUs assigned to an engine
 * @sde: sdma engine
 * @buf: output buffer, written with a PAGE_SIZE limit
 *
 * Writes the engine's cpu_mask as a cpu list, or the word "empty" when no
 * CPU is assigned, while holding process_to_sde_mutex.
 *
 * Return: length of the string placed in @buf.
 */
ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	ssize_t len;

	mutex_lock(&process_to_sde_mutex);
	if (!cpumask_empty(&sde->cpu_mask))
		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
	else
		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
	mutex_unlock(&process_to_sde_mutex);

	len = strnlen(buf, PAGE_SIZE);
	return len;
}

/*
 * sdma_rht_free() - release one hash table node and its engine tables
 * @ptr: the struct sdma_rht_node to free
 * @arg: unused
 *
 * Has the shape of an rhashtable free callback; sdma_exit() passes it to
 * rhashtable_free_and_destroy().
 */
static void sdma_rht_free(void *ptr, void *arg)
{
	struct sdma_rht_node *victim = ptr;
	unsigned int vl = HFI1_MAX_VLS_SUPPORTED;

	/* kfree(NULL) is a no-op, so unused vl slots need no special casing */
	while (vl--)
		kfree(victim->map[vl]);

	kfree(victim);
}

/*
* Free the indicated map struct
*/
Expand Down Expand Up @@ -1161,6 +1466,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;

if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
goto bail;

dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;

Expand Down Expand Up @@ -1252,6 +1561,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}

/*
Expand Down
8 changes: 8 additions & 0 deletions drivers/infiniband/hw/hfi1/sdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ struct sdma_engine {
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
};

int sdma_init(struct hfi1_devdata *dd, u8 port);
Expand Down Expand Up @@ -1059,6 +1061,12 @@ struct sdma_engine *sdma_select_engine_vl(
u32 selector,
u8 vl);

/*
 * Pick the engine for a user sdma request: the user-defined cpu -> engine
 * mapping is used when the calling task is pinned to a single cpu,
 * otherwise the default <selector, vl> mapping applies.
 */
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
u32 selector, u8 vl);
/* Print the engine's cpu list (or "empty") into buf; returns its length */
ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
/* Parse a cpu list from buf and rebuild the cpu -> engine mappings for sde */
ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
size_t count);
/* vl the engine is mapped to, or -EINVAL if it cannot be determined */
int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);

#ifdef CONFIG_SDMA_VERBOSITY
Expand Down
Loading

0 comments on commit 0cb2aa6

Please sign in to comment.