Skip to content

Commit

Permalink
cxl: Enable global TLBIs for cxl contexts
Browse files Browse the repository at this point in the history
The PSL and nMMU need to see all TLB invalidations for the memory
contexts used on the adapter. For the hash memory model, it is done by
making all TLBIs global as soon as the cxl driver is in use. For
radix, we need something similar, but we can refine and only convert
to global the invalidations for contexts actually used by the device.

The new mm_context_add_copro() API increments the 'active_cpus' count
for the contexts attached to the cxl adapter. As soon as there's more
than 1 active cpu, the TLBIs for the context become global. Active cpu
count must be decremented when detaching to restore locality if
possible and to avoid overflowing the counter.

The hash memory model support is somewhat limited, as we can't
decrement the active cpus count when mm_context_remove_copro() is
called, because we can't flush the TLB for a mm on hash. So TLBIs
remain global on hash.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Fixes: f24be42 ("cxl: Add psl9 specific code")
Tested-by: Alistair Popple <alistair@popple.id.au>
[mpe: Fold in updated comment on the barrier from Fred]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Frederic Barrat authored and Michael Ellerman committed Sep 28, 2017
1 parent 6110236 commit 03b8abe
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 14 deletions.
46 changes: 46 additions & 0 deletions arch/powerpc/include/asm/mmu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,52 @@ extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Atomically increment the active_cpus count kept in the context of @mm.
 */
static inline void inc_mm_active_cpus(struct mm_struct *mm)
{
atomic_inc(&mm->context.active_cpus);
}

/*
 * Atomically decrement the active_cpus count kept in the context of @mm.
 */
static inline void dec_mm_active_cpus(struct mm_struct *mm)
{
atomic_dec(&mm->context.active_cpus);
}

/*
 * Account for a coprocessor starting to use @mm by bumping the context's
 * active_cpus count.
 */
static inline void mm_context_add_copro(struct mm_struct *mm)
{
/*
* On hash, should only be called once over the lifetime of
* the context, as we can't decrement the active cpus count
* and flush properly for the time being.
*/
inc_mm_active_cpus(mm);
}

/*
 * Undo mm_context_add_copro() for @mm once a coprocessor stops using it.
 * Only effective on radix; on hash this is a no-op.
 */
static inline void mm_context_remove_copro(struct mm_struct *mm)
{
	/*
	 * Hash does not know how to do the proper flush for the time
	 * being, so leave the count untouched there: invalidations
	 * remain global once a coprocessor has been added.
	 */
	if (!radix_enabled())
		return;

	/*
	 * A global flush of the full mm has to be broadcast *before*
	 * the active_cpus count is dropped: afterwards the next TLBI
	 * may be local, and the nMMU and/or PSL still need to be
	 * cleaned up. This should be rare enough to be acceptable.
	 */
	flush_all_mm(mm);
	dec_mm_active_cpus(mm);
}
#else
/* No-op stubs used when CONFIG_PPC_BOOK3S_64 is not set. */
static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
static inline void mm_context_add_copro(struct mm_struct *mm) { }
static inline void mm_context_remove_copro(struct mm_struct *mm) { }
#endif


extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);

Expand Down
9 changes: 0 additions & 9 deletions arch/powerpc/mm/mmu_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
struct mm_struct *mm) { }
#endif

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Atomically increment the active_cpus count kept in the context of @mm.
 */
static inline void inc_mm_active_cpus(struct mm_struct *mm)
{
atomic_inc(&mm->context.active_cpus);
}
#else
/* No-op stub used when CONFIG_PPC_BOOK3S_64 is not set. */
static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
#endif

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
Expand Down
16 changes: 13 additions & 3 deletions drivers/misc/cxl/api.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/sched/mm.h>
#include <linux/mmu_context.h>

#include "cxl.h"

Expand Down Expand Up @@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
/* ensure this mm_struct can't be freed */
cxl_context_mm_count_get(ctx);

/* decrement the use count */
if (ctx->mm)
if (ctx->mm) {
/* decrement the use count from above */
mmput(ctx->mm);
/* make TLBIs for this context global */
mm_context_add_copro(ctx->mm);
}
}

/*
Expand All @@ -342,13 +346,19 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
*/
cxl_ctx_get();

/* See the comment in afu_ioctl_start_work() */
smp_mb();

if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
put_pid(ctx->pid);
ctx->pid = NULL;
cxl_adapter_context_put(ctx->afu->adapter);
cxl_ctx_put();
if (task)
if (task) {
cxl_context_mm_count_put(ctx);
if (ctx->mm)
mm_context_remove_copro(ctx->mm);
}
goto out;
}

Expand Down
3 changes: 3 additions & 0 deletions drivers/misc/cxl/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/sched/mm.h>
#include <linux/mmu_context.h>
#include <asm/cputable.h>
#include <asm/current.h>
#include <asm/copro.h>
Expand Down Expand Up @@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)

/* Decrease the mm count on the context */
cxl_context_mm_count_put(ctx);
if (ctx->mm)
mm_context_remove_copro(ctx->mm);
ctx->mm = NULL;

return 0;
Expand Down
24 changes: 22 additions & 2 deletions drivers/misc/cxl/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/mmu_context.h>
#include <asm/cputable.h>
#include <asm/current.h>
#include <asm/copro.h>
Expand Down Expand Up @@ -220,16 +221,33 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
/* ensure this mm_struct can't be freed */
cxl_context_mm_count_get(ctx);

/* decrement the use count */
if (ctx->mm)
if (ctx->mm) {
/* decrement the use count from above */
mmput(ctx->mm);
/* make TLBIs for this context global */
mm_context_add_copro(ctx->mm);
}

/*
* Increment driver use count. Enables global TLBIs for hash
* and callbacks to handle the segment table
*/
cxl_ctx_get();

/*
* A barrier is needed to make sure all TLBIs are global
* before we attach and the context starts being used by the
* adapter.
*
* Needed after mm_context_add_copro() for radix and
* cxl_ctx_get() for hash/p8.
*
* The barrier should really be mb(), since it involves a
* device. However, it's only useful when we have local
* vs. global TLBIs, i.e. SMP=y. So keep smp_mb().
*/
smp_mb();

trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);

if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
Expand All @@ -240,6 +258,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
ctx->pid = NULL;
cxl_ctx_put();
cxl_context_mm_count_put(ctx);
if (ctx->mm)
mm_context_remove_copro(ctx->mm);
goto out;
}

Expand Down

0 comments on commit 03b8abe

Please sign in to comment.