drm/xe: Add range based TLB invalidations
If the platform supports range based TLB invalidations, use them. Hide
these details in the xe_gt_tlb_invalidation layer.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Matthew Brost authored and Rodrigo Vivi committed Dec 19, 2023
1 parent 9d25e28 commit 332dd01
Showing 5 changed files with 84 additions and 25 deletions.
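
A quick sketch of the caller-facing contract after this change (invalidate_vma_sync is a hypothetical helper for illustration; only xe_gt_tlb_invalidation and xe_gt_tlb_invalidation_wait come from this commit). Callers now pass the VMA being invalidated so the layer can size the flush, while keeping the same fence-or-seqno completion model:

	/* Hypothetical helper, not part of this commit */
	static int invalidate_vma_sync(struct xe_gt *gt, struct xe_vma *vma)
	{
		int seqno;

		/* NULL fence: wait on the returned seqno instead */
		seqno = xe_gt_tlb_invalidation(gt, NULL, vma);
		if (seqno < 0)
			return seqno;

		return xe_gt_tlb_invalidation_wait(gt, seqno);
	}
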
7 changes: 1 addition & 6 deletions drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -240,12 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
 		goto retry_userptr;
 
 	if (!ret) {
-		/*
-		 * FIXME: Doing a full TLB invalidation for now, likely could
-		 * defer TLB invalidate + fault response to a callback of fence
-		 * too
-		 */
-		ret = xe_gt_tlb_invalidation(gt, NULL);
+		ret = xe_gt_tlb_invalidation(gt, NULL, vma);
 		if (ret >= 0)
 			ret = 0;
 	}
87 changes: 73 additions & 14 deletions drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 }
 
 static int send_tlb_invalidation(struct xe_guc *guc,
-				 struct xe_gt_tlb_invalidation_fence *fence)
+				 struct xe_gt_tlb_invalidation_fence *fence,
+				 u32 *action, int len)
 {
 	struct xe_gt *gt = guc_to_gt(guc);
-	u32 action[] = {
-		XE_GUC_ACTION_TLB_INVALIDATION,
-		0,
-		XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
-			XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
-			XE_GUC_TLB_INVAL_FLUSH_CACHE,
-	};
 	int seqno;
 	int ret;
 	bool queue_work;
@@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
 			TLB_INVALIDATION_SEQNO_MAX;
 	if (!gt->tlb_invalidation.seqno)
 		gt->tlb_invalidation.seqno = 1;
-	ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
+	ret = xe_guc_ct_send_locked(&guc->ct, action, len,
 				    G2H_LEN_DW_TLB_INVALIDATE, 1);
 	if (!ret && fence) {
 		fence->invalidation_time = ktime_get();
@@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc,
  * @gt: graphics tile
  * @fence: invalidation fence which will be signal on TLB invalidation
  * completion, can be NULL
+ * @vma: VMA to invalidate
  *
- * Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous
- * and caller can either use the invalidation fence or seqno +
- * xe_gt_tlb_invalidation_wait to wait for completion.
+ * Issue a range based TLB invalidation if supported, if not fallback to a full
+ * TLB invalidation. Completion of TLB is asynchronous and caller can either use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
  *
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence)
+			   struct xe_gt_tlb_invalidation_fence *fence,
+			   struct xe_vma *vma)
 {
-	return send_tlb_invalidation(&gt->uc.guc, fence);
+	struct xe_device *xe = gt_to_xe(gt);
+#define MAX_TLB_INVALIDATION_LEN	7
+	u32 action[MAX_TLB_INVALIDATION_LEN];
+	int len = 0;
+
+	XE_BUG_ON(!vma);
+
+	if (!xe->info.has_range_tlb_invalidation) {
+		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+#define MAKE_INVAL_OP(type)	((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+		XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+		XE_GUC_TLB_INVAL_FLUSH_CACHE)
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+	} else {
+		u64 start = vma->start;
+		u64 length = vma->end - vma->start + 1;
+		u64 align, end;
+
+		if (length < SZ_4K)
+			length = SZ_4K;
+
+		/*
+		 * We need to invalidate a higher granularity if start address
+		 * is not aligned to length. When start is not aligned with
+		 * length we need to find the length large enough to create an
+		 * address mask covering the required range.
+		 */
+		align = roundup_pow_of_two(length);
+		start = ALIGN_DOWN(vma->start, align);
+		end = ALIGN(vma->start + length, align);
+		length = align;
+		while (start + length < end) {
+			length <<= 1;
+			start = ALIGN_DOWN(vma->start, length);
+		}
+
+		/*
+		 * Minimum invalidation size for a 2MB page that the hardware
+		 * expects is 16MB
+		 */
+		if (length >= SZ_2M) {
+			length = max_t(u64, SZ_16M, length);
+			start = ALIGN_DOWN(vma->start, length);
+		}
+
+		XE_BUG_ON(length < SZ_4K);
+		XE_BUG_ON(!is_power_of_2(length));
+		XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
+		XE_BUG_ON(!IS_ALIGNED(start, length));
+
+		action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+		action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
+		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+		action[len++] = vma->vm->usm.asid;
+		action[len++] = lower_32_bits(start);
+		action[len++] = upper_32_bits(start);
+		action[len++] = ilog2(length) - ilog2(SZ_4K);
+	}
+
+	XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
+
+	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
 static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
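
The address-mask rounding above is easiest to follow with a worked example. Below is a minimal userspace sketch of the same computation (kernel helpers replaced with plain C stand-ins; the input values are illustrative, not from the commit):

	#include <stdint.h>
	#include <stdio.h>

	/* Plain C stand-ins for the kernel's ALIGN_DOWN/ALIGN helpers */
	#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))
	#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

	static uint64_t roundup_pow_of_two(uint64_t v)
	{
		uint64_t r = 1;

		while (r < v)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		/* A VMA at 0x17000 spanning three 4K pages (0x3000 bytes) */
		uint64_t vma_start = 0x17000, length = 0x3000;
		uint64_t align = roundup_pow_of_two(length);		/* 0x4000 */
		uint64_t start = ALIGN_DOWN(vma_start, align);		/* 0x14000 */
		uint64_t end = ALIGN_UP(vma_start + length, align);	/* 0x1c000 */

		length = align;
		/* Grow until one aligned power-of-two block covers [start, end) */
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(vma_start, length);
		}

		/* Prints start=0x10000 length=0x10000: a single 64K block, which
		 * the action encodes as ilog2(length) - ilog2(SZ_4K) = 16 - 12 = 4.
		 */
		printf("start=%#llx length=%#llx\n",
		       (unsigned long long)start, (unsigned long long)length);
		return 0;
	}
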
4 changes: 3 additions & 1 deletion drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -12,11 +12,13 @@
 
 struct xe_gt;
 struct xe_guc;
+struct xe_vma;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
 int xe_gt_tlb_invalidation(struct xe_gt *gt,
-			   struct xe_gt_tlb_invalidation_fence *fence);
+			   struct xe_gt_tlb_invalidation_fence *fence,
+			   struct xe_vma *vma);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
9 changes: 6 additions & 3 deletions drivers/gpu/drm/xe/xe_pt.c
@@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
 struct invalidation_fence {
 	struct xe_gt_tlb_invalidation_fence base;
 	struct xe_gt *gt;
+	struct xe_vma *vma;
 	struct dma_fence *fence;
 	struct dma_fence_cb cb;
 	struct work_struct work;
@@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w)
 		container_of(w, struct invalidation_fence, work);
 
 	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-	xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
+	xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
 				   struct invalidation_fence *ifence,
-				   struct dma_fence *fence)
+				   struct dma_fence *fence,
+				   struct xe_vma *vma)
 {
 	int ret;
 
@@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	dma_fence_get(&ifence->base.base);	/* Ref for caller */
 	ifence->fence = fence;
 	ifence->gt = gt;
+	ifence->vma = vma;
 
 	INIT_WORK(&ifence->work, invalidation_fence_work_func);
 	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
 	int err;
 
 	/* TLB invalidation must be done before signaling unbind */
-	err = invalidation_fence_init(gt, ifence, fence);
+	err = invalidation_fence_init(gt, ifence, fence, vma);
 	if (err) {
 		dma_fence_put(fence);
 		kfree(ifence);
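
Why the VMA is stashed on the fence at all: the invalidation is not issued inline but from a worker once the unbind fence signals, likely because dma_fence callbacks can run in interrupt context while the GuC CT send path can sleep. A minimal sketch of that chain (the callback itself is not shown in this diff; assume it looks roughly like this):

	/* Sketch only: plausible shape of the callback referenced above */
	static void invalidation_fence_cb(struct dma_fence *fence,
					  struct dma_fence_cb *cb)
	{
		struct invalidation_fence *ifence =
			container_of(cb, struct invalidation_fence, cb);

		/* May run in IRQ context: defer the CT send to process context */
		queue_work(system_wq, &ifence->work);
	}
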
2 changes: 1 addition & 1 deletion drivers/gpu/drm/xe/xe_vm.c
@@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	if (xe_pt_zap_ptes(gt, vma)) {
 		gt_needs_invalidate |= BIT(id);
 		xe_device_wmb(xe);
-		seqno[id] = xe_gt_tlb_invalidation(gt, NULL);
+		seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma);
 		if (seqno[id] < 0)
 			return seqno[id];
 	}
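
Not shown in this hunk: xe_vm_invalidate_vma then waits on each collected seqno, so invalidations are issued to every GT first and only waited on afterwards. A sketch of that second phase (assuming xe's for_each_gt iterator):

	/* Sketch of the issue-then-wait pattern across GTs */
	for_each_gt(gt, xe, id) {
		if (gt_needs_invalidate & BIT(id)) {
			ret = xe_gt_tlb_invalidation_wait(gt, seqno[id]);
			if (ret < 0)
				return ret;
		}
	}
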
