Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.15-2025-03-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
Browse files Browse the repository at this point in the history

amd-drm-next-6.15-2025-03-21:

amdgpu:
- Refine nomodeset handling
- RAS fixes
- DCN 3.x fixes
- DMUB fixes
- eDP fixes
- SMU 14.0.2 fixes
- SMU 13.0.6 fixes
- SMU 13.0.12 fixes
- SDMA engine reset fixes
- Enforce Isolation fixes
- Runtime workload profile ref count fixes
- Documentation fixes
- SR-IOV fixes
- MES fixes
- GC 11.5 cleaner shader support
- SDMA VM invalidation fixes
- IP discovery improvements for GC based chips

amdkfd:
- Dequeue wait count fixes
- Precise memops fixes

radeon:
- Code cleanup

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250321210909.2809595-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Mar 24, 2025
2 parents f72e21e + 7547510 commit a82866f
Show file tree
Hide file tree
Showing 73 changed files with 1,248 additions and 1,052 deletions.
13 changes: 11 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -1194,9 +1194,15 @@ struct amdgpu_device {
bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;

bool enforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and compute processes */
/* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
bool enforce_isolation[MAX_XCP];
struct amdgpu_isolation {
void *owner;
struct dma_fence *spearhead;
struct amdgpu_sync active;
struct amdgpu_sync prev;
} isolation[MAX_XCP];

struct amdgpu_init_level *init_lvl;

Expand Down Expand Up @@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
Expand Down
25 changes: 23 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
{
struct aca_bank_node *node;
struct aca_bank *bank;
int r;

if (!adev->cper.enabled)
return;
Expand All @@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,

/* UEs must be encoded into separate CPER entries */
if (type == ACA_SMU_TYPE_UE) {
struct aca_banks de_banks;

aca_banks_init(&de_banks);
list_for_each_entry(node, &banks->list, node) {
bank = &node->bank;
if (amdgpu_cper_generate_ue_record(adev, bank))
dev_warn(adev->dev, "fail to generate ue cper records\n");
if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
r = aca_banks_add_bank(&de_banks, bank);
if (r)
dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
} else {
if (amdgpu_cper_generate_ue_record(adev, bank))
dev_warn(adev->dev, "fail to generate ue cper records\n");
}
}

if (!list_empty(&de_banks.list)) {
if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
dev_warn(adev->dev, "fail to generate de cper records\n");
}

aca_banks_release(&de_banks);
} else {
/*
* SMU_TYPE_CE banks are combined into 1 CPER entries,
Expand Down Expand Up @@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
if (ret)
return ret;

/* DEs may contain in CEs or UEs */
if (type != ACA_ERROR_TYPE_DEFERRED)
aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);

return aca_log_aca_error(handle, type, err_data);
}

Expand Down
16 changes: 11 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,17 @@ struct ras_query_context;
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */

#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \
ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_CE)
#define ACA_BANK_ERR_IS_DEFFERED(bank) \
(ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))

#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_CE)

#define ACA_BANK_ERR_UE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_UE)

enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
Expand Down
8 changes: 4 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;

return amdgpu_sync_fence(sync, vm->last_update);
return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}

static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
Expand Down Expand Up @@ -1249,7 +1249,7 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,

(void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

(void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
(void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);

return 0;
}
Expand All @@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}

return amdgpu_sync_fence(sync, bo_va->last_pt_update);
return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}

static int map_bo_to_gpuvm(struct kgd_mem *mem,
Expand Down Expand Up @@ -2913,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
ret = amdgpu_sync_fence(&sync_obj, fence);
ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
Expand Down
15 changes: 8 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,10 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
return umin(rec_len, chunk);
}

void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
void *src, int count)
void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
{
u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
int rec_cnt_dw = count >> 2;
u32 chunk, ent_sz;
u8 *s = (u8 *)src;

Expand All @@ -485,6 +485,9 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
s += chunk;
}

if (ring->count_dw < rec_cnt_dw)
ring->count_dw = 0;

/* the buffer is overflow, adjust rptr */
if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
Expand All @@ -501,12 +504,10 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
pos = rptr;
} while (!amdgpu_cper_is_hdr(ring, rptr));
}
mutex_unlock(&ring->adev->cper.ring_lock);

if (ring->count_dw >= (count >> 2))
ring->count_dw -= (count >> 2);
else
ring->count_dw = 0;
if (ring->count_dw >= rec_cnt_dw)
ring->count_dw -= rec_cnt_dw;
mutex_unlock(&ring->adev->cper.ring_lock);
}

static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
Expand Down
20 changes: 12 additions & 8 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}

r = amdgpu_sync_fence(&p->sync, fence);
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
Expand All @@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
return r;
}

r = amdgpu_sync_fence(&p->sync, fence);
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
return r;
}
Expand Down Expand Up @@ -1111,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
struct drm_gpu_scheduler *sched = entity->rq->sched;
struct amdgpu_ring *ring = to_amdgpu_ring(sched);

if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
}
}
Expand All @@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;

r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;

Expand All @@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;

r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;
}
Expand All @@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;

r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;
}
Expand All @@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;

r = amdgpu_sync_fence(&p->sync, vm->last_update);
r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;

Expand Down Expand Up @@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
continue;
}

r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
Expand Down
Loading

0 comments on commit a82866f

Please sign in to comment.