Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.4-2023-04-14' of https://gitlab.freedesktop…
Browse files Browse the repository at this point in the history
….org/agd5f/linux into drm-next

amd-drm-next-6.4-2023-04-14:

amdgpu:
- S4 fixes for APUs
- GFX11 fixes
- Misc code cleanups
- DCN 3.2 fixes
- DCN 3.1.4 fixes
- FPO/FAMS work to improve display power savings
- DP fixes
- UMC 8.10 code cleanup
- SDMA v4 fix
- GPU clock counter fixes
- SMU 13 fixes
- Sdma v6 invalidation fix for preemption
- RAS fixes
- S0ix fix
- GC 9.4.3 updates

amdkfd:
- Fix user pointers with IOMMU
- Fix coherency flag handling

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230414204609.7942-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Apr 17, 2023
2 parents afa351a + 541372b commit e82c98f
Show file tree
Hide file tree
Showing 107 changed files with 2,118 additions and 730 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ amdgpu-y += \
gfx_v9_0.o \
gfx_v9_4.o \
gfx_v9_4_2.o \
gfx_v9_4_3.o \
gfx_v10_0.o \
imu_v11_0.o \
gfx_v11_0.o \
Expand Down
5 changes: 2 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
extern uint amdgpu_force_long_training;
extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
Expand Down Expand Up @@ -471,7 +470,7 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
/*
* Writeback
*/
#define AMDGPU_MAX_WB 256 /* Reserve at most 256 WB slots for amdgpu-owned rings. */
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */

struct amdgpu_wb {
struct amdgpu_bo *wb_obj;
Expand Down Expand Up @@ -1222,7 +1221,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
#define amdgpu_asic_invalidate_hdp(adev, r) \
((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0))
((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
Expand Down
7 changes: 6 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
*/
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
if ((adev->flags & AMD_IS_APU) &&
adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
return false;

if ((adev->flags & AMD_IS_APU) &&
amdgpu_acpi_is_s3_active(adev))
return false;

if (amdgpu_sriov_vf(adev))
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
size_t *start_offset)
{
/*
* The first num_doorbells are used by amdgpu.
* The first num_kernel_doorbells are used by amdgpu.
* amdkfd takes whatever's left in the aperture.
*/
if (adev->enable_mes) {
Expand All @@ -109,11 +109,11 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
*aperture_base = adev->doorbell.base;
*aperture_size = 0;
*start_offset = 0;
} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
sizeof(u32)) {
*aperture_base = adev->doorbell.base;
*aperture_size = adev->doorbell.size;
*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
} else {
*aperture_base = 0;
*aperture_size = 0;
Expand Down
42 changes: 31 additions & 11 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
return false;
}

/**
* reuse_dmamap() - Check whether adev can share the original
* userptr BO
*
* If both adev and bo_adev are in direct mapping or
* in the same iommu group, they can share the original BO.
*
* @adev: Device to which can or cannot share the original BO
* @bo_adev: Device to which allocated BO belongs to
*
* Return: returns true if adev can share original userptr BO,
* false otherwise.
*/
static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
{
return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
(adev->dev->iommu_group == bo_adev->dev->iommu_group);
}

/* Set memory usage limits. Current, limits are
* System (TTM + userptr) memory - 15/16th System RAM
* TTM memory - 3/8th System RAM
Expand Down Expand Up @@ -253,15 +272,19 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
struct kgd_mem *mem, struct amdgpu_bo **bo_out)
{
struct drm_gem_object *gem_obj;
int ret, align;
int ret;
uint64_t flags = 0;

ret = amdgpu_bo_reserve(mem->bo, false);
if (ret)
return ret;

align = 1;
ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED);

ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);

amdgpu_bo_unreserve(mem->bo);
Expand Down Expand Up @@ -481,9 +504,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
if (unlikely(ret))
goto release_sg;

drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
ttm->num_pages);

amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
Expand Down Expand Up @@ -805,11 +825,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
va + bo_size, vm);

if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
same_hive) {
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
* local hive, or userptr mapping IOMMU direct map mode
* share the original BO
* local hive, or userptr mapping can reuse dma map
* address space share the original BO
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
bo[i] = mem->bo;
Expand Down
34 changes: 18 additions & 16 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
if (amdgpu_device_skip_hw_access(adev))
return 0;

if (index < adev->doorbell.num_doorbells) {
if (index < adev->doorbell.num_kernel_doorbells) {
return readl(adev->doorbell.ptr + index);
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
Expand All @@ -625,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
if (amdgpu_device_skip_hw_access(adev))
return;

if (index < adev->doorbell.num_doorbells) {
if (index < adev->doorbell.num_kernel_doorbells) {
writel(v, adev->doorbell.ptr + index);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
Expand All @@ -646,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
if (amdgpu_device_skip_hw_access(adev))
return 0;

if (index < adev->doorbell.num_doorbells) {
if (index < adev->doorbell.num_kernel_doorbells) {
return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
} else {
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
Expand All @@ -669,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
if (amdgpu_device_skip_hw_access(adev))
return;

if (index < adev->doorbell.num_doorbells) {
if (index < adev->doorbell.num_kernel_doorbells) {
atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
} else {
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
Expand Down Expand Up @@ -1060,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
if (adev->asic_type < CHIP_BONAIRE) {
adev->doorbell.base = 0;
adev->doorbell.size = 0;
adev->doorbell.num_doorbells = 0;
adev->doorbell.num_kernel_doorbells = 0;
adev->doorbell.ptr = NULL;
return 0;
}
Expand All @@ -1075,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
adev->doorbell.size = pci_resource_len(adev->pdev, 2);

if (adev->enable_mes) {
adev->doorbell.num_doorbells =
adev->doorbell.num_kernel_doorbells =
adev->doorbell.size / sizeof(u32);
} else {
adev->doorbell.num_doorbells =
adev->doorbell.num_kernel_doorbells =
min_t(u32, adev->doorbell.size / sizeof(u32),
adev->doorbell_index.max_assignment+1);
if (adev->doorbell.num_doorbells == 0)
if (adev->doorbell.num_kernel_doorbells == 0)
return -EINVAL;

/* For Vega, reserve and map two pages on doorbell BAR since SDMA
* paging queue doorbell use the second page. The
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
* doorbells are in the first page. So with paging queue enabled,
* the max num_doorbells should + 1 page (0x400 in dword)
* the max num_kernel_doorbells should + 1 page (0x400 in dword)
*/
if (adev->asic_type >= CHIP_VEGA10)
adev->doorbell.num_doorbells += 0x400;
adev->doorbell.num_kernel_doorbells += 0x400;
}

adev->doorbell.ptr = ioremap(adev->doorbell.base,
adev->doorbell.num_doorbells *
adev->doorbell.num_kernel_doorbells *
sizeof(u32));
if (adev->doorbell.ptr == NULL)
return -ENOMEM;
Expand Down Expand Up @@ -2184,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}

amdgpu_amdkfd_device_probe(adev);

adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
Expand Down Expand Up @@ -2240,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (!total)
return -ENODEV;

amdgpu_amdkfd_device_probe(adev);
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;

Expand Down Expand Up @@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
}

r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
ring->num_hw_submission, amdgpu_job_hang_limit,
ring->num_hw_submission, 0,
timeout, adev->reset_domain->wq,
ring->sched_score, ring->name,
adev->dev);
Expand Down Expand Up @@ -3305,9 +3305,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
int r;

r = amdgpu_amdkfd_resume_iommu(adev);
if (r)
return r;
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume_iommu(adev);
if (r)
return r;
}

r = amdgpu_device_ip_resume_phase1(adev);
if (r)
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -1502,6 +1502,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
Expand Down
8 changes: 7 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
*
*/

#ifndef AMDGPU_DOORBELL_H
#define AMDGPU_DOORBELL_H

/*
* GPU doorbell structures, functions & helpers
*/
Expand All @@ -29,7 +32,9 @@ struct amdgpu_doorbell {
resource_size_t base;
resource_size_t size;
u32 __iomem *ptr;
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */

/* Number of doorbells reserved for amdgpu kernel driver */
u32 num_kernel_doorbells;
};

/* Reserved doorbells for amdgpu (including multimedia).
Expand Down Expand Up @@ -306,3 +311,4 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))

#endif
8 changes: 0 additions & 8 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ char *amdgpu_virtual_display;
*/
uint amdgpu_pp_feature_mask = 0xfff7bfff;
uint amdgpu_force_long_training;
int amdgpu_job_hang_limit;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
Expand Down Expand Up @@ -520,13 +519,6 @@ MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);

/**
* DOC: job_hang_limit (int)
* Set how much time allow a job hang and not drop it. The default is 0.
*/
MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);

/**
* DOC: lbpw (int)
* Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.kiq;
ring->vm_hub = AMDGPU_GFXHUB_0;

r = amdgpu_gfx_kiq_acquire(adev, ring);
if (r)
Expand Down
17 changes: 16 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L

#define AMDGPU_MAX_GC_INSTANCES 8

#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES

Expand All @@ -53,6 +55,15 @@ enum amdgpu_gfx_pipe_priority {
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15

enum amdgpu_gfx_partition {
AMDGPU_SPX_PARTITION_MODE = 0,
AMDGPU_DPX_PARTITION_MODE = 1,
AMDGPU_TPX_PARTITION_MODE = 2,
AMDGPU_QPX_PARTITION_MODE = 3,
AMDGPU_CPX_PARTITION_MODE = 4,
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE,
};

struct amdgpu_mec {
struct amdgpu_bo *hpd_eop_obj;
u64 hpd_eop_gpu_addr;
Expand Down Expand Up @@ -323,7 +334,7 @@ struct amdgpu_gfx {
bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
Expand Down Expand Up @@ -364,6 +375,10 @@ struct amdgpu_gfx {

struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux muxer;

enum amdgpu_gfx_partition partition_mode;
uint32_t num_xcd;
uint32_t num_xcc_per_xcp;
};

#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
Expand Down
Loading

0 comments on commit e82c98f

Please sign in to comment.