Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.3-2023-01-13' of https://gitlab.freedesktop…
Browse files Browse the repository at this point in the history
….org/agd5f/linux into drm-next

amd-drm-next-6.3-2023-01-13:

amdgpu:
- Fix possible segfault in failure case
- Rework FW requests to happen in early_init for all IPs so
  that we don't lose the sbios console if FW is missing
- PSR fixes
- Misc cleanups
- Unload fix
- SMU13 fixes

amdkfd:
- Fix for cleared VRAM BOs
- Fix cleanup if GPUVM creation fails
- Memory accounting fix
- Use resource_size rather than open codeing it
- GC11 mGPU fix

radeon:
- Fix memory leak on shutdown

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230113225911.7776-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Jan 16, 2023
2 parents 45be204 + 0c2dece commit 1f1c24d
Show file tree
Hide file tree
Showing 67 changed files with 997 additions and 1,591 deletions.
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {

struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
int64_t vram_used;
uint64_t vram_used_aligned;
bool init_complete;
struct work_struct reset_work;
Expand Down Expand Up @@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
((struct drm_file *)(drm_priv))->driver_priv)->vm)

int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
struct file *filp, u32 pasid);
struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct file *filp,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
Expand Down
32 changes: 10 additions & 22 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1431,18 +1431,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
}

int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
struct file *filp, u32 pasid)
struct amdgpu_vm *avm, u32 pasid)

{
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
int ret;

ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
avm = &drv_priv->vm;

/* Free the original amdgpu allocated pasid,
* will be replaced with kfd allocated pasid.
*/
Expand All @@ -1459,19 +1452,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
}

int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct file *filp,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef)
{
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
int ret;

ret = amdgpu_file_to_fpriv(filp, &drv_priv);
if (ret)
return ret;
avm = &drv_priv->vm;

/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
Expand Down Expand Up @@ -1613,6 +1599,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_bo *bo;
struct drm_gem_object *gobj = NULL;
u32 domain, alloc_domain;
uint64_t aligned_size;
u64 alloc_flags;
int ret;

Expand Down Expand Up @@ -1668,22 +1655,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* the memory.
*/
if ((*mem)->aql_queue)
size = size >> 1;
size >>= 1;
aligned_size = PAGE_ALIGN(size);

(*mem)->alloc_flags = flags;

amdgpu_sync_create(&(*mem)->sync);

ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
if (ret) {
pr_debug("Insufficient memory\n");
goto err_reserve_limit;
}

pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
va, size, domain_string(alloc_domain));
va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));

ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
bo_type, NULL, &gobj);
if (ret) {
pr_debug("Failed to create BO on domain %s. ret %d\n",
Expand Down Expand Up @@ -1740,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
Expand Down Expand Up @@ -2100,7 +2088,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
}

amdgpu_amdkfd_remove_eviction_fence(
bo, bo->kfd_bo->process_info->eviction_fence);
bo, bo->vm_bo->vm->process_info->eviction_fence);

amdgpu_bo_unreserve(bo);

Expand Down
11 changes: 2 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,17 +411,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return -EINVAL;
}

err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err) {
DRM_ERROR("Failed to request firmware\n");
return err;
}

err = amdgpu_ucode_validate(adev->pm.fw);
err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
release_firmware(adev->pm.fw);
adev->pm.fw = NULL;
amdgpu_ucode_release(&adev->pm.fw);
return err;
}

Expand Down
22 changes: 11 additions & 11 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
Expand Down Expand Up @@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
Expand Down Expand Up @@ -1982,17 +1985,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}

snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
if (err) {
dev_err(adev->dev,
"Failed to load gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
if (err) {
dev_err(adev->dev,
"Failed to validate gpu_info firmware \"%s\"\n",
"Failed to get gpu_info firmware \"%s\"\n",
fw_name);
goto out;
}
Expand Down Expand Up @@ -3688,6 +3684,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;

/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
if (r)
return r;

/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);

Expand Down Expand Up @@ -4023,8 +4024,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)

amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
release_firmware(adev->firmware.gpu_info_fw);
adev->firmware.gpu_info_fw = NULL;
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));

Expand Down
6 changes: 0 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
*/

#include <drm/amdgpu_drm.h>
#include <drm/drm_aperture.h>
#include <drm/drm_drv.h>
#include <drm/drm_fbdev_generic.h>
#include <drm/drm_gem.h>
Expand Down Expand Up @@ -2124,11 +2123,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
}
#endif

/* Get rid of things like offb */
ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
if (ret)
return ret;

adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
return PTR_ERR(adev);
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
goto unlock;
}

ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);

drm_dev_exit(idx);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}
Expand Down
59 changes: 59 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
*
*/

#include <linux/firmware.h>

#include "amdgpu_mes.h"
#include "amdgpu.h"
#include "soc15_common.h"
Expand Down Expand Up @@ -1423,3 +1425,60 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
kfree(vm);
return 0;
}

int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
char ucode_prefix[30];
char fw_name[40];
int r;

amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
if (r)
goto out;

mes_hdr = (const struct mes_firmware_header_v1_0 *)
adev->mes.fw[pipe]->data;
adev->mes.uc_start_addr[pipe] =
le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
adev->mes.data_start_addr[pipe] =
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
int ucode, ucode_data;

if (pipe == AMDGPU_MES_SCHED_PIPE) {
ucode = AMDGPU_UCODE_ID_CP_MES;
ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
} else {
ucode = AMDGPU_UCODE_ID_CP_MES1;
ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
}

info = &adev->firmware.ucode[ucode];
info->ucode_id = ucode;
info->fw = adev->mes.fw[pipe];
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
PAGE_SIZE);

info = &adev->firmware.ucode[ucode_data];
info->ucode_id = ucode_data;
info->fw = adev->mes.fw[pipe];
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
PAGE_SIZE);
}

return 0;
out:
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
}
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ struct amdgpu_mes_funcs {

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);

int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);

Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
return true;

fail:
DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
man->size);
if (man)
DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
man->size);
return false;
}

Expand Down
Loading

0 comments on commit 1f1c24d

Please sign in to comment.