Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-5.16-2021-10-08' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-next-5.16-2021-10-08:

amdgpu:
- gart BO pin count fix
- RAS fixes
- Misc display fixes
- Misc code cleanups
- Validate IP discovery table
- IOMMU handling fixes for hotplug
- Cyan Skillfish display support
- DP 2.0 fixes
- Covert vega and navi to IP discovery based asic enumeration
- JPEG fixes
- More FP cleanup for display
- DCC fixes for DCN3.x
- Initial USB4 DP tunnelling support
- Aldebaran MCE support

amdkfd:
- Misc bug fixes
- Misc code cleanups
- RAS fixes

x86/MCE:
- Export symbol for use by GPU driver

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211008200345.4689-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Oct 11, 2021
2 parents 407baae + 40348ba commit b1f8166
Show file tree
Hide file tree
Showing 193 changed files with 40,630 additions and 1,867 deletions.
4 changes: 2 additions & 2 deletions Documentation/gpu/amdgpu.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,8 @@ pcie_replay_count
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
:doc: pcie_replay_count

+GPU SmartShift Information
============================
GPU SmartShift Information
==========================

GPU SmartShift information via sysfs

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ extern int mce_threshold_remove_device(unsigned int cpu);

void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);

enum smca_bank_types smca_get_bank_type(unsigned int bank);
#else

static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/mce/amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);

static enum smca_bank_types smca_get_bank_type(unsigned int bank)
enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;

Expand All @@ -132,6 +132,7 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)

return b->hwid->bank_type;
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);

static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype } */
Expand Down
11 changes: 9 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@ enum amd_hw_ip_block_type {
UVD_HWIP,
VCN_HWIP = UVD_HWIP,
JPEG_HWIP = VCN_HWIP,
VCN1_HWIP,
VCE_HWIP,
DF_HWIP,
DCE_HWIP,
Expand All @@ -755,11 +756,16 @@ enum amd_hw_ip_block_type {
CLK_HWIP,
UMC_HWIP,
RSMU_HWIP,
XGMI_HWIP,
DCI_HWIP,
MAX_HWIP
};

#define HWIP_MAX_INSTANCE 10

#define HW_ID_MAX 300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
Expand Down Expand Up @@ -830,6 +836,7 @@ struct amdgpu_device {
struct notifier_block acpi_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct debugfs_blob_wrapper debugfs_discovery_blob;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
Expand Down Expand Up @@ -1078,17 +1085,17 @@ struct amdgpu_device {
char product_name[32];
char serial[20];

struct amdgpu_autodump autodump;

atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;

bool no_hw_access;
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;

struct amdgpu_reset_control *reset_cntl;
uint32_t ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
Expand Down
17 changes: 15 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"

/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
Expand Down Expand Up @@ -70,8 +72,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
if (!kfd_initialized)
return;

adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
adev->pdev, adev->asic_type, vf);
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, vf);

if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
Expand Down Expand Up @@ -780,3 +781,15 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)

return adev->have_atomics_support;
}

void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct ras_err_data err_data = {0, 0, 0, NULL};

/* CPU MCA will handle page retirement if connected_to_cpu is 1 */
if (!adev->gmc.xgmi.connected_to_cpu)
amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL);
else
amdgpu_amdkfd_gpu_reset(kgd);
}
7 changes: 3 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
uint64_t *mmap_offset);
int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd);
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
Expand Down Expand Up @@ -321,8 +322,7 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
#if IS_ENABLED(CONFIG_HSA_AMD)
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
unsigned int asic_type, bool vf);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources);
Expand All @@ -346,8 +346,7 @@ static inline void kgd2kfd_exit(void)
}

static inline
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
unsigned int asic_type, bool vf)
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
{
return NULL;
}
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,

dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
kfree(ttm->sg);
ttm->sg = NULL;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static void amdgpu_bo_list_free(struct kref *ref)

int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries, struct amdgpu_bo_list **result)
size_t num_entries, struct amdgpu_bo_list **result)
{
unsigned last_entry = 0, first_userptr = num_entries;
struct amdgpu_bo_list_entry *array;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
size_t num_entries,
struct amdgpu_bo_list **list);

static inline struct amdgpu_bo_list_entry *
Expand Down
96 changes: 11 additions & 85 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/poll.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
Expand All @@ -38,86 +37,8 @@
#include "amdgpu_fw_attestation.h"
#include "amdgpu_umr.h"

int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
unsigned long timeout = 600 * HZ;
int ret;

wake_up_interruptible(&adev->autodump.gpu_hang);

ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
if (ret == 0) {
pr_err("autodump: timeout, move on to gpu recovery\n");
return -ETIMEDOUT;
}
#endif
return 0;
}

#if defined(CONFIG_DEBUG_FS)

static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = inode->i_private;
int ret;

file->private_data = adev;

ret = down_read_killable(&adev->reset_sem);
if (ret)
return ret;

if (adev->autodump.dumping.done) {
reinit_completion(&adev->autodump.dumping);
ret = 0;
} else {
ret = -EBUSY;
}

up_read(&adev->reset_sem);

return ret;
}

static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
{
struct amdgpu_device *adev = file->private_data;

complete_all(&adev->autodump.dumping);
return 0;
}

static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
{
struct amdgpu_device *adev = file->private_data;

poll_wait(file, &adev->autodump.gpu_hang, poll_table);

if (amdgpu_in_reset(adev))
return POLLIN | POLLRDNORM | POLLWRNORM;

return 0;
}

static const struct file_operations autodump_debug_fops = {
.owner = THIS_MODULE,
.open = amdgpu_debugfs_autodump_open,
.poll = amdgpu_debugfs_autodump_poll,
.release = amdgpu_debugfs_autodump_release,
};

static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
{
init_completion(&adev->autodump.dumping);
complete_all(&adev->autodump.dumping);
init_waitqueue_head(&adev->autodump.gpu_hang);

debugfs_create_file("amdgpu_autodump", 0600,
adev_to_drm(adev)->primary->debugfs_root,
adev, &autodump_debug_fops);
}

/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
*
Expand Down Expand Up @@ -1407,7 +1328,7 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
return r;
}

*val = amdgpu_bo_evict_vram(adev);
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);

pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
Expand All @@ -1420,17 +1341,15 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
{
struct amdgpu_device *adev = (struct amdgpu_device *)data;
struct drm_device *dev = adev_to_drm(adev);
struct ttm_resource_manager *man;
int r;

r = pm_runtime_get_sync(dev->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(dev->dev);
return r;
}

man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
*val = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);

pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
Expand Down Expand Up @@ -1696,6 +1615,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
struct dentry *ent;
int r, i;

if (!debugfs_initialized())
return 0;

ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
&fops_ib_preempt);
if (IS_ERR(ent)) {
Expand Down Expand Up @@ -1738,7 +1660,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
}

amdgpu_ras_debugfs_create_all(adev);
amdgpu_debugfs_autodump_init(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
Expand All @@ -1757,6 +1678,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);

adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
debugfs_create_blob("amdgpu_discovery", 0444, root,
&adev->debugfs_discovery_blob);

return 0;
}

Expand Down
5 changes: 0 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,10 @@
/*
* Debugfs
*/
struct amdgpu_autodump {
struct completion dumping;
struct wait_queue_head gpu_hang;
};

int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_fini(struct amdgpu_device *adev);
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
Loading

0 comments on commit b1f8166

Please sign in to comment.