Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.11-2024-06-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
Browse files Browse the repository at this point in the history

amd-drm-next-6.11-2024-06-07:

amdgpu:
- DCN 4.0.x support
- DCN 3.5 updates
- GC 12.0 support
- DP MST fixes
- Cursor fixes
- MES11 updates
- MMHUB 4.1 support
- DML2 Updates
- DCN 3.1.5 fixes
- IPS fixes
- Various code cleanups
- GMC 12.0 support
- SDMA 7.0 support
- SMU 13 updates
- SR-IOV fixes
- VCN 5.x fixes
- MES12 support
- SMU 14.x updates
- Devcoredump improvements
- Fixes for HDP flush on platforms with >4k pages
- GC 9.4.3 fixes
- RAS ACA updates
- Silence UBSAN flex array warnings
- MMHUB 3.3 updates

amdkfd:
- Contiguous VRAM allocations
- GC 12.0 support
- SDMA 7.0 support
- SR-IOV fixes

radeon:
- Backlight workaround for iMac
- Silence UBSAN flex array warnings

UAPI:
- GFX12 modifier and DCC support
  Proposed Mesa changes:
  https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510
- KFD GFX ALU exceptions
  Proposed ROCdebugger changes:
  https://github.com/ROCm/ROCdbgapi/commit/08c760622b6601abf906f75abbc5e21d9fd425df
  https://github.com/ROCm/ROCgdb/commit/944fe1c1414a68700414e86e32273b6bfa62ba6f
- KFD Contiguous VRAM allocation flag
  Proposed ROCr/HIP changes:
  https://github.com/ROCm/ROCT-Thunk-Interface/commit/f7b4a269914a3ab4f1e2453c2879adb97b5cc9e5
  https://github.com/ROCm/ROCR-Runtime/pull/214/commits/26e8530d05a775872cb06dde6693db72be0c454a
  https://github.com/ROCm/clr/commit/1d48f2a1ab38b632919c4b7274899b3faf4279ff

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607195900.902537-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Jun 11, 2024
2 parents 7957066 + b95fa49 commit 1ddaaa2
Show file tree
Hide file tree
Showing 612 changed files with 334,433 additions and 4,652 deletions.
6 changes: 6 additions & 0 deletions Documentation/gpu/amdgpu/thermal.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ pp_power_profile_mode
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: pp_power_profile_mode

pm_policy
---------------------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: pm_policy

\*_busy_percent
---------------

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
*/

#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ config DRM_AMDGPU
select HWMON
select I2C
select I2C_ALGOBIT
select CRC16
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY
Expand Down
18 changes: 11 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o

Expand All @@ -116,7 +116,7 @@ amdgpu-y += \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
gfxhub_v11_5_0.o
gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o

# add UMC block
amdgpu-y += \
Expand Down Expand Up @@ -167,7 +167,9 @@ amdgpu-y += \
imu_v11_0.o \
gfx_v11_0.o \
gfx_v11_0_3.o \
imu_v11_0_3.o
imu_v11_0_3.o \
gfx_v12_0.o \
imu_v12_0.o

# add async DMA block
amdgpu-y += \
Expand All @@ -179,13 +181,14 @@ amdgpu-y += \
sdma_v4_4_2.o \
sdma_v5_0.o \
sdma_v5_2.o \
sdma_v6_0.o
sdma_v6_0.o \
sdma_v7_0.o

# add MES block
amdgpu-y += \
amdgpu_mes.o \
mes_v10_1.o \
mes_v11_0.o
mes_v11_0.o \
mes_v12_0.o

# add UVD block
amdgpu-y += \
Expand Down Expand Up @@ -277,7 +280,8 @@ amdgpu-y += \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o
amdgpu_amdkfd_gfx_v11.o \
amdgpu_amdkfd_gfx_v12.o

ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
Expand Down
13 changes: 10 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_uni_mes;
extern int amdgpu_jpeg_test;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
Expand Down Expand Up @@ -253,10 +255,12 @@ extern int amdgpu_cik_support;
extern int amdgpu_num_kcq;

#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
extern int amdgpu_umsch_mm;
extern int amdgpu_seamless;
extern int amdgpu_umsch_mm_fwlog;

extern int amdgpu_user_partt_mode;
extern int amdgpu_agp;
Expand Down Expand Up @@ -341,9 +345,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000

int amdgpu_device_ip_set_clockgating_state(void *dev,
Expand Down Expand Up @@ -1014,6 +1018,7 @@ struct amdgpu_device {

/* jpeg */
struct amdgpu_jpeg jpeg;
bool enable_jpeg_test;

/* vpe */
struct amdgpu_vpe vpe;
Expand Down Expand Up @@ -1046,6 +1051,7 @@ struct amdgpu_device {
/* mes */
bool enable_mes;
bool enable_mes_kiq;
bool enable_uni_mes;
struct amdgpu_mes mes;
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];

Expand Down Expand Up @@ -1161,6 +1167,7 @@ struct amdgpu_device {
bool debug_largebar;
bool debug_disable_soft_recovery;
bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca;
};

static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
Expand Down
35 changes: 15 additions & 20 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_
INIT_LIST_HEAD(&bank_error->node);
memcpy(&bank_error->info, info, sizeof(*info));

mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_add_tail(&bank_error->node, &aerr->list);
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);

return bank_error;
}
Expand All @@ -235,7 +235,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca
struct aca_bank_info *tmp_info;
bool found = false;

mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_for_each_entry(bank_error, &aerr->list, node) {
tmp_info = &bank_error->info;
if (tmp_info->socket_id == info->socket_id &&
Expand All @@ -246,7 +246,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca
}

out_unlock:
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);

return found ? bank_error : NULL;
}
Expand Down Expand Up @@ -474,7 +474,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
struct aca_error *aerr = &error_cache->errors[type];
struct aca_bank_error *bank_error, *tmp;

mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);

if (list_empty(&aerr->list))
goto out_unlock;
Expand All @@ -485,7 +485,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
}

out_unlock:
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);

return 0;
}
Expand Down Expand Up @@ -534,15 +534,15 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
if (aca_handle_is_valid(handle))
return -EOPNOTSUPP;

if (!(BIT(type) & handle->mask))
if ((type < 0) || (!(BIT(type) & handle->mask)))
return 0;

return __aca_get_error_data(adev, handle, type, err_data, qctx);
}

static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
{
mutex_init(&aerr->lock);
spin_lock_init(&aerr->lock);
INIT_LIST_HEAD(&aerr->list);
aerr->type = type;
aerr->nr_errors = 0;
Expand All @@ -561,11 +561,10 @@ static void aca_error_fini(struct aca_error *aerr)
{
struct aca_bank_error *bank_error, *tmp;

mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
aca_bank_error_remove(aerr, bank_error);

mutex_destroy(&aerr->lock);
spin_unlock(&aerr->lock);
}

static void aca_fini_error_cache(struct aca_handle *handle)
Expand Down Expand Up @@ -686,7 +685,8 @@ static void aca_manager_fini(struct aca_handle_manager *mgr)

bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
{
return adev->aca.is_enabled;
return (adev->aca.is_enabled ||
adev->debug_enable_ras_aca);
}

int amdgpu_aca_init(struct amdgpu_device *adev)
Expand All @@ -712,13 +712,6 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
atomic_set(&aca->ue_update_flag, 0);
}

int amdgpu_aca_reset(struct amdgpu_device *adev)
{
amdgpu_aca_fini(adev);

return amdgpu_aca_init(adev);
}

void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
{
struct amdgpu_aca *aca = &adev->aca;
Expand Down Expand Up @@ -892,7 +885,9 @@ DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_se
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
{
#if defined(CONFIG_DEBUG_FS)
if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
if (!root ||
(adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6) &&
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 14)))
return;

debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define __AMDGPU_ACA_H__

#include <linux/list.h>
#include <linux/spinlock.h>

struct ras_err_data;
struct ras_query_context;
Expand Down Expand Up @@ -133,7 +134,7 @@ struct aca_bank_error {

struct aca_error {
struct list_head list;
struct mutex lock;
spinlock_t lock;
enum aca_error_type type;
int nr_errors;
};
Expand Down Expand Up @@ -191,7 +192,6 @@ struct aca_info {

int amdgpu_aca_init(struct amdgpu_device *adev);
void amdgpu_aca_fini(struct amdgpu_device *adev);
int amdgpu_aca_reset(struct amdgpu_device *adev);
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
res.clock = clock;

return res;
}
8 changes: 6 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)

reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
reset_context.src = adev->enable_mes ?
AMDGPU_RESET_SRC_MES :
AMDGPU_RESET_SRC_HWS;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

amdgpu_device_gpu_recover(adev, NULL, &reset_context);
Expand Down Expand Up @@ -261,12 +264,13 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
int r = 0;

if (adev->kfd.dev)
r = kgd2kfd_pre_reset(adev->kfd.dev);
r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);

return r;
}
Expand Down
10 changes: 7 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
};

struct amdgpu_device;
struct amdgpu_reset_context;

enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
Expand Down Expand Up @@ -170,7 +171,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context);

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);

Expand Down Expand Up @@ -416,7 +418,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_pre_reset(struct kfd_dev *kfd,
struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
Expand Down Expand Up @@ -459,7 +462,8 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return 0;
}

static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
struct amdgpu_reset_context *reset_context)
{
return 0;
}
Expand Down
Loading

0 comments on commit 1ddaaa2

Please sign in to comment.