diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index d792a56f59acf..9bce281314dfd 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -108,3 +108,19 @@ Contact: intel-xe@lists.freedesktop.org Description: RO. Package current voltage in millivolt. Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Package temperature in millidegree Celsius. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. VRAM temperature in millidegree Celsius. + + Only supported for particular Intel Xe graphics platforms. diff --git a/MAINTAINERS b/MAINTAINERS index 2c563e35ef884..cddcb097f7f31 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7673,6 +7673,7 @@ X: drivers/gpu/drm/msm/ X: drivers/gpu/drm/nouveau/ X: drivers/gpu/drm/radeon/ X: drivers/gpu/drm/tegra/ +X: drivers/gpu/drm/xe/ DRM DRIVERS FOR ALLWINNER A10 M: Maxime Ripard diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ab33792c98402..065fdf6dbb88e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -685,7 +685,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, if ((crtc_state->active_planes & ~BIT(PLANE_CURSOR)) == 0 && hsw_ips_disable(crtc_state)) { crtc_state->ips_enabled = false; - intel_crtc_wait_for_next_vblank(crtc); + intel_plane_initial_vblank_wait(crtc); } /* @@ -699,7 +699,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, */ if (HAS_GMCH(dev_priv) && intel_set_memory_cxsr(dev_priv, false)) - intel_crtc_wait_for_next_vblank(crtc); + intel_plane_initial_vblank_wait(crtc); /* * Gen2 reports pipe underruns whenever all planes are disabled. 
@@ -709,7 +709,7 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_set_cpu_fifo_underrun_reporting(display, crtc->pipe, false); intel_plane_disable_arm(NULL, plane, crtc_state); - intel_crtc_wait_for_next_vblank(crtc); + intel_plane_initial_vblank_wait(crtc); } unsigned int diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 852f1129a058d..b72b07329fbf5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -564,6 +564,8 @@ void intel_display_driver_register(struct intel_display *display) intel_display_device_info_print(DISPLAY_INFO(display), DISPLAY_RUNTIME_INFO(display), &p); + + intel_register_dsm_handler(); } /* part #1: call before irq uninstall */ @@ -639,6 +641,8 @@ void intel_display_driver_unregister(struct intel_display *display) if (!HAS_DISPLAY(display)) return; + intel_unregister_dsm_handler(); + drm_client_dev_unregister(display->drm); /* diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 6789b7f140952..b1675b46e06cb 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -14,6 +14,11 @@ #include "intel_frontbuffer.h" #include "intel_plane_initial.h" +void intel_plane_initial_vblank_wait(struct intel_crtc *crtc) +{ + intel_crtc_wait_for_next_vblank(crtc); +} + static bool intel_reuse_initial_plane_obj(struct intel_crtc *this, const struct intel_initial_plane_config plane_configs[], @@ -442,7 +447,7 @@ void intel_initial_plane_config(struct intel_display *display) intel_find_initial_plane_obj(crtc, plane_configs); if (display->funcs.display->fixup_initial_plane_config(crtc, plane_config)) - intel_crtc_wait_for_next_vblank(crtc); + intel_plane_initial_vblank_wait(crtc); plane_config_fini(plane_config); } diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.h b/drivers/gpu/drm/i915/display/intel_plane_initial.h index 6c6aa717ed21f..5c315acda2101 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.h +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.h @@ -6,8 +6,10 @@ #ifndef __INTEL_PLANE_INITIAL_H__ #define __INTEL_PLANE_INITIAL_H__ +struct intel_crtc; struct intel_display; void intel_initial_plane_config(struct intel_display *display); +void intel_plane_initial_vblank_wait(struct intel_crtc *crtc); #endif diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 91a7748f44926..1dfd6269b355b 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -46,7 +46,6 @@ #include #include "display/i9xx_display_sr.h" -#include "display/intel_acpi.h" #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_crtc.h" @@ -657,8 +656,6 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) intel_power_domains_enable(display); intel_runtime_pm_enable(&dev_priv->runtime_pm); - intel_register_dsm_handler(); - if (i915_switcheroo_register(dev_priv)) drm_err(&dev_priv->drm, "Failed to register vga switcheroo!\n"); } @@ -675,8 +672,6 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_switcheroo_unregister(dev_priv); - intel_unregister_dsm_handler(); - intel_runtime_pm_disable(&dev_priv->runtime_pm); intel_power_domains_disable(display); diff --git a/drivers/gpu/drm/xe/Kconfig.profile b/drivers/gpu/drm/xe/Kconfig.profile index 
ba17a25e8db3b..7530df998148f 100644 --- a/drivers/gpu/drm/xe/Kconfig.profile +++ b/drivers/gpu/drm/xe/Kconfig.profile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config DRM_XE_JOB_TIMEOUT_MAX int "Default max job timeout (ms)" default 10000 # milliseconds diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 3a243c4ea79b5..5ce65ccb3c083 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -34,8 +34,8 @@ xe-y += xe_bb.o \ xe_dma_buf.o \ xe_drm_client.o \ xe_exec.o \ - xe_execlist.o \ xe_exec_queue.o \ + xe_execlist.o \ xe_force_wake.o \ xe_ggtt.o \ xe_gpu_scheduler.o \ @@ -56,6 +56,7 @@ xe-y += xe_bb.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ + xe_guc_buf.o \ xe_guc_capture.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ @@ -66,11 +67,11 @@ xe-y += xe_bb.o \ xe_guc_pc.o \ xe_guc_submit.o \ xe_heci_gsc.o \ + xe_huc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_engine_group.o \ xe_hw_fence.o \ - xe_huc.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -86,15 +87,19 @@ xe-y += xe_bb.o \ xe_preempt_fence.o \ xe_pt.o \ xe_pt_walk.o \ + xe_pxp.o \ + xe_pxp_debugfs.o \ + xe_pxp_submit.o \ xe_query.o \ xe_range_fence.o \ xe_reg_sr.o \ xe_reg_whitelist.o \ - xe_rtp.o \ xe_ring_ops.o \ + xe_rtp.o \ xe_sa.o \ xe_sched_job.o \ xe_step.o \ + xe_survivability_mode.o \ xe_sync.o \ xe_tile.o \ xe_tile_sysfs.o \ @@ -102,8 +107,8 @@ xe-y += xe_bb.o \ xe_trace_bo.o \ xe_trace_guc.o \ xe_trace_lrc.o \ - xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ + xe_ttm_sys_mgr.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ @@ -112,8 +117,8 @@ xe-y += xe_bb.o \ xe_vram.o \ xe_vram_freq.o \ xe_vsec.o \ - xe_wait_user_fence.o \ xe_wa.o \ + xe_wait_user_fence.o \ xe_wopcm.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o @@ -121,6 +126,8 @@ xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o +xe-$(CONFIG_PERF_EVENTS) += xe_pmu.o + # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ @@ -222,6 +229,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_wa.o \ i915-display/intel_dkl_phy.o \ i915-display/intel_dmc.o \ + i915-display/intel_dmc_wl.o \ i915-display/intel_dp.o \ i915-display/intel_dp_aux.o \ i915-display/intel_dp_aux_backlight.o \ @@ -270,7 +278,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vdsc.o \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ - i915-display/intel_dmc_wl.o \ i915-display/intel_wm.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h index 57520809e48db..290e431cf10dd 100644 --- a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h @@ -6,6 +6,7 @@ #ifndef _ABI_GSC_PXP_COMMANDS_ABI_H #define _ABI_GSC_PXP_COMMANDS_ABI_H +#include #include /* Heci client ID for PXP commands */ @@ -13,6 +14,12 @@ #define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF)) +/* + * A PXP sub-section in an HECI packet can be up to 64K big in each direction. + * This does not include the top-level GSC header. + */ +#define PXP_MAX_PACKET_SIZE SZ_64K + /* * there are a lot of status codes for PXP, but we only define the cross-API * common ones that we actually can handle in the kernel driver. 
Other failure @@ -24,6 +31,7 @@ enum pxp_status { PXP_STATUS_NOT_READY = 0x100e, PXP_STATUS_PLATFCONFIG_KF1_NOVERIF = 0x101a, PXP_STATUS_PLATFCONFIG_KF1_BAD = 0x101f, + PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED = 0x1037, PXP_STATUS_OP_NOT_PERMITTED = 0x4013 }; @@ -42,6 +50,8 @@ struct pxp_cmd_header { u32 buffer_len; } __packed; +#define PXP43_CMDID_INVALIDATE_STREAM_KEY 0x00000007 +#define PXP43_CMDID_INIT_SESSION 0x00000036 #define PXP43_CMDID_NEW_HUC_AUTH 0x0000003F /* MTL+ */ /* PXP-Input-Packet: HUC Auth-only */ @@ -56,4 +66,35 @@ struct pxp43_huc_auth_out { struct pxp_cmd_header header; } __packed; +/* PXP-Input-Packet: Init PXP session */ +struct pxp43_create_arb_in { + struct pxp_cmd_header header; + /* header.stream_id fields for version 4.3 of Init PXP session: */ + #define PXP43_INIT_SESSION_VALID BIT(0) + #define PXP43_INIT_SESSION_APPTYPE BIT(1) + #define PXP43_INIT_SESSION_APPID GENMASK(17, 2) + u32 protection_mode; + #define PXP43_INIT_SESSION_PROTECTION_ARB 0x2 + u32 sub_session_id; + u32 init_flags; + u32 rsvd[12]; +} __packed; + +/* PXP-Output-Packet: Init PXP session */ +struct pxp43_create_arb_out { + struct pxp_cmd_header header; + u32 rsvd[8]; +} __packed; + +/* PXP-Input-Packet: Invalidate Stream Key */ +struct pxp43_inv_stream_key_in { + struct pxp_cmd_header header; + u32 rsvd[3]; +} __packed; + +/* PXP-Output-Packet: Invalidate Stream Key */ +struct pxp43_inv_stream_key_out { + struct pxp_cmd_header header; + u32 rsvd; +} __packed; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 9c4cf050059ac..41d39d67817a1 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -1,3 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + #ifndef _I915_GEM_STOLEN_H_ #define _I915_GEM_STOLEN_H_ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h index 5dfc587c8237e..d2eb8e1f6c4b4 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h @@ -9,14 +9,24 @@ #include #include +#include "xe_pxp.h" + struct drm_gem_object; -struct intel_pxp; +struct xe_pxp; -static inline int intel_pxp_key_check(struct intel_pxp *pxp, +static inline int intel_pxp_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj, bool assign) { - return -ENODEV; + /* + * The assign variable is used in i915 to assign the key to the BO at + * first submission time. In Xe the key is instead assigned at BO + * creation time, so the assign variable must always be false.
+ */ + if (assign) + return -EINVAL; + + return xe_pxp_obj_key_check(pxp, obj); } #endif diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index b463f5bd4eed1..27437c22bd703 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -25,7 +25,7 @@ bool intel_bo_is_shmem(struct drm_gem_object *obj) bool intel_bo_is_protected(struct drm_gem_object *obj) { - return false; + return xe_bo_is_protected(gem_to_xe_bo(obj)); } void intel_bo_flush_if_display(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index 3f8e8d31e8005..ebdb22c9499d7 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -50,10 +50,10 @@ int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb, /* * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching - * mode when the boect is VM_BINDed, so we can only set + * mode when the bo is VM_BINDed, so we can only set * coherency with display when unbound. */ - if (XE_IOCTL_DBG(xe, !list_empty(&bo->ttm.base.gpuva.list))) { + if (XE_IOCTL_DBG(xe, xe_bo_is_vm_bound(bo))) { ttm_bo_unreserve(&bo->ttm); ret = -EINVAL; goto err; diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 96ba9595bf2ac..02a413a073824 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -29,6 +29,7 @@ #include "intel_hdcp.h" #include "intel_hotplug.h" #include "intel_opregion.h" +#include "skl_watermark.h" #include "xe_module.h" /* Xe device functions */ @@ -101,19 +102,25 @@ int xe_display_create(struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } -static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) +static void xe_display_fini_early(void *arg) { - struct xe_device *xe = to_xe_device(dev); + struct xe_device *xe = arg; struct intel_display *display = &xe->display; if (!xe->info.probe_display) return; + intel_display_driver_remove_nogem(display); + intel_display_driver_remove_noirq(display); + intel_opregion_cleanup(display); intel_power_domains_cleanup(display); } -int xe_display_init_nommio(struct xe_device *xe) +int xe_display_init_early(struct xe_device *xe) { + struct intel_display *display = &xe->display; + int err; + if (!xe->info.probe_display) return 0; @@ -123,29 +130,6 @@ int xe_display_init_nommio(struct xe_device *xe) /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(xe); - return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe); -} - -static void xe_display_fini_noirq(void *arg) -{ - struct xe_device *xe = arg; - struct intel_display *display = &xe->display; - - if (!xe->info.probe_display) - return; - - intel_display_driver_remove_noirq(display); - intel_opregion_cleanup(display); -} - -int xe_display_init_noirq(struct xe_device *xe) -{ - struct intel_display *display = &xe->display; - int err; - - if (!xe->info.probe_display) - return 0; - intel_display_driver_early_probe(display); /* Early display init.. 
*/ @@ -162,26 +146,33 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(display); err = intel_display_driver_probe_noirq(display); - if (err) { - intel_opregion_cleanup(display); - return err; - } + if (err) + goto err_opregion; - return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); + err = intel_display_driver_probe_nogem(display); + if (err) + goto err_noirq; + + return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_early, xe); +err_noirq: + intel_display_driver_remove_noirq(display); + intel_power_domains_cleanup(display); +err_opregion: + intel_opregion_cleanup(display); + return err; } -static void xe_display_fini_noaccel(void *arg) +static void xe_display_fini(void *arg) { struct xe_device *xe = arg; struct intel_display *display = &xe->display; - if (!xe->info.probe_display) - return; - - intel_display_driver_remove_nogem(display); + intel_hpd_poll_fini(xe); + intel_hdcp_component_fini(display); + intel_audio_deinit(display); } -int xe_display_init_noaccel(struct xe_device *xe) +int xe_display_init(struct xe_device *xe) { struct intel_display *display = &xe->display; int err; @@ -189,34 +180,11 @@ int xe_display_init_noaccel(struct xe_device *xe) if (!xe->info.probe_display) return 0; - err = intel_display_driver_probe_nogem(display); + err = intel_display_driver_probe(display); if (err) return err; - return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noaccel, xe); -} - -int xe_display_init(struct xe_device *xe) -{ - struct intel_display *display = &xe->display; - - if (!xe->info.probe_display) - return 0; - - return intel_display_driver_probe(display); -} - -void xe_display_fini(struct xe_device *xe) -{ - struct intel_display *display = &xe->display; - - if (!xe->info.probe_display) - return; - - intel_hpd_poll_fini(xe); - - intel_hdcp_component_fini(display); - intel_audio_deinit(display); + return xe_device_add_action_or_reset(xe, xe_display_fini, xe); } void xe_display_register(struct xe_device *xe) @@ -228,7 +196,6 @@ void xe_display_register(struct xe_device *xe) intel_display_driver_register(display); intel_power_domains_enable(display); - intel_register_dsm_handler(); } void xe_display_unregister(struct xe_device *xe) @@ -238,7 +205,6 @@ void xe_display_unregister(struct xe_device *xe) if (!xe->info.probe_display) return; - intel_unregister_dsm_handler(); intel_power_domains_disable(display); intel_display_driver_unregister(display); } @@ -322,11 +288,58 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } } -/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ -static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) +static void xe_display_enable_d3cold(struct xe_device *xe) +{ + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) + return; + + /* + * We do a lot of poking in a lot of registers, make sure they work + * properly. 
+ */ + intel_power_domains_disable(display); + + xe_display_flush_cleanup_work(xe); + + intel_opregion_suspend(display, PCI_D3cold); + + intel_dmc_suspend(display); + + if (has_display(xe)) + intel_hpd_poll_enable(xe); +} + +static void xe_display_disable_d3cold(struct xe_device *xe) +{ + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) + return; + + intel_dmc_resume(display); + + if (has_display(xe)) + drm_mode_config_reset(&xe->drm); + + intel_display_driver_init_hw(display); + + intel_hpd_init(xe); + + if (has_display(xe)) + intel_hpd_poll_disable(xe); + + intel_opregion_resume(display); + + intel_power_domains_enable(display); +} + +void xe_display_pm_suspend(struct xe_device *xe) { struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); + if (!xe->info.probe_display) return; @@ -335,10 +348,9 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(display); - if (!runtime) - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - if (!runtime && has_display(xe)) { + if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -348,7 +360,7 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); - if (!runtime && has_display(xe)) { + if (has_display(xe)) { intel_display_driver_suspend_access(display); intel_encoder_suspend_all(&xe->display); } @@ -356,14 +368,6 @@ static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); intel_dmc_suspend(display); - - if (runtime && has_display(xe)) - intel_hpd_poll_enable(xe); -} - -void xe_display_pm_suspend(struct xe_device *xe) -{ - __xe_display_pm_suspend(xe, false); } void xe_display_pm_shutdown(struct xe_device *xe) @@ -402,7 +406,7 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe) return; if (xe->d3cold.allowed) { - __xe_display_pm_suspend(xe, true); + xe_display_enable_d3cold(xe); return; } @@ -463,7 +467,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) intel_display_power_resume_early(display); } -static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) +void xe_display_pm_resume(struct xe_device *xe) { struct intel_display *display = &xe->display; @@ -477,12 +481,12 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(display); - if (!runtime && has_display(xe)) + if (has_display(xe)) intel_display_driver_resume_access(display); intel_hpd_init(xe); - if (!runtime && has_display(xe)) { + if (has_display(xe)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(display); @@ -493,29 +497,24 @@ static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_opregion_resume(display); - if (!runtime) - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); intel_power_domains_enable(display); } -void xe_display_pm_resume(struct xe_device *xe) -{ - __xe_display_pm_resume(xe, false); -} - void xe_display_pm_runtime_resume(struct xe_device *xe) { if (!xe->info.probe_display) return; if (xe->d3cold.allowed) { - __xe_display_pm_resume(xe, true); + xe_display_disable_d3cold(xe); return; } intel_hpd_init(xe); 
intel_hpd_poll_disable(xe); + skl_watermark_ipc_update(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 233f81a26c255..685dc74402fb8 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -20,11 +20,8 @@ int xe_display_create(struct xe_device *xe); int xe_display_probe(struct xe_device *xe); -int xe_display_init_nommio(struct xe_device *xe); -int xe_display_init_noirq(struct xe_device *xe); -int xe_display_init_noaccel(struct xe_device *xe); +int xe_display_init_early(struct xe_device *xe); int xe_display_init(struct xe_device *xe); -void xe_display_fini(struct xe_device *xe); void xe_display_register(struct xe_device *xe); void xe_display_unregister(struct xe_device *xe); @@ -54,11 +51,8 @@ static inline int xe_display_create(struct xe_device *xe) { return 0; } static inline int xe_display_probe(struct xe_device *xe) { return 0; } -static inline int xe_display_init_nommio(struct xe_device *xe) { return 0; } -static inline int xe_display_init_noirq(struct xe_device *xe) { return 0; } -static inline int xe_display_init_noaccel(struct xe_device *xe) { return 0; } +static inline int xe_display_init_early(struct xe_device *xe) { return 0; } static inline int xe_display_init(struct xe_device *xe) { return 0; } -static inline void xe_display_fini(struct xe_device *xe) {} static inline void xe_display_register(struct xe_device *xe) {} static inline void xe_display_unregister(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 25c80dd6d3863..4ca0cb5711941 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -8,7 +8,9 @@ #include "regs/xe_gtt_defs.h" #include "xe_ggtt.h" +#include "xe_mmio.h" +#include "i915_reg.h" #include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_display.h" @@ -22,6 +24,21 @@ #include +void intel_plane_initial_vblank_wait(struct intel_crtc *crtc) +{ + /* Early xe has no irq */ + struct xe_device *xe = to_xe_device(crtc->base.dev); + struct xe_reg pipe_frmtmstmp = XE_REG(i915_mmio_reg_offset(PIPE_FRMTMSTMP(crtc->pipe))); + u32 timestamp; + int ret; + + timestamp = xe_mmio_read32(xe_root_tile_mmio(xe), pipe_frmtmstmp); + + ret = xe_mmio_wait32_not(xe_root_tile_mmio(xe), pipe_frmtmstmp, ~0U, timestamp, 40000U, &timestamp, false); + if (ret < 0) + drm_warn(&xe->drm, "waiting for early vblank failed with %i\n", ret); +} + static bool intel_reuse_initial_plane_obj(struct intel_crtc *this, const struct intel_initial_plane_config plane_configs[], @@ -293,7 +310,7 @@ void intel_initial_plane_config(struct intel_display *display) intel_find_initial_plane_obj(crtc, plane_configs); if (display->funcs.display->fixup_initial_plane_config(crtc, plane_config)) - intel_crtc_wait_for_next_vblank(crtc); + intel_plane_initial_vblank_wait(crtc); plane_config_fini(plane_config); } diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h index fd2ce7ace5108..e559969468c44 100644 --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -16,6 +16,7 @@ #define XE_INSTR_CMD_TYPE GENMASK(31, 29) #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) #define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2) +#define XE_INSTR_VIDEOPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE,
0x3) #define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4) diff --git a/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h new file mode 100644 index 0000000000000..3c0c97f78e908 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_mfx_commands.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_MFX_COMMANDS_H_ +#define _XE_MFX_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +#define MFX_CMD_SUBTYPE REG_GENMASK(28, 27) /* A.K.A cmd pipe */ +#define MFX_CMD_OPCODE REG_GENMASK(26, 24) +#define MFX_CMD_SUB_OPCODE REG_GENMASK(23, 16) +#define MFX_FLAGS_AND_LEN REG_GENMASK(15, 0) + +#define XE_MFX_INSTR(subtype, op, sub_op) \ + (XE_INSTR_VIDEOPIPE | \ + REG_FIELD_PREP(MFX_CMD_SUBTYPE, subtype) | \ + REG_FIELD_PREP(MFX_CMD_OPCODE, op) | \ + REG_FIELD_PREP(MFX_CMD_SUB_OPCODE, sub_op)) + +#define MFX_WAIT XE_MFX_INSTR(1, 0, 0) +#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9) +#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8) + +#define CRYPTO_KEY_EXCHANGE XE_MFX_INSTR(2, 6, 9) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 10ec2920d31b3..167fb0f742de7 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -48,6 +48,7 @@ #define MI_LRI_LEN(x) (((x) & 0xff) + 1) #define MI_FLUSH_DW __MI_INSTR(0x26) +#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) #define MI_INVALIDATE_TLB REG_BIT(18) #define MI_FLUSH_DW_CCS REG_BIT(16) @@ -66,4 +67,8 @@ #define MI_BATCH_BUFFER_START __MI_INSTR(0x31) +#define MI_SET_APPID __MI_INSTR(0x0e) +#define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0) +#define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index d86219dedde2a..c8fd3d5ca5026 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -132,6 +132,7 @@ #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_PXP_ENABLE REG_BIT(10) #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) #define CTX_CTRL_RUN_ALONE REG_BIT(7) #define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 162f18e975dae..096859072396d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -221,6 +221,9 @@ #define MIRROR_FUSE1 XE_REG(0x911c) +#define MIRROR_L3BANK_ENABLE XE_REG(0x9130) +#define XE3_L3BANK_ENABLE REG_GENMASK(31, 0) + #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) #define XELP_GT_SLICE_ENABLE XE_REG(0x9138) @@ -500,6 +503,9 @@ #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) +#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) +#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) + #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define WR_REQ_CHAINING_DIS REG_BIT(26) diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index 1776b3f78ccb7..f0ecfcac40037 100644 --- 
a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -44,6 +44,7 @@ #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) #define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) +#define CRYPTO_RSVD_INTR_ENABLE XE_REG(0x190040) #define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) #define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) @@ -54,6 +55,7 @@ #define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) #define OTHER_GUC_INSTANCE 0 #define OTHER_GSC_HECI2_INSTANCE 3 +#define OTHER_KCR_INSTANCE 4 #define OTHER_GSC_INSTANCE 6 #define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) @@ -65,6 +67,7 @@ #define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) #define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) #define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) +#define CRYPTO_RSVD_INTR_MASK XE_REG(0x1900f0) #define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) #define CCS0_CCS1_INTR_MASK XE_REG(0x190100) #define CCS2_CCS3_INTR_MASK XE_REG(0x190104) @@ -79,4 +82,9 @@ #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT REG_BIT(0) +/* irqs for OTHER_KCR_INSTANCE */ +#define KCR_PXP_STATE_TERMINATED_INTERRUPT REG_BIT(1) +#define KCR_APP_TERMINATED_PER_FW_REQ_INTERRUPT REG_BIT(2) +#define KCR_PXP_STATE_RESET_COMPLETE_INTERRUPT REG_BIT(3) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 519dd1067a198..f5e5234857c19 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -34,6 +34,9 @@ #define PCU_CR_PACKAGE_ENERGY_STATUS XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x593c) +#define PCU_CR_PACKAGE_TEMPERATURE XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5978) +#define TEMP_MASK REG_GENMASK(7, 0) + #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PKG_PWR_LIM_1 REG_GENMASK(14, 0) #define PKG_PWR_LIM_1_EN REG_BIT(15) diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 0b0b49d850aef..8846eb9ce2a40 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -21,6 +21,8 @@ #define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) #define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) +#define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) #define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) #define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458) #define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460) diff --git a/drivers/gpu/drm/xe/regs/xe_pxp_regs.h b/drivers/gpu/drm/xe/regs/xe_pxp_regs.h new file mode 100644 index 0000000000000..aa158938b42eb --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_pxp_regs.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2024, Intel Corporation. All rights reserved. 
+ */ + +#ifndef __XE_PXP_REGS_H__ +#define __XE_PXP_REGS_H__ + +#include "regs/xe_regs.h" + +/* The following registers are only valid on platforms with a media GT */ + +/* KCR enable/disable control */ +#define KCR_INIT XE_REG(0x3860f0) +#define KCR_INIT_ALLOW_DISPLAY_ME_WRITES REG_BIT(14) + +/* KCR hwdrm session in play status 0-31 */ +#define KCR_SIP XE_REG(0x386260) + +/* PXP global terminate register for session termination */ +#define KCR_GLOBAL_TERMINATE XE_REG(0x3860f8) + +#endif /* __XE_PXP_REGS_H__ */ diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 0eedd6c26b1bf..c39aab843e357 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -7,9 +7,21 @@ #define _XE_REG_DEFS_H_ #include +#include +#include #include "compat-i915-headers/i915_reg_defs.h" +/** + * XE_REG_ADDR_MAX - The upper limit on MMIO register address + * + * This macro specifies the upper limit (not inclusive) on MMIO register offset + * supported by struct xe_reg and functions based on struct xe_mmio. + * + * Currently this is defined as 4 MiB. + */ +#define XE_REG_ADDR_MAX SZ_4M + /** * struct xe_reg - Register definition * @@ -21,7 +33,7 @@ struct xe_reg { union { struct { /** @addr: address */ - u32 addr:28; + u32 addr:const_ilog2(XE_REG_ADDR_MAX); /** * @masked: register is "masked", with upper 16bits used * to identify the bits that are updated on the lower @@ -41,10 +53,6 @@ struct xe_reg { * @vf: register is accessible from the Virtual Function. */ u32 vf:1; - /** - * @ext: access MMIO extension space for current register. - */ - u32 ext:1; }; /** @raw: Raw value with both address and options */ u32 raw; @@ -111,16 +119,6 @@ struct xe_reg_mcr { */ #define XE_REG(r_, ...) ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__)) -/** - * XE_REG_EXT - Create a struct xe_reg from extension offset and additional - * flags - * @r_: Register extension offset - * @...: Additional options like access mode. See struct xe_reg for available - * options. - */ -#define XE_REG_EXT(r_, ...) 
\ - ((const struct xe_reg)XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .ext = 1)) - /** * XE_REG_MCR - Create a struct xe_reg_mcr from offset and additional flags * @r_: Register offset diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c new file mode 100644 index 0000000000000..6faffcd748694 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include +#include + +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_guc_ct.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +#define DUT_GGTT_START SZ_1M +#define DUT_GGTT_SIZE SZ_2M + +static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device *xe, + struct xe_tile *tile, + size_t size, u32 flags) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_bo *bo; + void *buf; + + bo = drmm_kzalloc(&xe->drm, sizeof(*bo), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + + buf = drmm_kzalloc(&xe->drm, size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf); + + bo->tile = tile; + bo->ttm.bdev = &xe->ttm; + bo->size = size; + iosys_map_set_vaddr(&bo->vmap, buf); + + if (flags & XE_BO_FLAG_GGTT) { + struct xe_ggtt *ggtt = tile->mem.ggtt; + + bo->ggtt_node[tile->id] = xe_ggtt_node_init(ggtt); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); + + KUNIT_ASSERT_EQ(test, 0, + drm_mm_insert_node_in_range(&ggtt->mm, + &bo->ggtt_node[tile->id]->base, + bo->size, SZ_4K, + 0, 0, U64_MAX, 0)); + } + + return bo; +} + +static int guc_buf_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_ggtt *ggtt; + struct xe_guc *guc; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt; + guc = &xe_device_get_gt(test->priv, 0)->uc.guc; + + drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE); + mutex_init(&ggtt->lock); + + kunit_activate_static_stub(test, xe_managed_bo_create_pin_map, + replacement_xe_managed_bo_create_pin_map); + + KUNIT_ASSERT_EQ(test, 0, xe_guc_buf_cache_init(&guc->buf)); + + test->priv = &guc->buf; + return 0; +} + +static void test_smallest(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, 1); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + xe_guc_buf_release(buf); +} + +static void test_largest(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + xe_guc_buf_release(buf); +} + +static void test_granular(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf *bufs; + int n, dwords; 
+ + dwords = xe_guc_buf_cache_dwords(cache); + bufs = kunit_kcalloc(test, dwords, sizeof(*bufs), GFP_KERNEL); + KUNIT_EXPECT_NOT_NULL(test, bufs); + + for (n = 0; n < dwords; n++) + bufs[n] = xe_guc_buf_reserve(cache, 1); + + for (n = 0; n < dwords; n++) + KUNIT_EXPECT_TRUE_MSG(test, xe_guc_buf_is_valid(bufs[n]), "n=%d", n); + + for (n = 0; n < dwords; n++) + xe_guc_buf_release(bufs[n]); +} + +static void test_unique(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf *bufs; + int n, m, dwords; + + dwords = xe_guc_buf_cache_dwords(cache); + bufs = kunit_kcalloc(test, dwords, sizeof(*bufs), GFP_KERNEL); + KUNIT_EXPECT_NOT_NULL(test, bufs); + + for (n = 0; n < dwords; n++) + bufs[n] = xe_guc_buf_reserve(cache, 1); + + for (n = 0; n < dwords; n++) { + for (m = n + 1; m < dwords; m++) { + KUNIT_EXPECT_PTR_NE_MSG(test, xe_guc_buf_cpu_ptr(bufs[n]), + xe_guc_buf_cpu_ptr(bufs[m]), "n=%d, m=%d", n, m); + KUNIT_ASSERT_NE_MSG(test, xe_guc_buf_gpu_addr(bufs[n]), + xe_guc_buf_gpu_addr(bufs[m]), "n=%d, m=%d", n, m); + } + } + + for (n = 0; n < dwords; n++) + xe_guc_buf_release(bufs[n]); +} + +static void test_overlap(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf b1, b2; + u32 dwords = xe_guc_buf_cache_dwords(cache) / 2; + u32 bytes = dwords * sizeof(u32); + void *p1, *p2; + u64 a1, a2; + + b1 = xe_guc_buf_reserve(cache, dwords); + b2 = xe_guc_buf_reserve(cache, dwords); + + p1 = xe_guc_buf_cpu_ptr(b1); + p2 = xe_guc_buf_cpu_ptr(b2); + + a1 = xe_guc_buf_gpu_addr(b1); + a2 = xe_guc_buf_gpu_addr(b2); + + KUNIT_EXPECT_PTR_NE(test, p1, p2); + if (p1 < p2) + KUNIT_EXPECT_LT(test, (uintptr_t)(p1 + bytes - 1), (uintptr_t)p2); + else + KUNIT_EXPECT_LT(test, (uintptr_t)(p2 + bytes - 1), (uintptr_t)p1); + + KUNIT_EXPECT_NE(test, a1, a2); + if (a1 < a2) + KUNIT_EXPECT_LT(test, a1 + bytes - 1, a2); + else + KUNIT_EXPECT_LT(test, a2 + bytes - 1, a1); + + xe_guc_buf_release(b1); + xe_guc_buf_release(b2); +} + +static void test_reusable(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf b1, b2; + void *p1; + u64 a1; + + b1 = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(b1)); + KUNIT_EXPECT_NOT_NULL(test, p1 = xe_guc_buf_cpu_ptr(b1)); + KUNIT_EXPECT_NE(test, 0, a1 = xe_guc_buf_gpu_addr(b1)); + xe_guc_buf_release(b1); + + b2 = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache)); + KUNIT_EXPECT_PTR_EQ(test, p1, xe_guc_buf_cpu_ptr(b2)); + KUNIT_EXPECT_EQ(test, a1, xe_guc_buf_gpu_addr(b2)); + xe_guc_buf_release(b2); +} + +static void test_too_big(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + + buf = xe_guc_buf_reserve(cache, xe_guc_buf_cache_dwords(cache) + 1); + KUNIT_EXPECT_FALSE(test, xe_guc_buf_is_valid(buf)); + xe_guc_buf_release(buf); /* shouldn't crash */ +} + +static void test_flush(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + const u32 dwords = xe_guc_buf_cache_dwords(cache); + const u32 bytes = dwords * sizeof(u32); + u32 *s, *p, *d; + int n; + + KUNIT_ASSERT_NOT_NULL(test, s = kunit_kcalloc(test, dwords, sizeof(u32), GFP_KERNEL)); + KUNIT_ASSERT_NOT_NULL(test, d = kunit_kcalloc(test, dwords, sizeof(u32), GFP_KERNEL)); + + for (n = 0; n < dwords; n++) + s[n] = n; + + buf = xe_guc_buf_reserve(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + 
KUNIT_EXPECT_PTR_NE(test, p, s); + KUNIT_EXPECT_PTR_NE(test, p, d); + + memcpy(p, s, bytes); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_flush(buf)); + + iosys_map_memcpy_from(d, &cache->sam->bo->vmap, 0, bytes); + KUNIT_EXPECT_MEMEQ(test, s, d, bytes); + + xe_guc_buf_release(buf); +} + +static void test_lookup(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + u32 dwords; + u64 addr; + u32 *p; + int n; + + dwords = xe_guc_buf_cache_dwords(cache); + buf = xe_guc_buf_reserve(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + KUNIT_ASSERT_NE(test, 0, addr = xe_guc_buf_gpu_addr(buf)); + + KUNIT_EXPECT_EQ(test, 0, xe_guc_cache_gpu_addr_from_ptr(cache, p - 1, sizeof(u32))); + KUNIT_EXPECT_EQ(test, 0, xe_guc_cache_gpu_addr_from_ptr(cache, p + dwords, sizeof(u32))); + + for (n = 0; n < dwords; n++) + KUNIT_EXPECT_EQ_MSG(test, xe_guc_cache_gpu_addr_from_ptr(cache, p + n, sizeof(u32)), + addr + n * sizeof(u32), "n=%d", n); + + xe_guc_buf_release(buf); +} + +static void test_data(struct kunit *test) +{ + static const u32 data[] = { 1, 2, 3, 4, 5, 6 }; + struct xe_guc_buf_cache *cache = test->priv; + struct xe_guc_buf buf; + void *p; + + buf = xe_guc_buf_from_data(cache, data, sizeof(data)); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_ASSERT_NOT_NULL(test, p = xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_MEMEQ(test, p, data, sizeof(data)); + + xe_guc_buf_release(buf); +} + +static void test_class(struct kunit *test) +{ + struct xe_guc_buf_cache *cache = test->priv; + u32 dwords = xe_guc_buf_cache_dwords(cache); + + { + CLASS(xe_guc_buf, buf)(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + } + + { + CLASS(xe_guc_buf, buf)(cache, dwords); + KUNIT_ASSERT_TRUE(test, xe_guc_buf_is_valid(buf)); + KUNIT_EXPECT_NOT_NULL(test, xe_guc_buf_cpu_ptr(buf)); + KUNIT_EXPECT_NE(test, 0, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_LE(test, DUT_GGTT_START, xe_guc_buf_gpu_addr(buf)); + KUNIT_EXPECT_GT(test, DUT_GGTT_START + DUT_GGTT_SIZE, xe_guc_buf_gpu_addr(buf)); + } +} + +static struct kunit_case guc_buf_test_cases[] = { + KUNIT_CASE(test_smallest), + KUNIT_CASE(test_largest), + KUNIT_CASE(test_granular), + KUNIT_CASE(test_unique), + KUNIT_CASE(test_overlap), + KUNIT_CASE(test_reusable), + KUNIT_CASE(test_too_big), + KUNIT_CASE(test_flush), + KUNIT_CASE(test_lookup), + KUNIT_CASE(test_data), + KUNIT_CASE(test_class), + {} +}; + +static struct kunit_suite guc_buf_suite = { + .name = "guc_buf", + .test_cases = guc_buf_test_cases, + .init = guc_buf_test_init, +}; + +kunit_test_suites(&guc_buf_suite); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3f5391d416d46..25761924a8b41 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -6,6 +6,7 @@ #include "xe_bo.h" #include +#include #include #include @@ -15,6 +16,8 @@ #include #include +#include + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -24,6 +27,7 @@ #include "xe_migrate.h" #include "xe_pm.h" #include "xe_preempt_fence.h" +#include "xe_pxp.h" #include "xe_res_cursor.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" @@ -124,6 +128,22 @@ bool xe_bo_is_stolen_devmem(struct 
xe_bo *bo) GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270; } +/** + * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND + * @bo: The BO + * + * Check if a given bo is bound through VM_BIND. This requires the + * reservation lock for the BO to be held. + * + * Returns: boolean + */ +bool xe_bo_is_vm_bound(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + return !list_empty(&bo->ttm.base.gpuva.list); +} + static bool xe_bo_is_user(struct xe_bo *bo) { return bo->flags & XE_BO_FLAG_USER; @@ -139,14 +159,17 @@ mem_type_to_migrate(struct xe_device *xe, u32 mem_type) return tile->migrate; } -static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) +static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res) { struct xe_device *xe = ttm_to_xe_device(res->bo->bdev); struct ttm_resource_manager *mgr; + struct xe_ttm_vram_mgr *vram_mgr; xe_assert(xe, resource_is_vram(res)); mgr = ttm_manager_type(&xe->ttm, res->mem_type); - return to_xe_ttm_vram_mgr(mgr)->vram; + vram_mgr = to_xe_ttm_vram_mgr(mgr); + + return container_of(vram_mgr, struct xe_vram_region, ttm); } static void try_add_system(struct xe_device *xe, struct xe_bo *bo, @@ -175,12 +198,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { struct ttm_place place = { .mem_type = mem_type }; - struct xe_mem_region *vram; + struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type); + struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr); + + struct xe_vram_region *vram; u64 io_size; xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); - vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram; + vram = container_of(vram_mgr, struct xe_vram_region, ttm); xe_assert(xe, vram && vram->usable_size); io_size = vram->io_size; @@ -464,7 +490,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, return 0; case XE_PL_VRAM0: case XE_PL_VRAM1: { - struct xe_mem_region *vram = res_to_mem_region(mem); + struct xe_vram_region *vram = res_to_mem_region(mem); if (!xe_ttm_resource_visible(mem)) return -EINVAL; @@ -713,6 +739,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } + /* Reject BO eviction if BO is bound to current VM. */ + if (evict && ctx->resv) { + struct drm_gpuvm_bo *vm_bo; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { + struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); + + if (xe_vm_resv(vm) == ctx->resv && + xe_vm_in_preempt_fence_mode(vm)) { + ret = -EBUSY; + goto out; + } + } + } + /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. 
@@ -796,7 +837,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, /* Create a new VMAP once kernel BO back in VRAM */ if (!ret && resource_is_vram(new_mem)) { - struct xe_mem_region *vram = res_to_mem_region(new_mem); + struct xe_vram_region *vram = res_to_mem_region(new_mem); void __iomem *new_addr = vram->mapping + (new_mem->start << PAGE_SHIFT); @@ -1006,7 +1047,7 @@ static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo, { struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_res_cursor cursor; - struct xe_mem_region *vram; + struct xe_vram_region *vram; if (ttm_bo->resource->mem_type == XE_PL_STOLEN) return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT; @@ -1146,7 +1187,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); struct iosys_map vmap; struct xe_res_cursor cursor; - struct xe_mem_region *vram; + struct xe_vram_region *vram; int bytes_left = len; xe_bo_assert_held(bo); @@ -1644,6 +1685,7 @@ __xe_bo_create_locked(struct xe_device *xe, } } + trace_xe_bo_create(bo); return bo; err_unlock_put_bo: @@ -1781,6 +1823,8 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile struct xe_bo *bo; int ret; + KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); + bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); if (IS_ERR(bo)) return bo; @@ -2135,6 +2179,93 @@ void xe_bo_vunmap(struct xe_bo *bo) __xe_bo_vunmap(bo); } +static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value) +{ + if (value == DRM_XE_PXP_TYPE_NONE) + return 0; + + /* we only support DRM_XE_PXP_TYPE_HWDRM for now */ + if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM)) + return -EINVAL; + + return xe_pxp_key_assign(xe->pxp, bo); +} + +typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, + struct xe_bo *bo, + u64 value); + +static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { + [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, +}; + +static int gem_create_user_ext_set_property(struct xe_device *xe, + struct xe_bo *bo, + u64 extension) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.property >= + ARRAY_SIZE(gem_create_set_property_funcs)) || + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs)); + if (!gem_create_set_property_funcs[idx]) + return -EINVAL; + + return gem_create_set_property_funcs[idx](xe, bo, ext.value); +} + +typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe, + struct xe_bo *bo, + u64 extension); + +static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = { + [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo, + u64 extensions, int ext_number) +{ + u64 __user *address = u64_to_user_ptr(extensions); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, 
sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, + ARRAY_SIZE(gem_create_user_extension_funcs)); + err = gem_create_user_extension_funcs[idx](xe, bo, extensions); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (ext.next_extension) + return gem_create_user_extensions(xe, bo, ext.next_extension, + ++ext_number); + + return 0; +} + int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { @@ -2147,8 +2278,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, u32 handle; int err; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || + if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -2230,6 +2360,12 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, goto out_vm; } + if (args->extensions) { + err = gem_create_user_extensions(xe, bo, args->extensions, 0); + if (err) + goto out_bulk; + } + err = drm_gem_handle_create(file, &bo->ttm.base, &handle); if (err) goto out_bulk; @@ -2263,9 +2399,26 @@ int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags)) + if (XE_IOCTL_DBG(xe, args->flags & + ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER)) return -EINVAL; + if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) { + if (XE_IOCTL_DBG(xe, !IS_DGFX(xe))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->handle)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K)) + return -EINVAL; + + BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) + + SZ_4K) >= DRM_FILE_PAGE_OFFSET_START); + args->offset = XE_PCI_BARRIER_MMAP_OFFSET; + return 0; + } + gem_obj = drm_gem_object_lookup(file, args->handle); if (XE_IOCTL_DBG(xe, !gem_obj)) return -ENOENT; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index d9386ab031404..a25340949415a 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -75,6 +75,8 @@ #define XE_BO_PROPS_INVALID (-1) +#define XE_PCI_BARRIER_MMAP_OFFSET (0x50 << XE_PTE_SHIFT) + struct sg_table; struct xe_bo *xe_bo_alloc(void); @@ -184,6 +186,11 @@ static inline bool xe_bo_is_pinned(struct xe_bo *bo) return bo->ttm.pin_count; } +static inline bool xe_bo_is_protected(const struct xe_bo *bo) +{ + return bo->pxp_key_instance; +} + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { @@ -234,6 +241,7 @@ bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +bool xe_bo_is_vm_bound(struct xe_bo *bo); bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 46dc9e4e3e46a..60c522866500c 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -57,6 +57,12 @@ struct xe_bo { */ struct list_head client_link; #endif + /** + * @pxp_key_instance: PXP key instance this BO was created against. A + * 0 in this variable indicates that the BO does not use PXP encryption. + */ + u32 pxp_key_instance; + /** @freed: List node for delayed put. 
*/ struct llist_node freed; /** @update_index: Update index if PT BO */ diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 492b4877433f1..d0503959a8ed0 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -18,6 +18,7 @@ #include "xe_gt_printk.h" #include "xe_guc_ads.h" #include "xe_pm.h" +#include "xe_pxp_debugfs.h" #include "xe_sriov.h" #include "xe_step.h" @@ -166,7 +167,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, return -EINVAL; if (xe->wedged.mode == wedged_mode) - return 0; + return size; xe->wedged.mode = wedged_mode; @@ -175,6 +176,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads); if (ret) { xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n"); + xe_pm_runtime_put(xe); return -EIO; } } @@ -230,5 +232,7 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); + xe_pxp_debugfs_register(xe->pxp); + fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index fc4a49f25c09b..64d3a26ad4a3e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -49,8 +49,11 @@ #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" +#include "xe_pmu.h" +#include "xe_pxp.h" #include "xe_query.h" #include "xe_sriov.h" +#include "xe_survivability_mode.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" @@ -62,6 +65,12 @@ #include +struct xe_device_remove_action { + struct list_head node; + void (*action)(void *); + void *data; +}; + static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -232,12 +241,117 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo #define xe_drm_compat_ioctl NULL #endif +static void barrier_open(struct vm_area_struct *vma) +{ + drm_dev_get(vma->vm_private_data); +} + +static void barrier_close(struct vm_area_struct *vma) +{ + drm_dev_put(vma->vm_private_data); +} + +static void barrier_release_dummy_page(struct drm_device *dev, void *res) +{ + struct page *dummy_page = (struct page *)res; + + __free_page(dummy_page); +} + +static vm_fault_t barrier_fault(struct vm_fault *vmf) +{ + struct drm_device *dev = vmf->vma->vm_private_data; + struct vm_area_struct *vma = vmf->vma; + vm_fault_t ret = VM_FAULT_NOPAGE; + pgprot_t prot; + int idx; + + prot = vm_get_page_prot(vma->vm_flags); + + if (drm_dev_enter(dev, &idx)) { + unsigned long pfn; + +#define LAST_DB_PAGE_OFFSET 0x7ff001 + pfn = PHYS_PFN(pci_resource_start(to_pci_dev(dev->dev), 0) + + LAST_DB_PAGE_OFFSET); + ret = vmf_insert_pfn_prot(vma, vma->vm_start, pfn, + pgprot_noncached(prot)); + drm_dev_exit(idx); + } else { + struct page *page; + + /* Allocate new dummy page to map all the VA range in this VMA to it*/ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + /* Set the page to be freed using drmm release action */ + if (drmm_add_action_or_reset(dev, barrier_release_dummy_page, page)) + return VM_FAULT_OOM; + + ret = vmf_insert_pfn_prot(vma, vma->vm_start, page_to_pfn(page), + prot); + } + + return ret; +} + +static const struct vm_operations_struct vm_ops_barrier = { + .open = barrier_open, + .close = barrier_close, + .fault = barrier_fault, +}; + +static
int xe_pci_barrier_mmap(struct file *filp, + struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct xe_device *xe = to_xe_device(dev); + + if (!IS_DGFX(xe)) + return -EINVAL; + + if (vma->vm_end - vma->vm_start > SZ_4K) + return -EINVAL; + + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + + if (vma->vm_flags & (VM_READ | VM_EXEC)) + return -EINVAL; + + vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC); + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + vma->vm_ops = &vm_ops_barrier; + vma->vm_private_data = dev; + drm_dev_get(vma->vm_private_data); + + return 0; +} + +static int xe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + switch (vma->vm_pgoff) { + case XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT: + return xe_pci_barrier_mmap(filp, vma); + } + + return drm_gem_mmap(filp, vma); +} + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, .unlocked_ioctl = xe_drm_ioctl, - .mmap = drm_gem_mmap, + .mmap = xe_mmap, .poll = drm_poll, .read = drm_read, .compat_ioctl = xe_drm_compat_ioctl, @@ -578,7 +692,7 @@ int xe_device_probe_early(struct xe_device *xe) { int err; - err = xe_mmio_init(xe); + err = xe_mmio_probe_early(xe); if (err) return err; @@ -587,8 +701,12 @@ int xe_device_probe_early(struct xe_device *xe) update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) + if (err) { + if (xe_survivability_mode_required(xe)) + xe_survivability_mode_init(xe); + return err; + } err = wait_for_lmem_ready(xe); if (err) @@ -623,6 +741,7 @@ static int probe_has_flat_ccs(struct xe_device *xe) "Flat CCS has been disabled in bios, May lead to performance impact"); xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -631,9 +750,11 @@ int xe_device_probe(struct xe_device *xe) struct xe_tile *tile; struct xe_gt *gt; int err; - u8 last_gt; u8 id; + xe->probing = true; + INIT_LIST_HEAD(&xe->remove_action_list); + xe_pat_init_early(xe); err = xe_sriov_init(xe); @@ -641,10 +762,6 @@ int xe_device_probe(struct xe_device *xe) return err; xe->info.mem_region_mask = 1; - err = xe_display_init_nommio(xe); - if (err) - return err; - err = xe_set_dma_info(xe); if (err) return err; @@ -693,36 +810,34 @@ int xe_device_probe(struct xe_device *xe) } err = xe_devcoredump_init(xe); - if (err) - return err; - err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe); if (err) return err; - err = xe_display_init_noirq(xe); + /* + * From here on, if a step fails, make sure a Driver-FLR is triggered + */ + err = devm_add_action_or_reset(xe->drm.dev, xe_driver_flr_fini, xe); if (err) return err; - err = xe_irq_install(xe); - if (err) - goto err; - err = probe_has_flat_ccs(xe); if (err) - goto err; + return err; err = xe_vram_probe(xe); if (err) - goto err; + return err; for_each_tile(tile, xe, id) { err = xe_tile_init_noalloc(tile); if (err) - goto err; + return err; } /* Allocate and map stolen after potential VRAM resize */ - xe_ttm_stolen_mgr_init(xe); + err = xe_ttm_stolen_mgr_init(xe); + if (err) + return err; /* * Now that GT is initialized (TTM in particular), * This is the reason the first allocation needs to be done * inside display.
*/ - err = xe_display_init_noaccel(xe); + err = xe_display_init_early(xe); if (err) - goto err; + return err; - for_each_gt(gt, xe, id) { - last_gt = id; + for_each_tile(tile, xe, id) { + err = xe_tile_init(tile); + if (err) + return err; + } + err = xe_irq_install(xe); + if (err) + return err; + + for_each_gt(gt, xe, id) { err = xe_gt_init(gt); if (err) - goto err_fini_gt; + return err; } xe_heci_gsc_init(xe); err = xe_oa_init(xe); if (err) - goto err_fini_gt; + return err; err = xe_display_init(xe); if (err) - goto err_fini_oa; + return err; + + err = xe_pxp_init(xe); + if (err) + goto err_remove_display; err = drm_dev_register(&xe->drm, 0); if (err) - goto err_fini_display; + goto err_remove_display; xe_display_register(xe); - xe_oa_register(xe); + err = xe_oa_register(xe); + if (err) + goto err_unregister_display; + + err = xe_pmu_register(&xe->pmu); + if (err) + goto err_unregister_display; xe_debugfs_register(xe); - xe_hwmon_register(xe); + err = xe_hwmon_register(xe); + if (err) + goto err_unregister_display; for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); xe_vsec_init(xe); + xe->probing = false; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); -err_fini_display: +err_unregister_display: + xe_display_unregister(xe); +err_remove_display: xe_display_driver_remove(xe); -err_fini_oa: - xe_oa_fini(xe); + return err; +} -err_fini_gt: - for_each_gt(gt, xe, id) { - if (id < last_gt) - xe_gt_remove(gt); - else - break; +/** + * xe_device_call_remove_actions - Call the remove actions + * @xe: xe device instance + * + * This is only to be used by xe_pci and xe_device to call the remove actions + * while removing the driver or handling probe failures. + */ +void xe_device_call_remove_actions(struct xe_device *xe) +{ + struct xe_device_remove_action *ra, *tmp; + + list_for_each_entry_safe(ra, tmp, &xe->remove_action_list, node) { + ra->action(ra->data); + list_del(&ra->node); + kfree(ra); } -err: - xe_display_fini(xe); - return err; + xe->probing = false; } -static void xe_device_remove_display(struct xe_device *xe) +/** + * xe_device_add_action_or_reset - Add an action to run on driver removal + * @xe: xe device instance + * @action: Function that should be called on device remove + * @data: Pointer to data passed to @action implementation + * + * This adds a custom action to the list of remove callbacks executed on device + * remove, before any dev or drm managed resources are removed. This is only + * needed if the action leads to component_del()/component_master_del() since + * that is not compatible with devres cleanup. + * + * Returns: 0 on success or a negative error code on failure, in which case + * @action is already called. 
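+ *
+ * A minimal usage sketch (illustrative only; the foo_* names are hypothetical
+ * placeholders, not part of this series). A part of the driver that registered
+ * a component during probe can queue its component_del() here so that it runs
+ * before the devres/drmm cleanup:
+ *
+ *	static void foo_component_remove(void *arg)
+ *	{
+ *		struct xe_device *xe = arg;
+ *
+ *		component_del(xe->drm.dev, &foo_component_ops);
+ *	}
+ *
+ *	err = xe_device_add_action_or_reset(xe, foo_component_remove, xe);
+ *	if (err)
+ *		return err;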
+ */ +int xe_device_add_action_or_reset(struct xe_device *xe, + void (*action)(void *), void *data) { - xe_display_unregister(xe); + struct xe_device_remove_action *ra; - drm_dev_unplug(&xe->drm); - xe_display_driver_remove(xe); + drm_WARN_ON(&xe->drm, !xe->probing); + + ra = kmalloc(sizeof(*ra), GFP_KERNEL); + if (!ra) { + action(data); + return -ENOMEM; + } + + INIT_LIST_HEAD(&ra->node); + ra->action = action; + ra->data = data; + list_add(&ra->node, &xe->remove_action_list); + + return 0; } void xe_device_remove(struct xe_device *xe) { - struct xe_gt *gt; - u8 id; - - xe_oa_unregister(xe); - - xe_device_remove_display(xe); + xe_display_unregister(xe); - xe_display_fini(xe); + drm_dev_unplug(&xe->drm); - xe_oa_fini(xe); + xe_display_driver_remove(xe); xe_heci_gsc_fini(xe); - for_each_gt(gt, xe, id) - xe_gt_remove(gt); + xe_device_call_remove_actions(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index fc3c2af3fb7fd..079dad32a6f53 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -45,6 +45,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, const struct pci_device_id *ent); int xe_device_probe_early(struct xe_device *xe); int xe_device_probe(struct xe_device *xe); +int xe_device_add_action_or_reset(struct xe_device *xe, + void (*action)(void *), void *data); +void xe_device_call_remove_actions(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); @@ -170,6 +173,11 @@ static inline bool xe_device_uses_memirq(struct xe_device *xe) return xe_device_has_memirq(xe) && (IS_SRIOV_VF(xe) || xe_device_has_msix(xe)); } +static inline bool xe_device_has_lmtt(struct xe_device *xe) +{ + return IS_DGFX(xe); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 17509d3c72d41..4656305dd45a6 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,9 +18,12 @@ #include "xe_memirq_types.h" #include "xe_oa_types.h" #include "xe_platform_types.h" +#include "xe_pmu_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" #include "xe_step_types.h" +#include "xe_survivability_mode_types.h" +#include "xe_ttm_vram_mgr_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -34,6 +37,7 @@ struct xe_ggtt; struct xe_pat_ops; +struct xe_pxp; #define XE_BO_INVALID_OFFSET LONG_MAX @@ -67,11 +71,11 @@ struct xe_pat_ops; struct xe_tile * : (tile__)->xe) /** - * struct xe_mem_region - memory region structure + * struct xe_vram_region - memory region structure * This is used to describe a memory region in xe * device, such as HBM memory or CXL extension memory. */ -struct xe_mem_region { +struct xe_vram_region { /** @io_start: IO start address of this VRAM instance */ resource_size_t io_start; /** @@ -102,6 +106,8 @@ struct xe_mem_region { resource_size_t actual_physical_size; /** @mapping: pointer to VRAM mappable space */ void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; }; /** @@ -186,13 +192,6 @@ struct xe_tile { */ struct xe_mmio mmio; - /** - * @mmio_ext: MMIO-extension info for a tile. - * - * Each tile has its own additional 256MB (28-bit) MMIO-extension space. 
- */ - struct xe_mmio mmio_ext; - /** @mem: memory management info for tile */ struct { /** @@ -201,10 +200,7 @@ struct xe_tile { * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct xe_mem_region vram; - - /** @mem.vram_mgr: VRAM TTM manager */ - struct xe_ttm_vram_mgr *vram_mgr; + struct xe_vram_region vram; /** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; @@ -263,8 +259,6 @@ struct xe_device { const char *graphics_name; /** @info.media_name: media IP name */ const char *media_name; - /** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */ - u32 tile_mmio_ext_size; /** @info.graphics_verx100: graphics IP version */ u32 graphics_verx100; /** @info.media_verx100: media IP version */ @@ -314,8 +308,8 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; - /** @info.has_mmio_ext: Device has extra MMIO address range */ - u8 has_mmio_ext:1; + /** @info.has_pxp: Device has PXP support */ + u8 has_pxp:1; /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; /** @info.has_sriov: Supports SR-IOV */ @@ -341,6 +335,9 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @survivability: survivability information for device */ + struct xe_survivability survivability; + /** @irq: device interrupt state */ struct { /** @irq.lock: lock for processing irq's on this device */ @@ -372,7 +369,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @mem.vram: VRAM info for device */ - struct xe_mem_region vram; + struct xe_vram_region vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; } mem; @@ -431,6 +428,20 @@ struct xe_device { /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; + /** + * @remove_action_list: list of actions to execute on device remove. + * Use xe_device_add_remove_action() for that. Actions can only be added + * during probe and are executed during the call from PCI subsystem to + * remove the driver from the device. + */ + struct list_head remove_action_list; + + /** + * @probing: cover the section in which @remove_action_list can be used + * to post cleaning actions + */ + bool probing; + /** * @mem_access: keep track of memory access in the device, possibly * triggering additional actions when they occur. @@ -514,6 +525,9 @@ struct xe_device { /** @oa: oa observation subsystem */ struct xe_oa oa; + /** @pxp: Encapsulate Protected Xe Path support */ + struct xe_pxp *pxp; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; @@ -525,6 +539,9 @@ struct xe_device { int mode; } wedged; + /** @pmu: performance monitoring unit */ + struct xe_pmu pmu; + #ifdef TEST_VM_OPS_ERROR /** * @vm_inject_error_position: inject errors at different places in VM @@ -585,8 +602,6 @@ struct xe_device { unsigned int czclk_freq; unsigned int fsb_freq, mem_freq, is_ddr3; }; - - void *pxp; #endif }; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 2d4874d2b9225..31f688e953d7b 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -324,6 +324,14 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) u64 gpu_timestamp; unsigned int fw_ref; + /* + * RING_TIMESTAMP registers are inaccessible in VF mode. + * Without drm-total-cycles-*, other keys provide little value. 
+ * Show all or none of the optional "run_ticks" keys in this case. + */ + if (IS_SRIOV_VF(xe)) + return; + /* * Wait for any exec queue going away: their cycles will get updated on * context switch out, so wait for that to happen diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index df8ce550deb40..b75adfc99fb7c 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -262,6 +262,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_exec; } + if (xe_exec_queue_uses_pxp(q)) { + err = xe_vm_validate_protected(q->vm); + if (err) + goto err_exec; + } + job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ? addresses : &args->address); if (IS_ERR(job)) { diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7e1abbbfba121..23a9f519ce1c7 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -25,6 +25,7 @@ #include "xe_ring_ops_types.h" #include "xe_trace.h" #include "xe_vm.h" +#include "xe_pxp.h" enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_JOB_TIMEOUT = 0, @@ -38,6 +39,8 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue static void __xe_exec_queue_free(struct xe_exec_queue *q) { + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); if (q->vm) xe_vm_put(q->vm); @@ -78,6 +81,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); + INIT_LIST_HEAD(&q->pxp.link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -112,6 +116,21 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) { struct xe_vm *vm = q->vm; int i, err; + u32 flags = 0; + + /* + * PXP workloads executing on RCS or CCS must run in isolation (i.e. no + * other workload can use the EUs at the same time). On MTL this is done + * by setting the RUNALONE bit in the LRC, while starting on Xe2 there + * is a dedicated bit for it. + */ + if (xe_exec_queue_uses_pxp(q) && + (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) { + if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20) + flags |= XE_LRC_CREATE_PXP; + else + flags |= XE_LRC_CREATE_RUNALONE; + } if (vm) { err = xe_vm_lock(vm, true); @@ -120,7 +139,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) } for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); goto err_unlock; @@ -153,6 +172,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v struct xe_exec_queue *q; int err; + /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */ + xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0))); + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, extensions); if (IS_ERR(q)) @@ -162,6 +184,19 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (err) goto err_post_alloc; + /* + * We can only add the queue to the PXP list after the init is complete, + * because the PXP termination can call exec_queue_kill and that will + * go bad if the queue is only half-initialized. 
This means that we + * can't do it when we handle the PXP extension in __xe_exec_queue_alloc + * and we need to do it here instead. + */ + if (xe_exec_queue_uses_pxp(q)) { + err = xe_pxp_exec_queue_add(xe->pxp, q); + if (err) + goto err_post_alloc; + } + return q; err_post_alloc: @@ -250,6 +285,9 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + if (xe_exec_queue_uses_pxp(q)) + xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + xe_exec_queue_last_fence_put_unlocked(q); if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, @@ -405,6 +443,22 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return 0; } +static int +exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value) +{ + if (value == DRM_XE_PXP_TYPE_NONE) + return 0; + + /* we only support HWDRM sessions right now */ + if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM)) + return -EINVAL; + + if (!xe_pxp_is_enabled(xe->pxp)) + return -ENODEV; + + return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -412,6 +466,7 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -431,7 +486,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, ARRAY_SIZE(exec_queue_set_property_funcs)) || XE_IOCTL_DBG(xe, ext.pad) || XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); @@ -483,7 +539,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, +static u32 calc_validate_logical_mask(struct xe_device *xe, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) { @@ -545,7 +601,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; struct xe_vm *vm; - struct xe_gt *gt; struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; @@ -598,8 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, &q->multi_gt_link); } } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, + logical_mask = calc_validate_logical_mask(xe, eci, args->width, args->num_placements); if (XE_IOCTL_DBG(xe, !logical_mask)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 90c7f73eab884..17bc50a7f05a4 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -57,6 +57,11 @@ static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q) return q->width > 1; } +static inline bool 
xe_exec_queue_uses_pxp(struct xe_exec_queue *q) +{ + return q->pxp.type; +} + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_ring_full(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 5af5419cec7a6..6eb7ff091534f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -130,6 +130,14 @@ struct xe_exec_queue { struct list_head link; } lr; + /** @pxp: PXP info tracking */ + struct { + /** @pxp.type: PXP session type used by this queue */ + u8 type; + /** @pxp.link: link into the list of PXP exec queues */ + struct list_head link; + } pxp; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 66001af5cf552..9fbed1a2fcc62 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -269,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, port->hwe = hwe; - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); if (IS_ERR(port->lrc)) { err = PTR_ERR(port->lrc); goto err; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 1eb791ddc375c..fd41113f85725 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,15 +555,6 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } -/** - * xe_gsc_remove() - Clean up the GSC structures before driver removal - * @gsc: the GSC uC - */ -void xe_gsc_remove(struct xe_gsc *gsc) -{ - xe_gsc_proxy_remove(gsc); -} - /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index e282b9ef6ec4d..d99f66c38075c 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -17,7 +17,6 @@ int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); -void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 24cc6a4f9a96a..31c90577faf0b 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -423,6 +423,34 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref = 0; + + if (!gsc->proxy.component_added) + return; + + /* disable HECI2 IRQs */ + xe_pm_runtime_get(xe); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref) + xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); + + /* try do disable irq even if forcewake failed */ + gsc_proxy_irq_toggle(gsc, false); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_pm_runtime_put(xe); + + xe_gsc_wait_for_worker_completion(gsc); + + component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); + gsc->proxy.component_added = false; +} + /** * xe_gsc_proxy_init() - init objects and MEI 
component required by GSC proxy * @gsc: the GSC uC @@ -462,40 +490,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) gsc->proxy.component_added = true; - /* the component must be removed before unload, so can't use drmm for cleanup */ - - return 0; -} - -/** - * xe_gsc_proxy_remove() - remove the GSC proxy MEI component - * @gsc: the GSC uC - */ -void xe_gsc_proxy_remove(struct xe_gsc *gsc) -{ - struct xe_gt *gt = gsc_to_gt(gsc); - struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref = 0; - - if (!gsc->proxy.component_added) - return; - - /* disable HECI2 IRQs */ - xe_pm_runtime_get(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (!fw_ref) - xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); - - /* try do disable irq even if forcewake failed */ - gsc_proxy_irq_toggle(gsc, false); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); - - xe_gsc_wait_for_worker_completion(gsc); - - component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); - gsc->proxy.component_added = false; + return xe_device_add_action_or_reset(xe, xe_gsc_proxy_remove, gsc); } /** diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index c511ade6b8637..fdef56995cd43 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,7 +12,6 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); -void xe_gsc_proxy_remove(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 5926de20214c8..97c056656df05 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -13,6 +13,7 @@ #include #include "xe_uc_fw_types.h" +#include "xe_device_types.h" struct xe_bo; struct xe_exec_queue; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5d6fb79957b63..650a0ee56e97e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -32,6 +32,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -140,26 +141,6 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) xe_force_wake_put(gt_to_fw(gt), fw_ref); } -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. 
- */ -void xe_gt_remove(struct xe_gt *gt) -{ - int i; - - xe_uc_remove(>->uc); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - xe_gt_disable_host_l2_vram(gt); -} - static void gt_reset_worker(struct work_struct *w); static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) @@ -408,13 +389,11 @@ static void dump_pat_on_error(struct xe_gt *gt) static int gt_fw_domain_init(struct xe_gt *gt) { unsigned int fw_ref; - int err, i; + int err; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) { - err = -ETIMEDOUT; - goto err_hw_fence_irq; - } + if (!fw_ref) + return -ETIMEDOUT; if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); @@ -455,9 +434,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) err_force_wake: dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); return err; } @@ -465,7 +441,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) static int all_fw_domain_init(struct xe_gt *gt) { unsigned int fw_ref; - int err, i; + int err; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { @@ -543,8 +519,6 @@ static int all_fw_domain_init(struct xe_gt *gt) err_force_wake: xe_force_wake_put(gt_to_fw(gt), fw_ref); - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); return err; } @@ -582,6 +556,17 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) return err; } +static void xe_gt_fini(void *arg) +{ + struct xe_gt *gt = arg; + int i; + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); +} + int xe_gt_init(struct xe_gt *gt) { int err; @@ -594,6 +579,10 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(>->fence_irq[i]); } + err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt); + if (err) + return err; + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -637,17 +626,19 @@ int xe_gt_init(struct xe_gt *gt) void xe_gt_mmio_init(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); - gt->mmio.regs = tile->mmio.regs; - gt->mmio.regs_size = tile->mmio.regs_size; - gt->mmio.tile = tile; + xe_mmio_init(>->mmio, tile, tile->mmio.regs, tile->mmio.regs_size); if (gt->info.type == XE_GT_TYPE_MEDIA) { gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; + } else { + gt->mmio.adj_offset = 0; + gt->mmio.adj_limit = 0; } - if (IS_SRIOV_VF(gt_to_xe(gt))) + if (IS_SRIOV_VF(xe)) gt->mmio.sriov_vf_gt = gt; } @@ -676,6 +667,9 @@ static int do_gt_reset(struct xe_gt *gt) { int err; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return xe_gt_sriov_vf_reset(gt); + xe_gsc_wa_14015076503(gt, true); xe_mmio_write32(>->mmio, GDRST, GRDOM_FULL); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index e504cc33ade4f..187fa6490eafc 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -54,7 +54,6 @@ int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); int xe_gt_sanitize_freq(struct xe_gt *gt); -void xe_gt_remove(struct xe_gt *gt); /** * xe_gt_wait_for_reset - wait for gt's async reset to finalize. 
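A minimal sketch of the devm-managed teardown pattern that the xe_gt.c changes above switch to (the foo_* helpers are hypothetical placeholders, not part of this series). Cleanup that has to call component_del() cannot be handled this way and goes through xe_device_add_action_or_reset() instead, since component removal is not compatible with devres cleanup:

	static void foo_fini(void *arg)
	{
		struct xe_gt *gt = arg;

		/* called automatically on driver unbind and on probe failure */
		foo_disable_hw(gt);
	}

	static int foo_init(struct xe_gt *gt)
	{
		foo_enable_hw(gt);

		/* later error paths no longer need a goto-based unwind */
		return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, foo_fini, gt);
	}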
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index ffd3ba7f66561..fbbace7b0b12a 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -69,6 +69,8 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) { u64 delta, overflow_residency, prev_residency; + lockdep_assert_held(>idle->lock); + overflow_residency = BIT_ULL(32); /* @@ -275,8 +277,21 @@ static ssize_t idle_status_show(struct device *dev, return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } -static DEVICE_ATTR_RO(idle_status); +u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle) +{ + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + u64 residency; + unsigned long flags; + + raw_spin_lock_irqsave(>idle->lock, flags); + residency = get_residency_ms(gtidle, gtidle->idle_residency(pc)); + raw_spin_unlock_irqrestore(>idle->lock, flags); + + return residency; +} + +static DEVICE_ATTR_RO(idle_status); static ssize_t idle_residency_ms_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -285,10 +300,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, u64 residency; xe_pm_runtime_get(pc_to_xe(pc)); - residency = gtidle->idle_residency(pc); + residency = xe_gt_idle_residency_msec(gtidle); xe_pm_runtime_put(pc_to_xe(pc)); - return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); + return sysfs_emit(buff, "%llu\n", residency); } static DEVICE_ATTR_RO(idle_residency_ms); @@ -331,6 +346,8 @@ int xe_gt_idle_init(struct xe_gt_idle *gtidle) if (!kobj) return -ENOMEM; + raw_spin_lock_init(>idle->lock); + if (xe_gt_is_media_type(gt)) { snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 4455a6501cb07..591a01e181bcc 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -17,5 +17,6 @@ void xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); +u64 xe_gt_idle_residency_msec(struct xe_gt_idle *gtidle); #endif /* _XE_GT_IDLE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index b8b297a3f8848..a3667c567f8a7 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -6,6 +6,7 @@ #ifndef _XE_GT_IDLE_SYSFS_TYPES_H_ #define _XE_GT_IDLE_SYSFS_TYPES_H_ +#include #include struct xe_guc_pc; @@ -31,6 +32,8 @@ struct xe_gt_idle { u64 cur_residency; /** @prev_residency: previous residency counter */ u64 prev_residency; + /** @lock: Lock protecting idle residency counters */ + raw_spinlock_t lock; /** @idle_status: get the current idle state */ enum xe_gt_idle_state (*idle_status)(struct xe_guc_pc *pc); /** @idle_residency: get idle residency counter */ diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index a1676b787fdcb..605aad3554e77 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -341,7 +341,13 @@ static unsigned int dss_per_group(struct xe_gt *gt) return DIV_ROUND_UP(max_subslices, max_slices); fallback: - xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); + /* + * Some older platforms don't have tables or don't have complete tables. + * Newer platforms should always have the required info. 
+ */ + if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 2000) + xe_gt_err(gt, "Slice/Subslice counts missing from hwconfig table; using typical fallback values\n"); + if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 2606cd396df5c..46701ca11ce0d 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -14,6 +14,7 @@ #include "abi/guc_actions_abi.h" #include "xe_bo.h" #include "xe_gt.h" +#include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -124,16 +125,20 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, return 0; } -static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf, +static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); + struct xe_tile *tile = gt_to_tile(gt); struct drm_exec exec; struct dma_fence *fence; ktime_t end = 0; int err; bool atomic; + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, xe_vma_size(vma)); + trace_xe_vma_pagefault(vma); atomic = access_is_atomic(pf->access_type); @@ -202,7 +207,6 @@ static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); struct xe_vm *vm; struct xe_vma *vma = NULL; int err; @@ -231,7 +235,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) goto unlock_vm; } - err = handle_vma_pagefault(tile, pf, vma); + err = handle_vma_pagefault(gt, pf, vma); unlock_vm: if (!err) @@ -263,12 +267,13 @@ static void print_pagefault(struct xe_device *xe, struct pagefault *pf) "\tFaultType: %d\n" "\tAccessType: %d\n" "\tFaultLevel: %d\n" - "\tEngineClass: %d\n" + "\tEngineClass: %d %s\n" "\tEngineInstance: %d\n", pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), lower_32_bits(pf->page_addr), pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, pf->engine_instance); + pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), + pf->engine_instance); } #define PF_MSG_LEN_DW 4 diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 6f906c8e8108b..c08efca6420e7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -15,7 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_printk.h" #include "xe_mmio.h" +#include "xe_pm.h" + +static void pf_worker_restart_func(struct work_struct *w); /* * VF's metadata is maintained in the flexible array where: @@ -41,6 +45,11 @@ static int pf_alloc_metadata(struct xe_gt *gt) return 0; } +static void pf_init_workers(struct xe_gt *gt) +{ + INIT_WORK(>->sriov.pf.workers.restart, pf_worker_restart_func); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. 
* @gt: the &xe_gt to initialize @@ -65,6 +74,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + pf_init_workers(gt); + return 0; } @@ -78,6 +89,12 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) */ int xe_gt_sriov_pf_init(struct xe_gt *gt) { + int err; + + err = xe_gt_sriov_pf_config_init(gt); + if (err) + return err; + return xe_gt_sriov_pf_migration_init(gt); } @@ -155,6 +172,35 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_pm_runtime_get(xe); + xe_gt_sriov_pf_config_restart(gt); + xe_gt_sriov_pf_control_restart(gt); + xe_pm_runtime_put(xe); + + xe_gt_sriov_dbg(gt, "restart completed\n"); +} + +static void pf_worker_restart_func(struct work_struct *w) +{ + struct xe_gt *gt = container_of(w, typeof(*gt), sriov.pf.workers.restart); + + pf_restart(gt); +} + +static void pf_queue_restart(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + if (!queue_work(xe->sriov.wq, >->sriov.pf.workers.restart)) + xe_gt_sriov_dbg(gt, "restart already in queue!\n"); +} + /** * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset. * @gt: the &xe_gt @@ -163,6 +209,5 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) */ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { - xe_gt_sriov_pf_config_restart(gt); - xe_gt_sriov_pf_control_restart(gt); + pf_queue_restart(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 878e96281c035..10be109bf357f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -20,6 +20,7 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_printk.h" #include "xe_guc.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" #include "xe_guc_fwif.h" @@ -71,48 +72,27 @@ static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid) * Return: number of KLVs that were successfully parsed and saved, * negative error code on failure. */ -static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords) +static int pf_send_vf_buf_klvs(struct xe_gt *gt, u32 vfid, struct xe_guc_buf buf, u32 num_dwords) { - const u32 bytes = num_dwords * sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; - int ret; - - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(bytes, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); - - ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords); - xe_bo_unpin_map_no_vm(bo); - - return ret; + return guc_action_update_vf_cfg(guc, vfid, xe_guc_buf_flush(buf), num_dwords); } /* * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, * negative error code on failure. 
*/ -static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, - const u32 *klvs, u32 num_dwords) +static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + struct xe_guc_buf buf, u32 num_dwords) { int ret; - xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); - - ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords); + ret = pf_send_vf_buf_klvs(gt, vfid, buf, num_dwords); if (ret != num_klvs) { int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; + void *klvs = xe_guc_buf_cpu_ptr(buf); struct drm_printer p = xe_gt_info_printer(gt); char name[8]; @@ -125,13 +105,35 @@ static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { struct drm_printer p = xe_gt_info_printer(gt); + void *klvs = xe_guc_buf_cpu_ptr(buf); + char name[8]; + xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } return 0; } +/* + * Return: 0 on success, -ENOBUFS if no free buffer for the indirect data, + * negative error code on failure. + */ +static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + CLASS(xe_guc_buf_from_data, buf)(>->uc.guc.buf, klvs, num_dwords * sizeof(u32)); + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + return pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); +} + static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value) { u32 klv[] = { @@ -262,7 +264,7 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool n += encode_config_ggtt(cfg, config, details); - if (details) { + if (details && config->num_ctxs) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID); cfg[n++] = config->begin_ctx; } @@ -270,7 +272,7 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS); cfg[n++] = config->num_ctxs; - if (details) { + if (details && config->num_dbs) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID); cfg[n++] = config->begin_db; } @@ -304,16 +306,17 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - u32 max_cfg_dwords = SZ_4K / sizeof(u32); + u32 max_cfg_dwords = xe_guc_buf_cache_dwords(>->uc.guc.buf); + CLASS(xe_guc_buf, buf)(>->uc.guc.buf, max_cfg_dwords); u32 num_dwords; int num_klvs; u32 *cfg; int err; - cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL); - if (!cfg) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + cfg = xe_guc_buf_cpu_ptr(buf); num_dwords = encode_config(cfg, config, true); xe_gt_assert(gt, num_dwords <= max_cfg_dwords); @@ -330,12 +333,31 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) xe_gt_assert(gt, num_dwords <= max_cfg_dwords); num_klvs = xe_guc_klv_count(cfg, num_dwords); - err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords); + err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); - kfree(cfg); return err; } +static int pf_push_vf_cfg(struct xe_gt *gt, unsigned int vfid, bool reset) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + 
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (reset) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + + return err; +} + +static int pf_refresh_vf_cfg(struct xe_gt *gt, unsigned int vfid) +{ + return pf_push_vf_cfg(gt, vfid, true); +} + static u64 pf_get_ggtt_alignment(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -432,6 +454,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) return err; pf_release_vf_config_ggtt(gt, config); + + err = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(err)) + return err; } xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); @@ -757,6 +783,10 @@ static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctx return ret; pf_release_config_ctxs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_ctxs) @@ -1054,6 +1084,10 @@ static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) return ret; pf_release_config_dbs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_dbs) @@ -1302,7 +1336,7 @@ static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) struct xe_tile *tile; unsigned int tid; - xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, xe_device_has_lmtt(xe)); xe_assert(xe, IS_SRIOV_PF(xe)); for_each_tile(tile, xe, tid) { @@ -1323,7 +1357,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) unsigned int tid; int err; - xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, xe_device_has_lmtt(xe)); xe_assert(xe, IS_SRIOV_PF(xe)); total = 0; @@ -1400,7 +1434,8 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) if (unlikely(err)) return err; - pf_reset_vf_lmtt(xe, vfid); + if (xe_device_has_lmtt(xe)) + pf_reset_vf_lmtt(xe, vfid); pf_release_vf_config_lmem(gt, config); } xe_gt_assert(gt, !config->lmem_obj); @@ -1420,9 +1455,11 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) config->lmem_obj = bo; - err = pf_update_vf_lmtt(xe, vfid); - if (unlikely(err)) - goto release; + if (xe_device_has_lmtt(xe)) { + err = pf_update_vf_lmtt(xe, vfid); + if (unlikely(err)) + goto release; + } err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); if (unlikely(err)) @@ -1433,7 +1470,8 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; reset_lmtt: - pf_reset_vf_lmtt(xe, vfid); + if (xe_device_has_lmtt(xe)) + pf_reset_vf_lmtt(xe, vfid); release: pf_release_vf_config_lmem(gt, config); return err; @@ -1526,7 +1564,7 @@ static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile; - return xe_ttm_vram_get_avail(&tile->mem.vram_mgr->manager); + return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); } static u64 pf_query_max_lmem(struct xe_gt *gt) @@ -1947,7 +1985,8 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { pf_release_vf_config_lmem(gt, config); - pf_update_vf_lmtt(xe, vfid); + if (xe_device_has_lmtt(xe)) + pf_update_vf_lmtt(xe, vfid); } } pf_release_config_ctxs(gt, config); @@ -2085,10 +2124,7 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh xe_gt_assert(gt, vfid); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - if (refresh) - err = pf_send_vf_cfg_reset(gt, vfid); - if (!err) - err = pf_push_full_vf_config(gt, vfid); + err = pf_push_vf_cfg(gt, vfid, refresh); 
mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); if (unlikely(err)) { @@ -2320,6 +2356,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void fini_config(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) + pf_release_vf_config(gt, n); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); +} + +/** + * xe_gt_sriov_pf_config_init - Initialize SR-IOV configuration data. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); +} + /** * xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset. * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index f894e9d4abba2..513e6512a575b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -63,6 +63,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_init(struct xe_gt *gt); void xe_gt_sriov_pf_config_restart(struct xe_gt *gt); int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index c00fb354705f4..4445f660e6d10 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_buf.h" #include "xe_guc_ct.h" #include "xe_guc_klv_helpers.h" #include "xe_pm.h" @@ -34,48 +35,28 @@ static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size) * Return: number of KLVs that were successfully parsed and saved, * negative error code on failure. */ -static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords) +static int pf_send_policy_klvs(struct xe_gt *gt, struct xe_guc_buf buf, u32 num_dwords) { - const u32 bytes = num_dwords * sizeof(u32); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = tile_to_xe(tile); struct xe_guc *guc = >->uc.guc; - struct xe_bo *bo; - int ret; - - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(bytes, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); - ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords); - - xe_bo_unpin_map_no_vm(bo); - - return ret; + return guc_action_update_vgt_policy(guc, xe_guc_buf_flush(buf), num_dwords); } /* * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, * negative error code on failure. 
*/ -static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, - const u32 *klvs, u32 num_dwords) +static int pf_push_policy_buf_klvs(struct xe_gt *gt, u32 num_klvs, + struct xe_guc_buf buf, u32 num_dwords) { int ret; - xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); - - ret = pf_send_policy_klvs(gt, klvs, num_dwords); + ret = pf_send_policy_klvs(gt, buf, num_dwords); if (ret != num_klvs) { int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; struct drm_printer p = xe_gt_info_printer(gt); + void *klvs = xe_guc_buf_cpu_ptr(buf); xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n", num_klvs, str_plural(num_klvs), ERR_PTR(err)); @@ -86,6 +67,23 @@ static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, return 0; } +/* + * Return: 0 on success, -ENOBUFS if there is no free buffer for the indirect data, + * negative error code on failure. + */ +static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + CLASS(xe_guc_buf_from_data, buf)(>->uc.guc.buf, klvs, num_dwords * sizeof(u32)); + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + return pf_push_policy_buf_klvs(gt, num_klvs, buf, num_dwords); +} + static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value) { u32 klv[] = { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 924e75b94aec0..6b5f849a07223 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -176,11 +176,32 @@ static const struct xe_reg ver_2000_runtime_regs[] = { TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; +static const struct xe_reg ver_3000_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ +}; + static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) { const struct xe_reg *regs; - if (GRAPHICS_VERx100(xe) >= 2000) { + if (GRAPHICS_VERx100(xe) >= 3000) { + *count = ARRAY_SIZE(ver_3000_runtime_regs); + regs = ver_3000_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 2000) { *count = ARRAY_SIZE(ver_2000_runtime_regs); regs = ver_2000_runtime_regs; } else if (GRAPHICS_VERx100(xe) >= 1270) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 0426b1a77069a..a64a6835ad656 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -35,8 +35,17 @@ struct xe_gt_sriov_metadata { struct xe_gt_sriov_state_snapshot snapshot; }; +/** + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF. + */ +struct xe_gt_sriov_pf_workers { + /** @restart: worker that executes actions post GT reset */ + struct work_struct restart; +}; + /** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @workers: workers data. * @service: service data. * @control: control data. 
* @policy: policy data. @@ -45,6 +54,7 @@ struct xe_gt_sriov_metadata { * @vfs: metadata for all VFs. */ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_workers workers; struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_control control; struct xe_gt_sriov_pf_policy policy; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index cca5d57328021..4831549da319a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -58,6 +58,22 @@ static int vf_reset_guc_state(struct xe_gt *gt) return err; } +/** + * xe_gt_sriov_vf_reset - Reset GuC VF internal state. + * @gt: the &xe_gt + * + * It requires functional `GuC MMIO based communication`_. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_reset(struct xe_gt *gt) +{ + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + + return vf_reset_guc_state(gt); +} + static int guc_action_match_version(struct xe_guc *guc, u32 wanted_branch, u32 wanted_major, u32 wanted_minor, u32 *branch, u32 *major, u32 *minor, u32 *patch) @@ -213,6 +229,9 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) { int err; + if (!xe_device_uc_enabled(gt_to_xe(gt))) + return -ENODEV; + err = vf_reset_guc_state(gt); if (unlikely(err)) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 912d208142616..ba6c5d74e326f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -12,6 +12,7 @@ struct drm_printer; struct xe_gt; struct xe_reg; +int xe_gt_sriov_vf_reset(struct xe_gt *gt); int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 7a6c1d808e419..2e9879ea4674a 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -28,6 +28,8 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { "tlb_inval_count", + "vma_pagefault_count", + "vma_pagefault_bytes", }; /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index 2fc055e39f273..b072bd80c4b97 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -8,6 +8,8 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_TLB_INVAL, + XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, + XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index df2042db7ee68..516c81e3b8dd9 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -129,7 +129,8 @@ static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { struct xe_device *xe = gt_to_xe(gt); - u32 fuse3 = xe_mmio_read32(>->mmio, MIRROR_FUSE3); + struct xe_mmio *mmio = >->mmio; + u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); /* * PTL platforms with media version 30.00 do not provide proper values @@ -143,7 +144,16 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) if (XE_WA(gt, no_media_l3)) return; - if (GRAPHICS_VER(xe) >= 20) { + if (GRAPHICS_VER(xe) >= 30) { + xe_l3_bank_mask_t per_node = {}; + u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); + u32 mirror_l3bank_enable = xe_mmio_read32(mmio, 
MIRROR_L3BANK_ENABLE); + u32 bank_val = REG_FIELD_GET(XE3_L3BANK_ENABLE, mirror_l3bank_enable); + + bitmap_from_arr32(per_node, &bank_val, 32); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 32, + meml3_en); + } else if (GRAPHICS_VER(xe) >= 20) { xe_l3_bank_mask_t per_node = {}; u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); @@ -155,7 +165,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) xe_l3_bank_mask_t per_node = {}; xe_l3_bank_mask_t per_mask_bit = {}; u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); - u32 fuse4 = xe_mmio_read32(>->mmio, XEHP_FUSE4); + u32 fuse4 = xe_mmio_read32(mmio, XEHP_FUSE4); u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); bitmap_set_value8(per_mask_bit, 0x3, 0); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 408365dfe4eed..1619c0a52db93 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -23,6 +23,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_gt_throttle.h" #include "xe_guc_ads.h" +#include "xe_guc_buf.h" #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" @@ -743,6 +744,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_buf_cache_init(&guc->buf); + if (ret) + return ret; + return xe_guc_ads_init_post_hwconfig(&guc->ads); } diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c new file mode 100644 index 0000000000000..0193c94dd6a00 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include +#include + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_buf.h" +#include "xe_sa.h" + +static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache) +{ + return container_of(cache, struct xe_guc, buf); +} + +static struct xe_gt *cache_to_gt(struct xe_guc_buf_cache *cache) +{ + return guc_to_gt(cache_to_guc(cache)); +} + +/** + * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache. + * @cache: the &xe_guc_buf_cache to initialize + * + * The Buffer Cache allows to obtain a reusable buffer that can be used to pass + * indirect H2G data to GuC without a need to create a ad-hoc allocation. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache) +{ + struct xe_gt *gt = cache_to_gt(cache); + struct xe_sa_manager *sam; + + /* XXX: currently it's useful only for the PF actions */ + if (!IS_SRIOV_PF(gt_to_xe(gt))) + return 0; + + sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32)); + if (IS_ERR(sam)) + return PTR_ERR(sam); + cache->sam = sam; + + xe_gt_dbg(gt, "reusable buffer with %u dwords at %#x for %ps\n", + xe_guc_buf_cache_dwords(cache), xe_bo_ggtt_addr(sam->bo), + __builtin_return_address(0)); + return 0; +} + +/** + * xe_guc_buf_cache_dwords() - Number of dwords the GuC Buffer Cache supports. + * @cache: the &xe_guc_buf_cache to query + * + * Return: a size of the largest reusable buffer (in dwords) + */ +u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache) +{ + return cache->sam ? cache->sam->base.size / sizeof(u32) : 0; +} + +/** + * xe_guc_buf_reserve() - Reserve a new sub-allocation. 
+ * @cache: the &xe_guc_buf_cache where to reserve the sub-allocation
+ * @dwords: the requested size of the buffer in dwords
+ *
+ * Use xe_guc_buf_is_valid() to check if the returned buffer reference is valid.
+ * Must use xe_guc_buf_release() to release a sub-allocation.
+ *
+ * Return: a &xe_guc_buf of the new sub-allocation.
+ */
+struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords)
+{
+	struct drm_suballoc *sa;
+
+	if (cache->sam)
+		sa = __xe_sa_bo_new(cache->sam, dwords * sizeof(u32), GFP_ATOMIC);
+	else
+		sa = ERR_PTR(-EOPNOTSUPP);
+
+	return (struct xe_guc_buf){ .sa = sa };
+}
+
+/**
+ * xe_guc_buf_from_data() - Reserve a new sub-allocation using data.
+ * @cache: the &xe_guc_buf_cache where to reserve the sub-allocation
+ * @data: the data to copy into the sub-allocation
+ * @size: the size of the data
+ *
+ * Similar to xe_guc_buf_reserve() but flushes @data to the GPU memory.
+ *
+ * Return: a &xe_guc_buf of the new sub-allocation.
+ */
+struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache,
+				       const void *data, size_t size)
+{
+	struct drm_suballoc *sa;
+
+	sa = __xe_sa_bo_new(cache->sam, size, GFP_ATOMIC);
+	if (!IS_ERR(sa))
+		memcpy(xe_sa_bo_cpu_addr(sa), data, size);
+
+	return (struct xe_guc_buf){ .sa = sa };
+}
+
+/**
+ * xe_guc_buf_release() - Release a sub-allocation.
+ * @buf: the &xe_guc_buf to release
+ *
+ * Releases a sub-allocation reserved by xe_guc_buf_reserve().
+ */
+void xe_guc_buf_release(const struct xe_guc_buf buf)
+{
+	if (xe_guc_buf_is_valid(buf))
+		xe_sa_bo_free(buf.sa, NULL);
+}
+
+/**
+ * xe_guc_buf_flush() - Copy the data from the sub-allocation to the GPU memory.
+ * @buf: the &xe_guc_buf to flush
+ *
+ * Return: a GPU address of the sub-allocation.
+ */
+u64 xe_guc_buf_flush(const struct xe_guc_buf buf)
+{
+	xe_sa_bo_flush_write(buf.sa);
+	return xe_sa_bo_gpu_addr(buf.sa);
+}
+
+/**
+ * xe_guc_buf_cpu_ptr() - Obtain a CPU pointer to the sub-allocation.
+ * @buf: the &xe_guc_buf to query
+ *
+ * Return: a CPU pointer of the sub-allocation.
+ */
+void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf)
+{
+	return xe_sa_bo_cpu_addr(buf.sa);
+}
+
+/**
+ * xe_guc_buf_gpu_addr() - Obtain a GPU address of the sub-allocation.
+ * @buf: the &xe_guc_buf to query
+ *
+ * Return: a GPU address of the sub-allocation.
+ */
+u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf)
+{
+	return xe_sa_bo_gpu_addr(buf.sa);
+}
+
+/**
+ * xe_guc_cache_gpu_addr_from_ptr() - Lookup a GPU address using the pointer.
+ * @cache: the &xe_guc_buf_cache with sub-allocations
+ * @ptr: the CPU pointer of the sub-allocation
+ * @size: the size of the data
+ *
+ * Return: a GPU address on success or 0 if the pointer was unrelated.
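For reference, a minimal sketch of how a caller might drive the manual reserve/copy/flush/release flow added above. This is not part of the patch; example_send_h2g() is a made-up placeholder for any H2G action that takes the GPU address of the indirect data:

/* Hedged sketch, not from the series: manual use of the GuC buffer cache. */
static int example_push_klvs(struct xe_guc *guc, const u32 *klvs, u32 num_dwords)
{
	struct xe_guc_buf buf = xe_guc_buf_reserve(&guc->buf, num_dwords);
	u64 addr;
	int err;

	if (!xe_guc_buf_is_valid(buf))
		return -ENOBUFS;

	memcpy(xe_guc_buf_cpu_ptr(buf), klvs, num_dwords * sizeof(u32));
	addr = xe_guc_buf_flush(buf);	/* make the data visible to the GuC */

	err = example_send_h2g(guc, addr, num_dwords);	/* placeholder helper */

	xe_guc_buf_release(buf);
	return err;
}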
+ */
+u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size)
+{
+	ptrdiff_t offset = ptr - cache->sam->cpu_ptr;
+
+	if (offset < 0 || offset + size > cache->sam->base.size)
+		return 0;
+
+	return cache->sam->gpu_addr + offset;
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_buf_kunit.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_buf.h b/drivers/gpu/drm/xe/xe_guc_buf.h
new file mode 100644
index 0000000000000..0d67604d96bdd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_buf.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_BUF_H_
+#define _XE_GUC_BUF_H_
+
+#include
+#include
+
+#include "xe_guc_buf_types.h"
+
+int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache);
+u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache);
+struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords);
+struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache,
+				       const void *data, size_t size);
+void xe_guc_buf_release(const struct xe_guc_buf buf);
+
+/**
+ * xe_guc_buf_is_valid() - Check if a buffer reference is valid.
+ * @buf: the &xe_guc_buf reference to check
+ *
+ * Return: true if @buf represents a valid sub-allocation.
+ */
+static inline bool xe_guc_buf_is_valid(const struct xe_guc_buf buf)
+{
+	return !IS_ERR_OR_NULL(buf.sa);
+}
+
+void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf);
+u64 xe_guc_buf_flush(const struct xe_guc_buf buf);
+u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf);
+u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size);
+
+DEFINE_CLASS(xe_guc_buf, struct xe_guc_buf,
+	     xe_guc_buf_release(_T),
+	     xe_guc_buf_reserve(cache, num),
+	     struct xe_guc_buf_cache *cache, u32 num);
+
+DEFINE_CLASS(xe_guc_buf_from_data, struct xe_guc_buf,
+	     xe_guc_buf_release(_T),
+	     xe_guc_buf_from_data(cache, data, size),
+	     struct xe_guc_buf_cache *cache, const void *data, size_t size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_buf_types.h b/drivers/gpu/drm/xe/xe_guc_buf_types.h
new file mode 100644
index 0000000000000..9e123d71c064a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_buf_types.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_BUF_TYPES_H_
+#define _XE_GUC_BUF_TYPES_H_
+
+struct drm_suballoc;
+struct xe_sa_manager;
+
+/**
+ * struct xe_guc_buf_cache - GuC Data Buffer Cache.
+ */
+struct xe_guc_buf_cache {
+	/* private: internal sub-allocation manager */
+	struct xe_sa_manager *sam;
+};
+
+/**
+ * struct xe_guc_buf - GuC Data Buffer Reference.
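The DEFINE_CLASS() helpers above also enable a scope-based variant, which is how pf_push_policy_klvs() earlier in this series uses the cache. A hedged sketch of that style, again with example_send_h2g() as a placeholder for the actual H2G action:

/* Scope-based sketch: the buffer is released automatically on scope exit. */
static int example_push_klvs_scoped(struct xe_gt *gt, const u32 *klvs, u32 num_dwords)
{
	CLASS(xe_guc_buf_from_data, buf)(&gt->uc.guc.buf, klvs, num_dwords * sizeof(u32));

	if (!xe_guc_buf_is_valid(buf))
		return -ENOBUFS;

	/* no explicit xe_guc_buf_release() needed here */
	return example_send_h2g(&gt->uc.guc, xe_guc_buf_flush(buf), num_dwords);
}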
+ */ +struct xe_guc_buf { + /* private: internal sub-allocation reference */ + struct drm_suballoc *sa; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 995b306aced77..c569ff456e741 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -13,6 +13,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_log.h" +#include "xe_guc_pc.h" #include "xe_macros.h" #include "xe_pm.h" @@ -47,6 +48,18 @@ static int guc_log(struct seq_file *m, void *data) return 0; } +static int guc_log_dmesg(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + + xe_pm_runtime_get(xe); + xe_guc_log_print_dmesg(&guc->log); + xe_pm_runtime_put(xe); + + return 0; +} + static int guc_ctb(struct seq_file *m, void *data) { struct xe_guc *guc = node_to_guc(m->private); @@ -60,10 +73,25 @@ static int guc_ctb(struct seq_file *m, void *data) return 0; } +static int guc_pc(struct seq_file *m, void *data) +{ + struct xe_guc *guc = node_to_guc(m->private); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_seq_file_printer(m); + + xe_pm_runtime_get(xe); + xe_guc_pc_print(&guc->pc, &p); + xe_pm_runtime_put(xe); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"guc_info", guc_info, 0}, {"guc_log", guc_log, 0}, + {"guc_log_dmesg", guc_log_dmesg, 0}, {"guc_ctb", guc_ctb, 0}, + {"guc_pc", guc_pc, 0}, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 0ca3056d8bd3f..80514a446ba28 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -149,16 +149,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, size_t remain; int i; - if (!log->bo) { - xe_gt_err(gt, "GuC log buffer not allocated\n"); + if (!log->bo) return NULL; - } snapshot = xe_guc_log_snapshot_alloc(log, atomic); - if (!snapshot) { - xe_gt_err(gt, "GuC log snapshot not allocated\n"); + if (!snapshot) return NULL; - } remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index df7f130fb663f..02409eedb9143 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "abi/guc_actions_slpc_abi.h" @@ -362,16 +363,17 @@ static void tgl_update_rpa_value(struct xe_guc_pc *pc) u32 reg; /* - * For PVC we still need to use fused RP1 as the approximation for RPe - * For other platforms than PVC we get the resolved RPe directly from + * For PVC we still need to use fused RP0 as the approximation for RPa + * For other platforms than PVC we get the resolved RPa directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) { reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); - else + pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } else { reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); - - pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } } static void tgl_update_rpe_value(struct xe_guc_pc *pc) @@ -385,12 +387,13 @@ static void tgl_update_rpe_value(struct xe_guc_pc *pc) * For other platforms than PVC we get the resolved RPe directly from * PCODE at a different 
register */ - if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) { reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); - else + pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } else { reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); - - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + } } static void pc_update_rp_values(struct xe_guc_pc *pc) @@ -1131,3 +1134,61 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } + +static const char *pc_get_state_string(struct xe_guc_pc *pc) +{ + switch (slpc_shared_data_read(pc, header.global_state)) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +/** + * xe_guc_pc_print - Print GuC's Power Conservation information for debug + * @pc: Xe_GuC_PC instance + * @p: drm_printer + */ +void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p) +{ + drm_printf(p, "SLPC Shared Data Header:\n"); + drm_printf(p, "\tSize: %x\n", slpc_shared_data_read(pc, header.size)); + drm_printf(p, "\tGlobal State: %s\n", pc_get_state_string(pc)); + + if (pc_action_query_task_state(pc)) + return; + + drm_printf(p, "\nSLPC Tasks Status:\n"); + drm_printf(p, "\tGTPERF enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tDCC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_DCC_TASK_ENABLED)); + drm_printf(p, "\tDCC in use: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IN_DCC)); + drm_printf(p, "\tBalancer enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_ENABLED)); + drm_printf(p, "\tIBC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IBC_TASK_ENABLED)); + drm_printf(p, "\tBalancer IA LMT enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_IA_LMT_ENABLED)); + drm_printf(p, "\tBalancer IA LMT active: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_IA_LMT_ACTIVE)); +} diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 619f59cd633c5..39102b79602fd 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -10,6 +10,7 @@ struct xe_guc_pc; enum slpc_gucrc_mode; +struct drm_printer; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); @@ -17,6 +18,7 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); +void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index 8f62de026724c..e5dc94f3e6181 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ 
b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -225,7 +225,7 @@ __relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u * with CTB lock held which is marked as used in the reclaim path. * Btw, that's one of the reason why we use mempool here! */ - txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL); + txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_NOWAIT); if (!txn) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 83a41ebcdc91d..573aa6308380a 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -11,6 +11,7 @@ #include "regs/xe_reg_defs.h" #include "xe_guc_ads_types.h" +#include "xe_guc_buf_types.h" #include "xe_guc_ct_types.h" #include "xe_guc_fwif.h" #include "xe_guc_log_types.h" @@ -58,6 +59,8 @@ struct xe_guc { struct xe_guc_ads ads; /** @ct: GuC ct */ struct xe_guc_ct ct; + /** @buf: GuC Buffer Cache manager */ + struct xe_guc_buf_cache buf; /** @capture: the error-state-capture module's data and objects */ struct xe_guc_state_capture *capture; /** @pc: GuC Power Conservation */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index d765bfd3636b4..06dc78d3a8123 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -12,6 +12,7 @@ #include "xe_drv.h" #include "xe_heci_gsc.h" #include "xe_platform_types.h" +#include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC @@ -200,7 +201,7 @@ void xe_heci_gsc_init(struct xe_device *xe) return; } - if (!def->use_polling) { + if (!def->use_polling && !xe_survivability_mode_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) goto fail; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index fde56dad3ab7a..48d80ffdf7bb9 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "regs/xe_gt_regs.h" @@ -20,6 +21,7 @@ #include "xe_pm.h" enum xe_hwmon_reg { + REG_TEMP, REG_PKG_RAPL_LIMIT, REG_PKG_POWER_SKU, REG_PKG_POWER_SKU_UNIT, @@ -36,6 +38,7 @@ enum xe_hwmon_reg_operation { enum xe_hwmon_channel { CHANNEL_CARD, CHANNEL_PKG, + CHANNEL_VRAM, CHANNEL_MAX, }; @@ -84,6 +87,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg struct xe_device *xe = hwmon->xe; switch (hwmon_reg) { + case REG_TEMP: + if (xe->info.platform == XE_BATTLEMAGE) { + if (channel == CHANNEL_PKG) + return BMG_PACKAGE_TEMPERATURE; + else if (channel == CHANNEL_VRAM) + return BMG_VRAM_TEMPERATURE; + } else if (xe->info.platform == XE_DG2) { + if (channel == CHANNEL_PKG) + return PCU_CR_PACKAGE_TEMPERATURE; + else if (channel == CHANNEL_VRAM) + return BMG_VRAM_TEMPERATURE; + } + break; case REG_PKG_RAPL_LIMIT: if (xe->info.platform == XE_BATTLEMAGE) { if (channel == CHANNEL_PKG) @@ -431,6 +447,8 @@ static const struct attribute_group *hwmon_groups[] = { }; static const struct hwmon_channel_info * const hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), @@ -506,6 +524,36 @@ static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *valu *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); 
} +static umode_t +xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_label: + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP, channel)) ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + u64 reg_val; + + switch (attr) { + case hwmon_temp_input: + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_TEMP, channel)); + + /* HW register value is in degrees Celsius, convert to millidegrees. */ + *val = REG_FIELD_GET(TEMP_MASK, reg_val) * MILLIDEGREE_PER_DEGREE; + return 0; + default: + return -EOPNOTSUPP; + } +} + static umode_t xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { @@ -667,6 +715,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, xe_pm_runtime_get(hwmon->xe); switch (type) { + case hwmon_temp: + ret = xe_hwmon_temp_is_visible(hwmon, attr, channel); + break; case hwmon_power: ret = xe_hwmon_power_is_visible(hwmon, attr, channel); break; @@ -699,6 +750,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, xe_pm_runtime_get(hwmon->xe); switch (type) { + case hwmon_temp: + ret = xe_hwmon_temp_read(hwmon, attr, channel, val); + break; case hwmon_power: ret = xe_hwmon_power_read(hwmon, attr, channel, val); break; @@ -752,6 +806,12 @@ static int xe_hwmon_read_label(struct device *dev, u32 attr, int channel, const char **str) { switch (type) { + case hwmon_temp: + if (channel == CHANNEL_PKG) + *str = "pkg"; + else if (channel == CHANNEL_VRAM) + *str = "vram"; + return 0; case hwmon_power: case hwmon_energy: case hwmon_curr: @@ -779,10 +839,9 @@ static const struct hwmon_chip_info hwmon_chip_info = { }; static void -xe_hwmon_get_preregistration_info(struct xe_device *xe) +xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { - struct xe_mmio *mmio = xe_root_tile_mmio(xe); - struct xe_hwmon *hwmon = xe->hwmon; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); long energy; u64 val_sku_unit = 0; int channel; @@ -816,33 +875,34 @@ static void xe_hwmon_mutex_destroy(void *arg) mutex_destroy(&hwmon->hwmon_lock); } -void xe_hwmon_register(struct xe_device *xe) +int xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; struct xe_hwmon *hwmon; + int ret; /* hwmon is available only for dGfx */ if (!IS_DGFX(xe)) - return; + return 0; /* hwmon is not available on VFs */ if (IS_SRIOV_VF(xe)) - return; + return 0; hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); if (!hwmon) - return; - - xe->hwmon = hwmon; + return -ENOMEM; mutex_init(&hwmon->hwmon_lock); - if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) - return; + ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon); + if (ret) + return ret; /* There's only one instance of hwmon per device */ hwmon->xe = xe; + xe->hwmon = hwmon; - xe_hwmon_get_preregistration_info(xe); + xe_hwmon_get_preregistration_info(hwmon); drm_dbg(&xe->drm, "Register xe hwmon interface\n"); @@ -850,11 +910,12 @@ void xe_hwmon_register(struct xe_device *xe) hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, &hwmon_chip_info, hwmon_groups); - if (IS_ERR(hwmon->hwmon_dev)) { - drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); + drm_err(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); xe->hwmon = NULL; - return; + return 
PTR_ERR(hwmon->hwmon_dev); } + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h index c42a1de2cd7a2..d02c1bfe8c0a0 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.h +++ b/drivers/gpu/drm/xe/xe_hwmon.h @@ -11,9 +11,9 @@ struct xe_device; #if IS_REACHABLE(CONFIG_HWMON) -void xe_hwmon_register(struct xe_device *xe); +int xe_hwmon_register(struct xe_device *xe); #else -static inline void xe_hwmon_register(struct xe_device *xe) { }; +static inline int xe_hwmon_register(struct xe_device *xe) { return 0; }; #endif #endif /* _XE_HWMON_H_ */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 08552ee3fb94b..5362d3174b060 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -20,6 +20,7 @@ #include "xe_hw_engine.h" #include "xe_memirq.h" #include "xe_mmio.h" +#include "xe_pxp.h" #include "xe_sriov.h" /* @@ -208,6 +209,15 @@ void xe_irq_enable_hwe(struct xe_gt *gt) } if (heci_mask) xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16)); + + if (xe_pxp_is_supported(xe)) { + u32 kcr_mask = KCR_PXP_STATE_TERMINATED_INTERRUPT | + KCR_APP_TERMINATED_PER_FW_REQ_INTERRUPT | + KCR_PXP_STATE_RESET_COMPLETE_INTERRUPT; + + xe_mmio_write32(mmio, CRYPTO_RSVD_INTR_ENABLE, kcr_mask << 16); + xe_mmio_write32(mmio, CRYPTO_RSVD_INTR_MASK, ~(kcr_mask << 16)); + } } } @@ -330,9 +340,15 @@ static void gt_irq_handler(struct xe_tile *tile, } if (class == XE_ENGINE_CLASS_OTHER) { - /* HECI GSCFI interrupts come from outside of GT */ + /* + * HECI GSCFI interrupts come from outside of GT. + * KCR irqs come from inside GT but are handled + * by the global PXP subsystem. + */ if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE) xe_heci_gsc_irq_handler(xe, intr_vec); + else if (instance == OTHER_KCR_INSTANCE) + xe_pxp_irq_handler(xe, intr_vec); else gt_other_irq_handler(engine_gt, instance, intr_vec); } @@ -510,6 +526,8 @@ static void gt_irq_reset(struct xe_tile *tile) xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); + xe_mmio_write32(mmio, CRYPTO_RSVD_INTR_ENABLE, 0); + xe_mmio_write32(mmio, CRYPTO_RSVD_INTR_MASK, ~0); } xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index a60ceae4c6dd2..89393dcb53d9d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -164,7 +164,7 @@ int xe_lmtt_init(struct xe_lmtt *lmtt) lmtt_assert(lmtt, IS_SRIOV_PF(xe)); lmtt_assert(lmtt, !lmtt->ops); - if (!IS_DGFX(xe)) + if (!xe_device_has_lmtt(xe)) return 0; if (xe_has_multi_level_lmtt(xe)) @@ -486,7 +486,7 @@ u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size) u64 pt_size; lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); - lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, xe_device_has_lmtt(lmtt_to_xe(lmtt))); lmtt_assert(lmtt, lmtt->ops); pt_size = PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bbb9ffbf63672..df3ceddede070 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -883,7 +883,8 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size, u16 msix_vec) + struct xe_vm *vm, u32 ring_size, u16 msix_vec, + u32 init_flags) { struct xe_gt *gt = hwe->gt; struct xe_tile *tile = gt_to_tile(gt); 
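Stepping back to the hwmon hunks above: with the channel layout added there, the package temperature surfaces as temp2_input/temp2_label ("pkg") and the VRAM temperature as temp3_input/temp3_label ("vram"), both in millidegree Celsius. A small userspace sketch, not part of the patch; the hwmon index is an example and has to be discovered at runtime by matching the "xe" hwmon name:

/* Userspace sketch: read the package temperature via the hwmon sysfs ABI. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/hwmon/hwmon2/temp2_input"; /* "pkg" channel */
	long millideg;
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;
	if (fscanf(f, "%ld", &millideg) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("package temperature: %.3f degC\n", millideg / 1000.0);
	return 0;
}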
@@ -979,6 +980,16 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } + if (init_flags & XE_LRC_CREATE_RUNALONE) + xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL, + xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | + _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE)); + + if (init_flags & XE_LRC_CREATE_PXP) + xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL, + xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | + _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); if (xe->info.has_asid && vm) @@ -1021,6 +1032,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * @vm: The VM (address space) * @ring_size: LRC ring size * @msix_vec: MSI-X interrupt vector (for platforms that support it) + * @flags: LRC initialization flags * * Allocate and initialize the Logical Ring Context (LRC). * @@ -1028,7 +1040,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * upon failure. */ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec) + u32 ring_size, u16 msix_vec, u32 flags) { struct xe_lrc *lrc; int err; @@ -1037,7 +1049,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, if (!lrc) return ERR_PTR(-ENOMEM); - err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags); if (err) { kfree(lrc); return ERR_PTR(err); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 4206e6a8b50a0..0b40f349ab95d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -39,10 +39,13 @@ struct xe_lrc_snapshot { u32 ctx_job_timestamp; }; -#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) +#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4) +#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4) +#define XE_LRC_CREATE_RUNALONE 0x1 +#define XE_LRC_CREATE_PXP 0x2 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size, u16 msix_vec); + u32 ring_size, u16 msix_vec, u32 flags); void xe_lrc_destroy(struct kref *ref); /** diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index a48f239cad1c5..70a36e7775466 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -55,12 +55,11 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; - void __iomem *regs; u8 id; /* * Nothing to be done as tile 0 has already been setup earlier with the - * entire BAR mapped - see xe_mmio_init() + * entire BAR mapped - see xe_mmio_probe_early() */ if (xe->info.tile_count == 1) return; @@ -74,7 +73,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* * Although the per-tile mmio regs are not yet initialized, this * is fine as it's going to the root tile's mmio, that's - * guaranteed to be initialized earlier in xe_mmio_init() + * guaranteed to be initialized earlier in xe_mmio_probe_early() */ mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; @@ -94,59 +93,15 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) } } - regs = xe->mmio.regs; - for_each_tile(tile, xe, id) { - tile->mmio.regs_size = SZ_4M; - tile->mmio.regs = regs; - tile->mmio.tile = tile; - regs += tile_mmio_size; - } -} - -/* - * On top of all the multi-tile MMIO space there can be a platform-dependent - * extension for each 
tile, resulting in a layout like below: - * - * .----------------------. <- ext_base + tile_count * tile_mmio_ext_size - * | .... | - * |----------------------| <- ext_base + 2 * tile_mmio_ext_size - * | tile1->mmio_ext.regs | - * |----------------------| <- ext_base + 1 * tile_mmio_ext_size - * | tile0->mmio_ext.regs | - * |======================| <- ext_base = tile_count * tile_mmio_size - * | | - * | mmio.regs | - * | | - * '----------------------' <- 0MB - * - * Set up the tile[]->mmio_ext pointers/sizes. - */ -static void mmio_extension_setup(struct xe_device *xe, size_t tile_mmio_size, - size_t tile_mmio_ext_size) -{ - struct xe_tile *tile; - void __iomem *regs; - u8 id; - - if (!xe->info.has_mmio_ext) - return; - - regs = xe->mmio.regs + tile_mmio_size * xe->info.tile_count; - for_each_tile(tile, xe, id) { - tile->mmio_ext.regs_size = tile_mmio_ext_size; - tile->mmio_ext.regs = regs; - tile->mmio_ext.tile = tile; - regs += tile_mmio_ext_size; - } + for_each_remote_tile(tile, xe, id) + xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } int xe_mmio_probe_tiles(struct xe_device *xe) { size_t tile_mmio_size = SZ_16M; - size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size; mmio_multi_tile_setup(xe, tile_mmio_size); - mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size); return devm_add_action_or_reset(xe->drm.dev, tiles_fini, xe); } @@ -161,7 +116,7 @@ static void mmio_fini(void *arg) root_tile->mmio.regs = NULL; } -int xe_mmio_init(struct xe_device *xe) +int xe_mmio_probe_early(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -179,13 +134,29 @@ int xe_mmio_init(struct xe_device *xe) } /* Setup first tile; other tiles (if present) will be setup later. */ - root_tile->mmio.regs_size = SZ_4M; - root_tile->mmio.regs = xe->mmio.regs; - root_tile->mmio.tile = root_tile; + xe_mmio_init(&root_tile->mmio, root_tile, xe->mmio.regs, SZ_4M); return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +/** + * xe_mmio_init() - Initialize an MMIO instance + * @mmio: Pointer to the MMIO instance to initialize + * @tile: The tile to which the MMIO region belongs + * @ptr: Pointer to the start of the MMIO region + * @size: The size of the MMIO region in bytes + * + * This is a convenience function for minimal initialization of struct xe_mmio. 
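The new helper is used just above to set up the per-tile register views; purely as an illustration (not from the patch), any other caller holding a mapped register range could build a view the same way and read through it:

/* Hedged sketch: initialize a standalone MMIO view and read a register. */
static u32 example_read_fuse3(struct xe_tile *tile, void __iomem *regs)
{
	struct xe_mmio mmio;

	xe_mmio_init(&mmio, tile, regs, SZ_4M);

	return xe_mmio_read32(&mmio, MIRROR_FUSE3);
}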
+ */ +void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, u32 size) +{ + xe_tile_assert(tile, size <= XE_REG_ADDR_MAX); + + mmio->regs = ptr; + mmio->regs_size = size; + mmio->tile = tile; +} + static void mmio_flush_pending_writes(struct xe_mmio *mmio) { #define DUMMY_REG_OFFSET 0x130030 diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 8a46f4006a84f..c151ba569003f 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -11,9 +11,11 @@ struct xe_device; struct xe_reg; -int xe_mmio_init(struct xe_device *xe); +int xe_mmio_probe_early(struct xe_device *xe); int xe_mmio_probe_tiles(struct xe_device *xe); +void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, u32 size); + u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg); u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg); void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 07b27114be9af..7185a2cdf6e38 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -77,10 +77,6 @@ struct init_funcs { void (*exit)(void); }; -static void xe_dummy_exit(void) -{ -} - static const struct init_funcs init_funcs[] = { { .init = xe_check_nomodeset, @@ -103,7 +99,6 @@ static const struct init_funcs init_funcs[] = { }, { .init = xe_pm_module_init, - .exit = xe_dummy_exit, }, }; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fa873f3d0a9d1..2c5a24a13e87a 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -548,6 +548,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, mutex_unlock(&stream->stream_lock); } while (!offset && !ret); } else { + xe_oa_buffer_check_unlocked(stream); mutex_lock(&stream->stream_lock); ret = __xe_oa_read(stream, buf, count, &offset); mutex_unlock(&stream->stream_lock); @@ -2423,36 +2424,36 @@ int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file return ret; } +static void xe_oa_unregister(void *arg) +{ + struct xe_oa *oa = arg; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + /** * xe_oa_register - Xe OA registration * @xe: @xe_device * * Exposes the metrics sysfs directory upon completion of module initialization */ -void xe_oa_register(struct xe_device *xe) +int xe_oa_register(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; if (!oa->xe) - return; + return 0; oa->metrics_kobj = kobject_create_and_add("metrics", &xe->drm.primary->kdev->kobj); -} - -/** - * xe_oa_unregister - Xe OA de-registration - * @xe: @xe_device - */ -void xe_oa_unregister(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - if (!oa->metrics_kobj) - return; + return -ENOMEM; - kobject_put(oa->metrics_kobj); - oa->metrics_kobj = NULL; + return devm_add_action_or_reset(xe->drm.dev, xe_oa_unregister, oa); } static u32 num_oa_units_per_gt(struct xe_gt *gt) @@ -2641,6 +2642,27 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) } } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + + return 0; +} + +static void xe_oa_fini(void *arg) +{ + struct xe_device *xe = arg; + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + + oa->xe = NULL; +} + /** * xe_oa_init - OA initialization during device probe * @xe: @xe_device @@ -2672,31 
+2694,10 @@ int xe_oa_init(struct xe_device *xe) } xe_oa_init_supported_formats(oa); - return 0; -exit: - oa->xe = NULL; - return ret; -} -static int destroy_config(int id, void *p, void *data) -{ - xe_oa_config_put(p); - return 0; -} - -/** - * xe_oa_fini - OA de-initialization during device remove - * @xe: @xe_device - */ -void xe_oa_fini(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - idr_for_each(&oa->metrics_idr, destroy_config, oa); - idr_destroy(&oa->metrics_idr); + return devm_add_action_or_reset(xe->drm.dev, xe_oa_fini, xe); +exit: oa->xe = NULL; + return ret; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 87a38820c317d..e510826f9efc6 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -15,9 +15,7 @@ struct xe_gt; struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); -void xe_oa_fini(struct xe_device *xe); -void xe_oa_register(struct xe_device *xe); -void xe_oa_unregister(struct xe_device *xe); +int xe_oa_register(struct xe_device *xe); int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 39be74848e447..f8417f4d8ce6d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -30,6 +30,7 @@ #include "xe_pm.h" #include "xe_sriov.h" #include "xe_step.h" +#include "xe_survivability_mode.h" #include "xe_tile.h" enum toggle_d3cold { @@ -54,6 +55,9 @@ struct xe_device_desc { enum xe_platform platform; + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 require_force_probe:1; u8 is_dgfx:1; @@ -61,7 +65,7 @@ struct xe_device_desc { u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; - u8 has_mmio_ext:1; + u8 has_pxp:1; u8 has_sriov:1; u8 skip_guc_pc:1; u8 skip_mtcfg:1; @@ -84,7 +88,6 @@ static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), - .dma_mask_size = 39, .va_bits = 48, .vm_max_level = 3, }; @@ -96,14 +99,12 @@ static const struct xe_graphics_desc graphics_xelpp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), - .dma_mask_size = 39, .va_bits = 48, .vm_max_level = 3, }; #define XE_HP_FEATURES \ .has_range_tlb_invalidation = true, \ - .dma_mask_size = 46, \ .va_bits = 48, \ .vm_max_level = 3 @@ -138,8 +139,6 @@ static const struct xe_graphics_desc graphics_xehpc = { BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), XE_HP_FEATURES, - .dma_mask_size = 52, - .max_remote_tiles = 1, .va_bits = 57, .vm_max_level = 4, .vram_flags = XE_VRAM_FLAGS_NEED64K, @@ -159,7 +158,6 @@ static const struct xe_graphics_desc graphics_xelpg = { }; #define XE2_GFX_FEATURES \ - .dma_mask_size = 46, \ .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ @@ -219,6 +217,7 @@ static const struct xe_device_desc tgl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(TIGERLAKE), + .dma_mask_size = 39, .has_display = true, .has_llc = true, .require_force_probe = true, @@ -228,6 +227,7 @@ static const struct xe_device_desc rkl_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(ROCKETLAKE), + .dma_mask_size = 39, .has_display = true, .has_llc = true, .require_force_probe = true, @@ -239,6 +239,7 @@ static const struct xe_device_desc adl_s_desc = { .graphics = &graphics_xelp, .media = 
&media_xem, PLATFORM(ALDERLAKE_S), + .dma_mask_size = 39, .has_display = true, .has_llc = true, .require_force_probe = true, @@ -254,6 +255,7 @@ static const struct xe_device_desc adl_p_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(ALDERLAKE_P), + .dma_mask_size = 39, .has_display = true, .has_llc = true, .require_force_probe = true, @@ -267,6 +269,7 @@ static const struct xe_device_desc adl_n_desc = { .graphics = &graphics_xelp, .media = &media_xem, PLATFORM(ALDERLAKE_N), + .dma_mask_size = 39, .has_display = true, .has_llc = true, .require_force_probe = true, @@ -280,6 +283,7 @@ static const struct xe_device_desc dg1_desc = { .media = &media_xem, DGFX_FEATURES, PLATFORM(DG1), + .dma_mask_size = 39, .has_display = true, .has_heci_gscfi = 1, .require_force_probe = true, @@ -303,6 +307,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; static const struct xe_device_desc ats_m_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, + .dma_mask_size = 46, .require_force_probe = true, DG2_FEATURES, @@ -312,6 +317,7 @@ static const struct xe_device_desc ats_m_desc = { static const struct xe_device_desc dg2_desc = { .graphics = &graphics_xehpg, .media = &media_xehpm, + .dma_mask_size = 46, .require_force_probe = true, DG2_FEATURES, @@ -322,8 +328,10 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .graphics = &graphics_xehpc, DGFX_FEATURES, PLATFORM(PVC), + .dma_mask_size = 52, .has_display = false, .has_heci_gscfi = 1, + .max_remote_tiles = 1, .require_force_probe = true, }; @@ -331,24 +339,31 @@ static const struct xe_device_desc mtl_desc = { /* .graphics and .media determined via GMD_ID */ .require_force_probe = true, PLATFORM(METEORLAKE), + .dma_mask_size = 46, .has_display = true, + .has_pxp = true, }; static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), + .dma_mask_size = 46, .has_display = true, + .has_pxp = true, }; static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), + .dma_mask_size = 46, .has_display = true, .has_heci_cscfi = 1, }; static const struct xe_device_desc ptl_desc = { PLATFORM(PANTHERLAKE), + .dma_mask_size = 46, .has_display = true, + .has_sriov = true, .require_force_probe = true, }; @@ -502,6 +517,7 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, gt->info.type = XE_GT_TYPE_MAIN; } + xe_gt_mmio_init(gt); xe_guc_comm_init_early(>->uc.guc); /* Don't bother with GMDID if failed to negotiate the GuC ABI */ @@ -613,11 +629,12 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.subplatform = subplatform_desc ? subplatform_desc->subplatform : XE_SUBPLATFORM_NONE; + xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; - xe->info.has_mmio_ext = desc->has_mmio_ext; + xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = desc->has_sriov; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; @@ -626,6 +643,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -677,9 +695,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.graphics_name = graphics_desc->name; xe->info.media_name = media_desc ? 
media_desc->name : "none"; - xe->info.tile_mmio_ext_size = graphics_desc->tile_mmio_ext_size; - xe->info.dma_mask_size = graphics_desc->dma_mask_size; xe->info.vram_flags = graphics_desc->vram_flags; xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; @@ -694,17 +710,6 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; - /* - * All platforms have at least one primary GT. Any platform with media - * version 13 or higher has an additional dedicated media GT. And - * depending on the graphics IP there may be additional "remote tiles." - * All of these together determine the overall GT count. - * - * FIXME: 'tile_count' here is misnamed since the rest of the driver - * treats it as the number of GTs rather than just the number of tiles. - */ - xe->info.tile_count = 1 + graphics_desc->max_remote_tiles; - for_each_remote_tile(tile, xe, id) { int err; @@ -713,6 +718,12 @@ static int xe_info_init(struct xe_device *xe, return err; } + /* + * All platforms have at least one primary GT. Any platform with media + * version 13 or higher has an additional dedicated media GT. And + * depending on the graphics IP there may be additional "remote tiles." + * All of these together determine the overall GT count. + */ for_each_tile(tile, xe, id) { gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; @@ -763,6 +774,9 @@ static void xe_pci_remove(struct pci_dev *pdev) if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); + if (xe_survivability_mode_enabled(xe)) + return xe_survivability_mode_remove(xe); + xe_device_remove(xe); xe_pm_runtime_fini(xe); pci_set_drvdata(pdev, NULL); @@ -835,8 +849,19 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe_early(xe); - if (err) + + /* + * In Boot Survivability mode, no drm card is exposed + * and driver is loaded with bare minimum to allow + * for firmware to be flashed through mei. Return + * success if survivability mode is enabled. 
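The survivability decision itself lives in xe_survivability_mode.c, which is not part of these hunks; as a rough, assumption-laden sketch, it presumably comes down to decoding the firmware boot status from the PCODE_SCRATCH(0) fields added further below in xe_pcode_api.h:

/* Illustrative only - the real policy is in xe_survivability_mode.c. */
static bool example_firmware_boot_failed(struct xe_device *xe)
{
	u32 scratch = xe_mmio_read32(xe_root_tile_mmio(xe), PCODE_SCRATCH(0));
	u32 status = REG_FIELD_GET(BOOT_STATUS, scratch);

	return status == CRITICAL_FAILURE || status == NON_CRITICAL_FAILURE;
}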
+ */ + if (err) { + if (xe_survivability_mode_enabled(xe)) + return 0; + return err; + } err = xe_info_init(xe, desc->graphics, desc->media); if (err) @@ -875,8 +900,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe(xe); - if (err) + if (err) { + xe_device_call_remove_actions(xe); return err; + } err = xe_pm_init(xe); if (err) @@ -923,9 +950,13 @@ static void d3cold_toggle(struct pci_dev *pdev, enum toggle_d3cold toggle) static int xe_pci_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); int err; - err = xe_pm_suspend(pdev_to_xe_device(pdev)); + if (xe_survivability_mode_enabled(xe)) + return -EBUSY; + + err = xe_pm_suspend(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 79b0f80376a4d..b964238449526 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -13,17 +13,12 @@ struct xe_graphics_desc { u8 ver; u8 rel; - u8 dma_mask_size; /* available DMA address bits */ u8 va_bits; u8 vm_max_level; u8 vram_flags; u64 hw_engine_mask; /* hardware engines provided by graphics IP */ - u32 tile_mmio_ext_size; /* size of MMIO extension space, per-tile */ - - u8 max_remote_tiles:2; - u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index f153ce96f69a7..2bae9afdbd352 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -49,6 +49,20 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) +/* PCODE_SCRATCH0 */ +#define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) +#define OVERFLOW_REG_OFFSET REG_GENMASK(14, 12) +#define HISTORY_TRACKING REG_BIT(11) +#define OVERFLOW_SUPPORT REG_BIT(10) +#define AUXINFO_SUPPORT REG_BIT(9) +#define BOOT_STATUS REG_GENMASK(3, 1) +#define CRITICAL_FAILURE 4 +#define NON_CRITICAL_FAILURE 7 + +/* Auxiliary info bits */ +#define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29) + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c9cc0c091dfdd..12200be7b43df 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -22,6 +22,7 @@ #include "xe_guc.h" #include "xe_irq.h" #include "xe_pcode.h" +#include "xe_pxp.h" #include "xe_trace.h" #include "xe_wa.h" @@ -90,7 +91,7 @@ static struct lockdep_map xe_pm_runtime_nod3cold_map = { */ bool xe_rpm_reclaim_safe(const struct xe_device *xe) { - return !xe->d3cold.capable && !xe->info.has_sriov; + return !xe->d3cold.capable; } static void xe_rpm_lockmap_acquire(const struct xe_device *xe) @@ -122,6 +123,10 @@ int xe_pm_suspend(struct xe_device *xe) drm_dbg(&xe->drm, "Suspending device\n"); trace_xe_pm_suspend(xe, __builtin_return_address(0)); + err = xe_pxp_pm_suspend(xe->pxp); + if (err) + goto err; + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); @@ -130,14 +135,12 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - goto err; + goto err_pxp; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); - if (err) { - xe_display_pm_resume(xe); - goto err; - } + if (err) + goto err_display; } xe_irq_suspend(xe); @@ -146,6 +149,11 @@ int xe_pm_suspend(struct xe_device *xe) drm_dbg(&xe->drm, "Device suspended\n"); return 0; + +err_display: + xe_display_pm_resume(xe); +err_pxp: + xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); return err; @@ -195,6 +203,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_pxp_pm_resume(xe->pxp); + drm_dbg(&xe->drm, "Device resumed\n"); return 0; err: @@ -389,6 +399,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe) */ xe_rpm_lockmap_acquire(xe); + err = xe_pxp_pm_suspend(xe->pxp); + if (err) + goto out; + /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify * also checks and deletes bo entry from user fault list. @@ -404,22 +418,27 @@ int xe_pm_runtime_suspend(struct xe_device *xe) if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) - goto out; + goto out_resume; } for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) - goto out; + goto out_resume; } xe_irq_suspend(xe); xe_display_pm_runtime_suspend_late(xe); + xe_rpm_lockmap_release(xe); + xe_pm_write_callback_task(xe, NULL); + return 0; + +out_resume: + xe_display_pm_runtime_resume(xe); + xe_pxp_pm_resume(xe->pxp); out: - if (err) - xe_display_pm_runtime_resume(xe); xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; @@ -472,6 +491,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_pxp_pm_resume(xe->pxp); + out: xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c new file mode 100644 index 0000000000000..3910a82328ee3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include +#include + +#include "xe_device.h" +#include "xe_gt_idle.h" +#include "xe_pm.h" +#include "xe_pmu.h" + +/** + * DOC: Xe PMU (Performance Monitoring Unit) + * + * Expose events/counters like GT-C6 residency and GT frequency to user land via + * the perf interface. Events are per device. The GT can be selected with an + * extra config sub-field (bits 60-63). + * + * All events are listed in sysfs: + * + * $ ls -ld /sys/bus/event_source/devices/xe_* + * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/events/ + * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/format/ + * + * The format directory has info regarding the configs that can be used. + * The standard perf tool can be used to grep for a certain event as well. 
+ * Example: + * + * $ perf list | grep gt-c6 + * + * To sample a specific event for a GT at regular intervals: + * + * $ perf stat -e -I + */ + +#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) + +static unsigned int config_to_event_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); +} + +static unsigned int config_to_gt_id(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_GT_MASK, config); +} + +#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 + +static struct xe_gt *event_to_gt(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + u64 gt = config_to_gt_id(event->attr.config); + + return xe_device_get_gt(xe, gt); +} + +static bool event_supported(struct xe_pmu *pmu, unsigned int gt, + unsigned int id) +{ + if (gt >= XE_MAX_GT_PER_TILE) + return false; + + return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && + pmu->supported_events & BIT_ULL(id); +} + +static void xe_pmu_event_destroy(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + + drm_WARN_ON(&xe->drm, event->parent); + xe_pm_runtime_put(xe); + drm_dev_put(&xe->drm); +} + +static int xe_pmu_event_init(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + unsigned int id, gt; + + if (!pmu->registered) + return -ENODEV; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + gt = config_to_gt_id(event->attr.config); + id = config_to_event_id(event->attr.config); + if (!event_supported(pmu, gt, id)) + return -ENOENT; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (!event->parent) { + drm_dev_get(&xe->drm); + xe_pm_runtime_get(xe); + event->destroy = xe_pmu_event_destroy; + } + + return 0; +} + +static u64 __xe_pmu_event_read(struct perf_event *event) +{ + struct xe_gt *gt = event_to_gt(event); + + if (!gt) + return 0; + + switch (config_to_event_id(event->attr.config)) { + case XE_PMU_EVENT_GT_C6_RESIDENCY: + return xe_gt_idle_residency_msec(>->gtidle); + } + + return 0; +} + +static void xe_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + + prev = local64_read(&hwc->prev_count); + do { + new = __xe_pmu_event_read(event); + } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new)); + + local64_add(new - prev, &event->count); +} + +static void xe_pmu_event_read(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) { + event->hw.state = PERF_HES_STOPPED; + return; + } + + xe_pmu_event_update(event); +} + +static void xe_pmu_enable(struct perf_event *event) +{ + /* + * Store the current counter value so we can report the correct delta + * for all listeners. Even when the event was already enabled and has + * an existing non-zero value. 
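To make the config layout concrete, here is a userspace sketch (not part of the patch) that opens the GT0 gt-c6-residency event directly via perf_event_open(2). The sysfs path, and hence the PCI address embedded in the PMU name, is an example; opening a system-wide event typically needs root or CAP_PERFMON:

/* Userspace sketch: read gt-c6-residency (in ms) for GT0 once. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t gt = 0, event = 0x01;	/* XE_PMU_EVENT_GT_C6_RESIDENCY */
	uint64_t count;
	unsigned int type;
	FILE *f;
	int fd;

	f = fopen("/sys/bus/event_source/devices/xe_0000_00_02.0/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = event | (gt << 60);	/* event id in bits 0-11, GT in bits 60-63 */

	fd = syscall(SYS_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, -1, 0);
	if (fd < 0 || read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;

	printf("gt-c6 residency: %llu ms\n", (unsigned long long)count);
	return 0;
}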
+ */ + local64_set(&event->hw.prev_count, __xe_pmu_event_read(event)); +} + +static void xe_pmu_event_start(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) + return; + + xe_pmu_enable(event); + event->hw.state = 0; +} + +static void xe_pmu_event_stop(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (pmu->registered) + if (flags & PERF_EF_UPDATE) + xe_pmu_event_update(event); + + event->hw.state = PERF_HES_STOPPED; +} + +static int xe_pmu_event_add(struct perf_event *event, int flags) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_pmu *pmu = &xe->pmu; + + if (!pmu->registered) + return -ENODEV; + + if (flags & PERF_EF_START) + xe_pmu_event_start(event, flags); + + return 0; +} + +static void xe_pmu_event_del(struct perf_event *event, int flags) +{ + xe_pmu_event_stop(event, PERF_EF_UPDATE); +} + +PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(event, "config:0-11"); + +static struct attribute *pmu_format_attrs[] = { + &format_attr_event.attr, + &format_attr_gt.attr, + NULL, +}; + +static const struct attribute_group pmu_format_attr_group = { + .name = "format", + .attrs = pmu_format_attrs, +}; + +static ssize_t event_attr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(buf, "event=%#04llx\n", pmu_attr->id); +} + +#define XE_EVENT_ATTR(name_, v_, id_) \ + PMU_EVENT_ATTR(name_, pmu_event_ ## v_, id_, event_attr_show) + +#define XE_EVENT_ATTR_UNIT(name_, v_, unit_) \ + PMU_EVENT_ATTR_STRING(name_.unit, pmu_event_unit_ ## v_, unit_) + +#define XE_EVENT_ATTR_GROUP(v_, id_, ...) \ + static struct attribute *pmu_attr_ ##v_[] = { \ + __VA_ARGS__, \ + NULL \ + }; \ + static umode_t is_visible_##v_(struct kobject *kobj, \ + struct attribute *attr, int idx) \ + { \ + struct perf_pmu_events_attr *pmu_attr; \ + struct xe_pmu *pmu; \ + \ + pmu_attr = container_of(attr, typeof(*pmu_attr), attr.attr); \ + pmu = container_of(dev_get_drvdata(kobj_to_dev(kobj)), \ + typeof(*pmu), base); \ + \ + return event_supported(pmu, 0, id_) ? 
attr->mode : 0; \ + } \ + static const struct attribute_group pmu_group_ ##v_ = { \ + .name = "events", \ + .attrs = pmu_attr_ ## v_, \ + .is_visible = is_visible_ ## v_, \ + } + +#define XE_EVENT_ATTR_SIMPLE(name_, v_, id_, unit_) \ + XE_EVENT_ATTR(name_, v_, id_) \ + XE_EVENT_ATTR_UNIT(name_, v_, unit_) \ + XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr, \ + &pmu_event_unit_ ##v_.attr.attr) + +#define XE_EVENT_ATTR_NOUNIT(name_, v_, id_) \ + XE_EVENT_ATTR(name_, v_, id_) \ + XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr) + +XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms"); + +static struct attribute *pmu_empty_event_attrs[] = { + /* Empty - all events are added as groups with .attr_update() */ + NULL, +}; + +static const struct attribute_group pmu_events_attr_group = { + .name = "events", + .attrs = pmu_empty_event_attrs, +}; + +static const struct attribute_group *pmu_events_attr_update[] = { + &pmu_group_gt_c6_residency, + NULL, +}; + +static void set_supported_events(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + + if (!xe->info.skip_guc_pc) + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); +} + +/** + * xe_pmu_unregister() - Remove/cleanup PMU registration + * @arg: Ptr to pmu + */ +static void xe_pmu_unregister(void *arg) +{ + struct xe_pmu *pmu = arg; + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + + if (!pmu->registered) + return; + + pmu->registered = false; + + perf_pmu_unregister(&pmu->base); + kfree(pmu->name); +} + +/** + * xe_pmu_register() - Define basic PMU properties for Xe and add event callbacks. + * @pmu: the PMU object + * + * Returns 0 on success and an appropriate error code otherwise + */ +int xe_pmu_register(struct xe_pmu *pmu) +{ + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + static const struct attribute_group *attr_groups[] = { + &pmu_format_attr_group, + &pmu_events_attr_group, + NULL + }; + int ret = -ENOMEM; + char *name; + + BUILD_BUG_ON(XE_MAX_GT_PER_TILE != XE_PMU_MAX_GT); + + if (IS_SRIOV_VF(xe)) + return 0; + + name = kasprintf(GFP_KERNEL, "xe_%s", + dev_name(xe->drm.dev)); + if (!name) + goto err; + + /* tools/perf reserves colons as special. 
*/ + strreplace(name, ':', '_'); + + pmu->name = name; + pmu->base.attr_groups = attr_groups; + pmu->base.attr_update = pmu_events_attr_update; + pmu->base.scope = PERF_PMU_SCOPE_SYS_WIDE; + pmu->base.module = THIS_MODULE; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = xe_pmu_event_init; + pmu->base.add = xe_pmu_event_add; + pmu->base.del = xe_pmu_event_del; + pmu->base.start = xe_pmu_event_start; + pmu->base.stop = xe_pmu_event_stop; + pmu->base.read = xe_pmu_event_read; + + set_supported_events(pmu); + + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_name; + + pmu->registered = true; + + return devm_add_action_or_reset(xe->drm.dev, xe_pmu_unregister, pmu); + +err_name: + kfree(name); +err: + drm_err(&xe->drm, "Failed to register PMU (ret=%d)!\n", ret); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pmu.h b/drivers/gpu/drm/xe/xe_pmu.h new file mode 100644 index 0000000000000..60c37126f87ec --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PMU_H_ +#define _XE_PMU_H_ + +#include "xe_pmu_types.h" + +#if IS_ENABLED(CONFIG_PERF_EVENTS) +int xe_pmu_register(struct xe_pmu *pmu); +#else +static inline int xe_pmu_register(struct xe_pmu *pmu) { return 0; } +#endif + +#endif + diff --git a/drivers/gpu/drm/xe/xe_pmu_types.h b/drivers/gpu/drm/xe/xe_pmu_types.h new file mode 100644 index 0000000000000..f5ba4d56622cb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pmu_types.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PMU_TYPES_H_ +#define _XE_PMU_TYPES_H_ + +#include +#include + +#define XE_PMU_MAX_GT 2 + +/** + * struct xe_pmu - PMU related data per Xe device + * + * Stores per device PMU info that includes event/perf attributes and sampling + * counters across all GTs for this device. + */ +struct xe_pmu { + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @registered: PMU is registered and not in the unregistering process. + */ + bool registered; + /** + * @name: Name as registered with perf core. + */ + const char *name; + /** + * @supported_events: Bitmap of supported events, indexed by event id + */ + u64 supported_events; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c new file mode 100644 index 0000000000000..3cd3f83e86b00 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -0,0 +1,906 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2024 Intel Corporation. + */ + +#include "xe_pxp.h" + +#include +#include + +#include "xe_bo.h" +#include "xe_bo_types.h" +#include "xe_device_types.h" +#include "xe_exec_queue.h" +#include "xe_force_wake.h" +#include "xe_guc_submit.h" +#include "xe_gsc_proxy.h" +#include "xe_gt.h" +#include "xe_gt_types.h" +#include "xe_huc.h" +#include "xe_mmio.h" +#include "xe_pm.h" +#include "xe_pxp_submit.h" +#include "xe_pxp_types.h" +#include "xe_uc_fw.h" +#include "regs/xe_irq_regs.h" +#include "regs/xe_pxp_regs.h" + +/** + * DOC: PXP + * + * PXP (Protected Xe Path) allows execution and flip to display of protected + * (i.e. encrypted) objects. This feature is currently only supported in + * integrated parts. + */ + +#define ARB_SESSION DRM_XE_PXP_HWDRM_DEFAULT_SESSION /* shorter define */ + +/* + * A submission to GSC can take up to 250ms to complete, so use a 300ms + * timeout for activation where only one of those is involved. 
Termination + * additionally requires a submission to VCS and an interaction with KCR, so + * bump the timeout to 500ms for that. + */ +#define PXP_ACTIVATION_TIMEOUT_MS 300 +#define PXP_TERMINATION_TIMEOUT_MS 500 + +bool xe_pxp_is_supported(const struct xe_device *xe) +{ + return xe->info.has_pxp && IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY); +} + +bool xe_pxp_is_enabled(const struct xe_pxp *pxp) +{ + return pxp; +} + +static bool pxp_prerequisites_done(const struct xe_pxp *pxp) +{ + struct xe_gt *gt = pxp->gt; + unsigned int fw_ref; + bool ready; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + + /* + * If force_wake fails we could falsely report the prerequisites as not + * done even if they are; the consequence of this would be that the + * callers won't go ahead with using PXP, but if force_wake doesn't work + * the GT is very likely in a bad state so not really a problem to abort + * PXP. Therefore, we can just log the force_wake error and not escalate + * it. + */ + XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)); + + /* PXP requires both HuC authentication via GSC and GSC proxy initialized */ + ready = xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GSC) && + xe_gsc_proxy_init_done(>->uc.gsc); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return ready; +} + +/** + * xe_pxp_get_readiness_status - check whether PXP is ready for userspace use + * @pxp: the xe_pxp pointer (can be NULL if PXP is disabled) + * + * Returns: 0 if PXP is not ready yet, 1 if it is ready, a negative errno value + * if PXP is not supported/enabled or if something went wrong in the + * initialization of the prerequisites. Note that the return values of this + * function follow the uapi (see drm_xe_query_pxp_status), so they can be used + * directly in the query ioctl. + */ +int xe_pxp_get_readiness_status(struct xe_pxp *pxp) +{ + int ret = 0; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* if the GSC or HuC FW are in an error state, PXP will never work */ + if (xe_uc_fw_status_to_error(pxp->gt->uc.huc.fw.status) || + xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status)) + return -EIO; + + xe_pm_runtime_get(pxp->xe); + + /* PXP requires both HuC loaded and GSC proxy initialized */ + if (pxp_prerequisites_done(pxp)) + ret = 1; + + xe_pm_runtime_put(pxp->xe); + return ret; +} + +static bool pxp_session_is_in_play(struct xe_pxp *pxp, u32 id) +{ + struct xe_gt *gt = pxp->gt; + + return xe_mmio_read32(>->mmio, KCR_SIP) & BIT(id); +} + +static int pxp_wait_for_session_state(struct xe_pxp *pxp, u32 id, bool in_play) +{ + struct xe_gt *gt = pxp->gt; + u32 mask = BIT(id); + + return xe_mmio_wait32(>->mmio, KCR_SIP, mask, in_play ? 
mask : 0, + 250, NULL, false); +} + +static void pxp_invalidate_queues(struct xe_pxp *pxp); + +static void pxp_invalidate_state(struct xe_pxp *pxp) +{ + pxp_invalidate_queues(pxp); + + if (pxp->status == XE_PXP_ACTIVE) + pxp->key_instance++; +} + +static int pxp_terminate_hw(struct xe_pxp *pxp) +{ + struct xe_gt *gt = pxp->gt; + unsigned int fw_ref; + int ret = 0; + + drm_dbg(&pxp->xe->drm, "Terminating PXP\n"); + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { + ret = -EIO; + goto out; + } + + /* terminate the hw session */ + ret = xe_pxp_submit_session_termination(pxp, ARB_SESSION); + if (ret) + goto out; + + ret = pxp_wait_for_session_state(pxp, ARB_SESSION, false); + if (ret) + goto out; + + /* Trigger full HW cleanup */ + xe_mmio_write32(>->mmio, KCR_GLOBAL_TERMINATE, 1); + + /* now we can tell the GSC to clean up its own state */ + ret = xe_pxp_submit_session_invalidation(&pxp->gsc_res, ARB_SESSION); + +out: + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return ret; +} + +static void mark_termination_in_progress(struct xe_pxp *pxp) +{ + lockdep_assert_held(&pxp->mutex); + + reinit_completion(&pxp->termination); + pxp->status = XE_PXP_TERMINATION_IN_PROGRESS; +} + +static void pxp_terminate(struct xe_pxp *pxp) +{ + int ret = 0; + struct xe_device *xe = pxp->xe; + + if (!wait_for_completion_timeout(&pxp->activation, + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) + drm_err(&xe->drm, "failed to wait for PXP start before termination\n"); + + mutex_lock(&pxp->mutex); + + pxp_invalidate_state(pxp); + + /* + * we'll mark the status as needing termination on resume, so no need to + * emit a termination now. + */ + if (pxp->status == XE_PXP_SUSPENDED) { + mutex_unlock(&pxp->mutex); + return; + } + + /* + * If we have a termination already in progress, we need to wait for + * it to complete before queueing another one. Once the first + * termination is completed we'll set the state back to + * NEEDS_TERMINATION and leave it to the pxp start code to issue it. + */ + if (pxp->status == XE_PXP_TERMINATION_IN_PROGRESS) { + pxp->status = XE_PXP_NEEDS_ADDITIONAL_TERMINATION; + mutex_unlock(&pxp->mutex); + return; + } + + mark_termination_in_progress(pxp); + + mutex_unlock(&pxp->mutex); + + ret = pxp_terminate_hw(pxp); + if (ret) { + drm_err(&xe->drm, "PXP termination failed: %pe\n", ERR_PTR(ret)); + mutex_lock(&pxp->mutex); + pxp->status = XE_PXP_ERROR; + complete_all(&pxp->termination); + mutex_unlock(&pxp->mutex); + } +} + +static void pxp_terminate_complete(struct xe_pxp *pxp) +{ + /* + * We expect PXP to be in one of 3 states when we get here: + * - XE_PXP_TERMINATION_IN_PROGRESS: a single termination event was + * requested and it is now completing, so we're ready to start. + * - XE_PXP_NEEDS_ADDITIONAL_TERMINATION: a second termination was + * requested while the first one was still being processed. + * - XE_PXP_SUSPENDED: PXP is now suspended, so we defer everything to + * when we come back on resume. 
+ */ + mutex_lock(&pxp->mutex); + + switch (pxp->status) { + case XE_PXP_TERMINATION_IN_PROGRESS: + pxp->status = XE_PXP_READY_TO_START; + break; + case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: + pxp->status = XE_PXP_NEEDS_TERMINATION; + break; + case XE_PXP_SUSPENDED: + /* Nothing to do */ + break; + default: + drm_err(&pxp->xe->drm, + "PXP termination complete while status was %u\n", + pxp->status); + } + + complete_all(&pxp->termination); + + mutex_unlock(&pxp->mutex); +} + +static void pxp_irq_work(struct work_struct *work) +{ + struct xe_pxp *pxp = container_of(work, typeof(*pxp), irq.work); + struct xe_device *xe = pxp->xe; + u32 events = 0; + + spin_lock_irq(&xe->irq.lock); + events = pxp->irq.events; + pxp->irq.events = 0; + spin_unlock_irq(&xe->irq.lock); + + if (!events) + return; + + /* + * If we're processing a termination irq while suspending then don't + * bother, we're going to re-init everything on resume anyway. + */ + if ((events & PXP_TERMINATION_REQUEST) && !xe_pm_runtime_get_if_active(xe)) + return; + + if (events & PXP_TERMINATION_REQUEST) { + events &= ~PXP_TERMINATION_COMPLETE; + pxp_terminate(pxp); + } + + if (events & PXP_TERMINATION_COMPLETE) + pxp_terminate_complete(pxp); + + if (events & PXP_TERMINATION_REQUEST) + xe_pm_runtime_put(xe); +} + +/** + * xe_pxp_irq_handler - Handles PXP interrupts. + * @xe: the xe_device structure + * @iir: interrupt vector + */ +void xe_pxp_irq_handler(struct xe_device *xe, u16 iir) +{ + struct xe_pxp *pxp = xe->pxp; + + if (!xe_pxp_is_enabled(pxp)) { + drm_err(&xe->drm, "PXP irq 0x%x received with PXP disabled!\n", iir); + return; + } + + lockdep_assert_held(&xe->irq.lock); + + if (unlikely(!iir)) + return; + + if (iir & (KCR_PXP_STATE_TERMINATED_INTERRUPT | + KCR_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) + pxp->irq.events |= PXP_TERMINATION_REQUEST; + + if (iir & KCR_PXP_STATE_RESET_COMPLETE_INTERRUPT) + pxp->irq.events |= PXP_TERMINATION_COMPLETE; + + if (pxp->irq.events) + queue_work(pxp->irq.wq, &pxp->irq.work); +} + +static int kcr_pxp_set_status(const struct xe_pxp *pxp, bool enable) +{ + u32 val = enable ? _MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES) : + _MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES); + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) + return -EIO; + + xe_mmio_write32(&pxp->gt->mmio, KCR_INIT, val); + xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref); + + return 0; +} + +static int kcr_pxp_enable(const struct xe_pxp *pxp) +{ + return kcr_pxp_set_status(pxp, true); +} + +static int kcr_pxp_disable(const struct xe_pxp *pxp) +{ + return kcr_pxp_set_status(pxp, false); +} + +static void pxp_fini(void *arg) +{ + struct xe_pxp *pxp = arg; + + destroy_workqueue(pxp->irq.wq); + xe_pxp_destroy_execution_resources(pxp); + + /* no need to explicitly disable KCR since we're going to do an FLR */ +} + +/** + * xe_pxp_init - initialize PXP support + * @xe: the xe_device structure + * + * Initialize the HW state and allocate the objects required for PXP support. + * Note that some of the requirement for PXP support (GSC proxy init, HuC auth) + * are performed asynchronously as part of the GSC init. PXP can only be used + * after both this function and the async worker have completed. + * + * Returns 0 if PXP is not supported or if PXP initialization is successful, + * other errno value if there is an error during the init. 
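+ *
+ * A minimal call-site sketch (illustrative only; the surrounding probe code
+ * and its error handling are assumed, not taken from this series):
+ *
+ *	err = xe_pxp_init(xe);
+ *	if (err)
+ *		return err;
+ *
+ * Note that a 0 return also covers the "PXP not supported" case, so callers
+ * do not need to gate the call on xe_pxp_is_supported().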
+ */ +int xe_pxp_init(struct xe_device *xe) +{ + struct xe_gt *gt = xe->tiles[0].media_gt; + struct xe_pxp *pxp; + int err; + + if (!xe_pxp_is_supported(xe)) + return 0; + + /* we only support PXP on single tile devices with a media GT */ + if (xe->info.tile_count > 1 || !gt) + return 0; + + /* The GSCCS is required for submissions to the GSC FW */ + if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0))) + return 0; + + /* PXP requires both GSC and HuC firmwares to be available */ + if (!xe_uc_fw_is_loadable(>->uc.gsc.fw) || + !xe_uc_fw_is_loadable(>->uc.huc.fw)) { + drm_info(&xe->drm, "skipping PXP init due to missing FW dependencies"); + return 0; + } + + pxp = drmm_kzalloc(&xe->drm, sizeof(struct xe_pxp), GFP_KERNEL); + if (!pxp) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&pxp->queues.list); + spin_lock_init(&pxp->queues.lock); + INIT_WORK(&pxp->irq.work, pxp_irq_work); + pxp->xe = xe; + pxp->gt = gt; + + pxp->key_instance = 1; + pxp->last_suspend_key_instance = 1; + + /* + * we'll use the completions to check if there is an action pending, + * so we start them as completed and we reinit it when an action is + * triggered. + */ + init_completion(&pxp->activation); + init_completion(&pxp->termination); + complete_all(&pxp->termination); + complete_all(&pxp->activation); + + mutex_init(&pxp->mutex); + + pxp->irq.wq = alloc_ordered_workqueue("pxp-wq", 0); + if (!pxp->irq.wq) { + err = -ENOMEM; + goto out_free; + } + + err = kcr_pxp_enable(pxp); + if (err) + goto out_wq; + + err = xe_pxp_allocate_execution_resources(pxp); + if (err) + goto out_kcr_disable; + + xe->pxp = pxp; + + return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp); + +out_kcr_disable: + kcr_pxp_disable(pxp); +out_wq: + destroy_workqueue(pxp->irq.wq); +out_free: + drmm_kfree(&xe->drm, pxp); +out: + drm_err(&xe->drm, "PXP initialization failed: %pe\n", ERR_PTR(err)); + return err; +} + +static int __pxp_start_arb_session(struct xe_pxp *pxp) +{ + int ret; + unsigned int fw_ref; + + fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) + return -EIO; + + if (pxp_session_is_in_play(pxp, ARB_SESSION)) { + ret = -EEXIST; + goto out_force_wake; + } + + ret = xe_pxp_submit_session_init(&pxp->gsc_res, ARB_SESSION); + if (ret) { + drm_err(&pxp->xe->drm, "Failed to init PXP arb session: %pe\n", ERR_PTR(ret)); + goto out_force_wake; + } + + ret = pxp_wait_for_session_state(pxp, ARB_SESSION, true); + if (ret) { + drm_err(&pxp->xe->drm, "PXP ARB session failed to go in play%pe\n", ERR_PTR(ret)); + goto out_force_wake; + } + + drm_dbg(&pxp->xe->drm, "PXP ARB session is active\n"); + +out_force_wake: + xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref); + return ret; +} + +/** + * xe_pxp_exec_queue_set_type - Mark a queue as using PXP + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to mark as using PXP + * @type: the type of PXP session this queue will use + * + * Returns 0 if the selected PXP type is supported, -ENODEV otherwise. 
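+ *
+ * A usage sketch (illustrative only; the calling context and error handling
+ * are assumed):
+ *
+ *	err = xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
+ *	if (!err)
+ *		err = xe_pxp_exec_queue_add(xe->pxp, q);
+ *
+ * The type must be set before the queue is added to the PXP list, since
+ * xe_pxp_exec_queue_add() asserts on it.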
+ */ +int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 type) +{ + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* we only support HWDRM sessions right now */ + xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM); + + q->pxp.type = type; + + return 0; +} + +static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + spin_lock_irq(&pxp->queues.lock); + list_add_tail(&q->pxp.link, &pxp->queues.list); + spin_unlock_irq(&pxp->queues.lock); +} + +/** + * xe_pxp_exec_queue_add - add a queue to the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to add to the list + * + * If PXP is enabled and the prerequisites are done, start the PXP ARB + * session (if not already running) and add the queue to the PXP list. Note + * that the queue must have previously been marked as using PXP with + * xe_pxp_exec_queue_set_type. + * + * Returns 0 if the PXP ARB session is running and the queue is in the list, + * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done, + * other errno value if something goes wrong during the session start. + */ +int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + int ret = 0; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* we only support HWDRM sessions right now */ + xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM); + + /* + * Runtime suspend kills PXP, so we take a reference to prevent it from + * happening while we have active queues that use PXP. + */ + xe_pm_runtime_get(pxp->xe); + + if (!pxp_prerequisites_done(pxp)) { + ret = -EBUSY; + goto out; + } + +wait_for_idle: + /* + * if there is an action in progress, wait for it. We need to wait + * outside the lock because the completion is done from within the lock. + * Note that the two actions should never be pending at the same time. 
+ */ + if (!wait_for_completion_timeout(&pxp->termination, + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) { + ret = -ETIMEDOUT; + goto out; + } + + if (!wait_for_completion_timeout(&pxp->activation, + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) { + ret = -ETIMEDOUT; + goto out; + } + + mutex_lock(&pxp->mutex); + + /* If PXP is not already active, turn it on */ + switch (pxp->status) { + case XE_PXP_ERROR: + ret = -EIO; + break; + case XE_PXP_ACTIVE: + __exec_queue_add(pxp, q); + mutex_unlock(&pxp->mutex); + goto out; + case XE_PXP_READY_TO_START: + pxp->status = XE_PXP_START_IN_PROGRESS; + reinit_completion(&pxp->activation); + break; + case XE_PXP_START_IN_PROGRESS: + /* If a start is in progress then the completion must not be done */ + XE_WARN_ON(completion_done(&pxp->activation)); + mutex_unlock(&pxp->mutex); + goto wait_for_idle; + case XE_PXP_NEEDS_TERMINATION: + mark_termination_in_progress(pxp); + break; + case XE_PXP_TERMINATION_IN_PROGRESS: + case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: + /* If a termination is in progress then the completion must not be done */ + XE_WARN_ON(completion_done(&pxp->termination)); + mutex_unlock(&pxp->mutex); + goto wait_for_idle; + case XE_PXP_SUSPENDED: + default: + drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status); + ret = -EIO; + break; + } + + mutex_unlock(&pxp->mutex); + + if (ret) + goto out; + + if (!completion_done(&pxp->termination)) { + ret = pxp_terminate_hw(pxp); + if (ret) { + drm_err(&pxp->xe->drm, "PXP termination failed before start\n"); + mutex_lock(&pxp->mutex); + pxp->status = XE_PXP_ERROR; + mutex_unlock(&pxp->mutex); + + goto out; + } + + goto wait_for_idle; + } + + /* All the cases except for start should have exited earlier */ + XE_WARN_ON(completion_done(&pxp->activation)); + ret = __pxp_start_arb_session(pxp); + + mutex_lock(&pxp->mutex); + + complete_all(&pxp->activation); + + /* + * Any other process should wait until the state goes away from + * XE_PXP_START_IN_PROGRESS, so if the state is not that something went + * wrong. Mark the status as needing termination and try again. + */ + if (pxp->status != XE_PXP_START_IN_PROGRESS) { + drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status); + pxp->status = XE_PXP_NEEDS_TERMINATION; + mutex_unlock(&pxp->mutex); + goto wait_for_idle; + } + + /* If everything went ok, update the status and add the queue to the list */ + if (!ret) { + pxp->status = XE_PXP_ACTIVE; + __exec_queue_add(pxp, q); + } else { + pxp->status = XE_PXP_ERROR; + } + + mutex_unlock(&pxp->mutex); + +out: + /* + * in the successful case the PM ref is released from + * xe_pxp_exec_queue_remove + */ + if (ret) + xe_pm_runtime_put(pxp->xe); + + return ret; +} + +/** + * xe_pxp_exec_queue_remove - remove a queue from the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to remove from the list + * + * If PXP is enabled and the exec_queue is in the list, the queue will be + * removed from the list and its PM reference will be released. It is safe to + * call this function multiple times for the same queue. 
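+ *
+ * Because the call is idempotent, a teardown path can simply do (sketch,
+ * caller context assumed):
+ *
+ *	xe_pxp_exec_queue_remove(xe->pxp, q);
+ *
+ * both on explicit userspace teardown and again from the final queue destroy
+ * path, without any extra bookkeeping.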
+ */ +void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + bool need_pm_put = false; + + if (!xe_pxp_is_enabled(pxp)) + return; + + spin_lock_irq(&pxp->queues.lock); + + if (!list_empty(&q->pxp.link)) { + list_del_init(&q->pxp.link); + need_pm_put = true; + } + + q->pxp.type = DRM_XE_PXP_TYPE_NONE; + + spin_unlock_irq(&pxp->queues.lock); + + if (need_pm_put) + xe_pm_runtime_put(pxp->xe); +} + +static void pxp_invalidate_queues(struct xe_pxp *pxp) +{ + struct xe_exec_queue *tmp, *q; + + spin_lock_irq(&pxp->queues.lock); + + /* + * Removing a queue from the PXP list requires a put of the RPM ref that + * the queue holds to keep the PXP session alive, which can't be done + * under spinlock. Since it is safe to kill a queue multiple times, we + * can leave the invalid queue in the list for now and postpone the + * removal and associated RPM put to when the queue is destroyed. + */ + list_for_each_entry(tmp, &pxp->queues.list, pxp.link) { + q = xe_exec_queue_get_unless_zero(tmp); + + if (!q) + continue; + + xe_exec_queue_kill(q); + xe_exec_queue_put(q); + } + + spin_unlock_irq(&pxp->queues.lock); +} + +/** + * xe_pxp_key_assign - mark a BO as using the current PXP key iteration + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @bo: the BO to mark + * + * Returns: -ENODEV if PXP is disabled, 0 otherwise. + */ +int xe_pxp_key_assign(struct xe_pxp *pxp, struct xe_bo *bo) +{ + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + xe_assert(pxp->xe, !bo->pxp_key_instance); + + /* + * Note that the PXP key handling is inherently racey, because the key + * can theoretically change at any time (although it's unlikely to do + * so without triggers), even right after we copy it. Taking a lock + * wouldn't help because the value might still change as soon as we + * release the lock. + * Userspace needs to handle the fact that their BOs can go invalid at + * any point. + */ + bo->pxp_key_instance = pxp->key_instance; + + return 0; +} + +/** + * xe_pxp_bo_key_check - check if the key used by a xe_bo is valid + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @bo: the BO we want to check + * + * Checks whether a BO was encrypted with the current key or an obsolete one. + * + * Returns: 0 if the key is valid, -ENODEV if PXP is disabled, -EINVAL if the + * BO is not using PXP, -ENOEXEC if the key is not valid. + */ +int xe_pxp_bo_key_check(struct xe_pxp *pxp, struct xe_bo *bo) +{ + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + if (!xe_bo_is_protected(bo)) + return -EINVAL; + + xe_assert(pxp->xe, bo->pxp_key_instance); + + /* + * Note that the PXP key handling is inherently racey, because the key + * can theoretically change at any time (although it's unlikely to do + * so without triggers), even right after we check it. Taking a lock + * wouldn't help because the value might still change as soon as we + * release the lock. + * We mitigate the risk by checking the key at multiple points (on each + * submission involving the BO and right before flipping it on the + * display), but there is still a very small chance that we could + * operate on an invalid BO for a single submission or a single frame + * flip. This is a compromise made to protect the encrypted data (which + * is what the key termination is for). 
+ */ + if (bo->pxp_key_instance != pxp->key_instance) + return -ENOEXEC; + + return 0; +} + +/** + * xe_pxp_obj_key_check - check if the key used by a drm_gem_obj is valid + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @obj: the drm_gem_obj we want to check + * + * Checks whether a drm_gem_obj was encrypted with the current key or an + * obsolete one. + * + * Returns: 0 if the key is valid, -ENODEV if PXP is disabled, -EINVAL if the + * obj is not using PXP, -ENOEXEC if the key is not valid. + */ +int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj) +{ + return xe_pxp_bo_key_check(pxp, gem_to_xe_bo(obj)); +} + +/** + * xe_pxp_pm_suspend - prepare PXP for HW suspend + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * + * Makes sure all PXP actions have completed and invalidates all PXP queues + * and objects before we go into a suspend state. + * + * Returns: 0 if successful, a negative errno value otherwise. + */ +int xe_pxp_pm_suspend(struct xe_pxp *pxp) +{ + int ret = 0; + + if (!xe_pxp_is_enabled(pxp)) + return 0; + +wait_for_activation: + if (!wait_for_completion_timeout(&pxp->activation, + msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) + ret = -ETIMEDOUT; + + mutex_lock(&pxp->mutex); + + switch (pxp->status) { + case XE_PXP_ERROR: + case XE_PXP_READY_TO_START: + case XE_PXP_SUSPENDED: + case XE_PXP_TERMINATION_IN_PROGRESS: + case XE_PXP_NEEDS_ADDITIONAL_TERMINATION: + /* + * If PXP is not running there is nothing to cleanup. If there + * is a termination pending then no need to issue another one. + */ + break; + case XE_PXP_START_IN_PROGRESS: + mutex_unlock(&pxp->mutex); + goto wait_for_activation; + case XE_PXP_NEEDS_TERMINATION: + /* If PXP was never used we can skip the cleanup */ + if (pxp->key_instance == pxp->last_suspend_key_instance) + break; + fallthrough; + case XE_PXP_ACTIVE: + pxp_invalidate_state(pxp); + break; + default: + drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u", + pxp->status); + ret = -EIO; + goto out; + } + + /* + * We set this even if we were in error state, hoping the suspend clears + * the error. Worse case we fail again and go in error state again. + */ + pxp->status = XE_PXP_SUSPENDED; + + mutex_unlock(&pxp->mutex); + + /* + * if there is a termination in progress, wait for it. + * We need to wait outside the lock because the completion is done from + * within the lock + */ + if (!wait_for_completion_timeout(&pxp->termination, + msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) + ret = -ETIMEDOUT; + + pxp->last_suspend_key_instance = pxp->key_instance; + +out: + return ret; +} + +/** + * xe_pxp_pm_resume - re-init PXP after HW suspend + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + */ +void xe_pxp_pm_resume(struct xe_pxp *pxp) +{ + int err; + + if (!xe_pxp_is_enabled(pxp)) + return; + + err = kcr_pxp_enable(pxp); + + mutex_lock(&pxp->mutex); + + xe_assert(pxp->xe, pxp->status == XE_PXP_SUSPENDED); + + if (err) + pxp->status = XE_PXP_ERROR; + else + pxp->status = XE_PXP_NEEDS_TERMINATION; + + mutex_unlock(&pxp->mutex); +} diff --git a/drivers/gpu/drm/xe/xe_pxp.h b/drivers/gpu/drm/xe/xe_pxp.h new file mode 100644 index 0000000000000..546b156d63aa5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2024, Intel Corporation. All rights reserved. 
+ */ + +#ifndef __XE_PXP_H__ +#define __XE_PXP_H__ + +#include + +struct drm_gem_object; +struct xe_bo; +struct xe_device; +struct xe_exec_queue; +struct xe_pxp; + +bool xe_pxp_is_supported(const struct xe_device *xe); +bool xe_pxp_is_enabled(const struct xe_pxp *pxp); +int xe_pxp_get_readiness_status(struct xe_pxp *pxp); + +int xe_pxp_init(struct xe_device *xe); +void xe_pxp_irq_handler(struct xe_device *xe, u16 iir); + +int xe_pxp_pm_suspend(struct xe_pxp *pxp); +void xe_pxp_pm_resume(struct xe_pxp *pxp); + +int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 type); +int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q); +void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q); + +int xe_pxp_key_assign(struct xe_pxp *pxp, struct xe_bo *bo); +int xe_pxp_bo_key_check(struct xe_pxp *pxp, struct xe_bo *bo); +int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj); + +#endif /* __XE_PXP_H__ */ diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.c b/drivers/gpu/drm/xe/xe_pxp_debugfs.c new file mode 100644 index 0000000000000..ccfbacf08efc1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "xe_pxp_debugfs.h" + +#include + +#include +#include +#include + +#include "xe_device.h" +#include "xe_pxp.h" +#include "xe_pxp_types.h" +#include "regs/xe_irq_regs.h" + +static struct xe_pxp *node_to_pxp(struct drm_info_node *node) +{ + return node->info_ent->data; +} + +static const char *pxp_status_to_str(struct xe_pxp *pxp) +{ + lockdep_assert_held(&pxp->mutex); + + switch (pxp->status) { + case XE_PXP_ERROR: + return "error"; + case XE_PXP_NEEDS_TERMINATION: + return "needs termination"; + case XE_PXP_TERMINATION_IN_PROGRESS: + return "termination in progress"; + case XE_PXP_READY_TO_START: + return "ready to start"; + case XE_PXP_ACTIVE: + return "active"; + case XE_PXP_SUSPENDED: + return "suspended"; + default: + return "unknown"; + } +}; + +static int pxp_info(struct seq_file *m, void *data) +{ + struct xe_pxp *pxp = node_to_pxp(m->private); + struct drm_printer p = drm_seq_file_printer(m); + const char *status; + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + mutex_lock(&pxp->mutex); + status = pxp_status_to_str(pxp); + + drm_printf(&p, "status: %s\n", status); + drm_printf(&p, "instance counter: %u\n", pxp->key_instance); + mutex_unlock(&pxp->mutex); + + return 0; +} + +static int pxp_terminate(struct seq_file *m, void *data) +{ + struct xe_pxp *pxp = node_to_pxp(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + if (!xe_pxp_is_enabled(pxp)) + return -ENODEV; + + /* simulate a termination interrupt */ + spin_lock_irq(&pxp->xe->irq.lock); + xe_pxp_irq_handler(pxp->xe, KCR_PXP_STATE_TERMINATED_INTERRUPT); + spin_unlock_irq(&pxp->xe->irq.lock); + + drm_printf(&p, "PXP termination queued\n"); + + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + {"info", pxp_info, 0}, + {"terminate", pxp_terminate, 0}, +}; + +void xe_pxp_debugfs_register(struct xe_pxp *pxp) +{ + struct drm_minor *minor; + struct drm_info_list *local; + struct dentry *root; + int i; + + if (!xe_pxp_is_enabled(pxp)) + return; + + minor = pxp->xe->drm.primary; + if (!minor->debugfs_root) + return; + +#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) + local = drmm_kmalloc(&pxp->xe->drm, DEBUGFS_SIZE, GFP_KERNEL); + if (!local) + return; + + memcpy(local, debugfs_list, 
DEBUGFS_SIZE); +#undef DEBUGFS_SIZE + + for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) + local[i].data = pxp; + + root = debugfs_create_dir("pxp", minor->debugfs_root); + if (IS_ERR(root)) + return; + + drm_debugfs_create_files(local, + ARRAY_SIZE(debugfs_list), + root, minor); +} diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.h b/drivers/gpu/drm/xe/xe_pxp_debugfs.h new file mode 100644 index 0000000000000..988466aad50b3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __XE_PXP_DEBUGFS_H__ +#define __XE_PXP_DEBUGFS_H__ + +struct xe_pxp; + +void xe_pxp_debugfs_register(struct xe_pxp *pxp); + +#endif /* __XE_PXP_DEBUGFS_H__ */ diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c new file mode 100644 index 0000000000000..d92ec0f515b03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2024 Intel Corporation. + */ + +#include "xe_pxp_submit.h" + +#include +#include + +#include "xe_device_types.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_exec_queue.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_lrc.h" +#include "xe_map.h" +#include "xe_pxp.h" +#include "xe_pxp_types.h" +#include "xe_sched_job.h" +#include "xe_vm.h" +#include "abi/gsc_command_header_abi.h" +#include "abi/gsc_pxp_commands_abi.h" +#include "instructions/xe_gsc_commands.h" +#include "instructions/xe_mfx_commands.h" +#include "instructions/xe_mi_commands.h" + +/* + * The VCS is used for kernel-owned GGTT submissions to issue key termination. + * Terminations are serialized, so we only need a single queue and a single + * batch. + */ +static int allocate_vcs_execution_resources(struct xe_pxp *pxp) +{ + struct xe_gt *gt = pxp->gt; + struct xe_device *xe = pxp->xe; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_hw_engine *hwe; + struct xe_exec_queue *q; + struct xe_bo *bo; + int err; + + hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true); + if (!hwe) + return -ENODEV; + + q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe, + EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0); + if (IS_ERR(q)) + return PTR_ERR(q); + + /* + * Each termination is 16 DWORDS, so 4K is enough to contain a + * termination for each session. 
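+ * (Arithmetic for reference: 16 dwords * 4 bytes = 64 bytes per termination,
+ * so a 4 KiB buffer has room for 64 back-to-back terminations.)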
+ */ + bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out_queue; + } + + pxp->vcs_exec.q = q; + pxp->vcs_exec.bo = bo; + + return 0; + +out_queue: + xe_exec_queue_put(q); + return err; +} + +static void destroy_vcs_execution_resources(struct xe_pxp *pxp) +{ + if (pxp->vcs_exec.bo) + xe_bo_unpin_map_no_vm(pxp->vcs_exec.bo); + + if (pxp->vcs_exec.q) + xe_exec_queue_put(pxp->vcs_exec.q); +} + +#define PXP_BB_SIZE XE_PAGE_SIZE +static int allocate_gsc_client_resources(struct xe_gt *gt, + struct xe_pxp_gsc_client_resources *gsc_res, + size_t inout_size) +{ + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_hw_engine *hwe; + struct xe_vm *vm; + struct xe_bo *bo; + struct xe_exec_queue *q; + struct dma_fence *fence; + long timeout; + int err = 0; + + hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true); + + /* we shouldn't reach here if the GSC engine is not available */ + xe_assert(xe, hwe); + + /* PXP instructions must be issued from PPGTT */ + vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + /* We allocate a single object for the batch and the in/out memory */ + xe_vm_lock(vm, false); + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC); + xe_vm_unlock(vm); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto vm_out; + } + + fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto bo_out; + } + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + if (timeout <= 0) { + err = timeout ?: -ETIME; + goto bo_out; + } + + q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe, + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT, 0); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto bo_out; + } + + gsc_res->vm = vm; + gsc_res->bo = bo; + gsc_res->inout_size = inout_size; + gsc_res->batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); + gsc_res->msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE); + gsc_res->msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE + inout_size); + gsc_res->q = q; + + /* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */ + gsc_res->host_session_handle = xe_gsc_create_host_session_id(); + + return 0; + +bo_out: + xe_bo_unpin_map_no_vm(bo); +vm_out: + xe_vm_close_and_put(vm); + + return err; +} + +static void destroy_gsc_client_resources(struct xe_pxp_gsc_client_resources *gsc_res) +{ + if (!gsc_res->q) + return; + + xe_exec_queue_put(gsc_res->q); + xe_bo_unpin_map_no_vm(gsc_res->bo); + xe_vm_close_and_put(gsc_res->vm); +} + +/** + * xe_pxp_allocate_execution_resources - Allocate PXP submission objects + * @pxp: the xe_pxp structure + * + * Allocates exec_queues objects for VCS and GSCCS submission. The GSCCS + * submissions are done via PPGTT, so this function allocates a VM for it and + * maps the object into it. + * + * Returns 0 if the allocation and mapping is successful, an errno value + * otherwise. 
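+ *
+ * The objects allocated here are released by xe_pxp_destroy_execution_resources(),
+ * which in this series is called from the pxp_fini() devm action.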
+ */ +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp) +{ + int err; + + err = allocate_vcs_execution_resources(pxp); + if (err) + return err; + + /* + * PXP commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE), + * but we currently only support a subset of commands that are small + * (< 20 dwords), so a single page is enough for now. + */ + err = allocate_gsc_client_resources(pxp->gt, &pxp->gsc_res, XE_PAGE_SIZE); + if (err) + goto destroy_vcs_context; + + return 0; + +destroy_vcs_context: + destroy_vcs_execution_resources(pxp); + return err; +} + +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp) +{ + destroy_gsc_client_resources(&pxp->gsc_res); + destroy_vcs_execution_resources(pxp); +} + +#define emit_cmd(xe_, map_, offset_, val_) \ + xe_map_wr(xe_, map_, (offset_) * sizeof(u32), u32, val_) + +/* stall until prior PXP and MFX/HCP/HUC objects are completed */ +#define MFX_WAIT_PXP (MFX_WAIT | \ + MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG | \ + MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG) +static u32 pxp_emit_wait(struct xe_device *xe, struct iosys_map *batch, u32 offset) +{ + /* wait for cmds to go through */ + emit_cmd(xe, batch, offset++, MFX_WAIT_PXP); + emit_cmd(xe, batch, offset++, 0); + + return offset; +} + +static u32 pxp_emit_session_selection(struct xe_device *xe, struct iosys_map *batch, + u32 offset, u32 idx) +{ + offset = pxp_emit_wait(xe, batch, offset); + + /* pxp off */ + emit_cmd(xe, batch, offset++, MI_FLUSH_DW | MI_FLUSH_IMM_DW); + emit_cmd(xe, batch, offset++, 0); + emit_cmd(xe, batch, offset++, 0); + emit_cmd(xe, batch, offset++, 0); + + /* select session */ + emit_cmd(xe, batch, offset++, MI_SET_APPID | MI_SET_APPID_SESSION_ID(idx)); + emit_cmd(xe, batch, offset++, 0); + + offset = pxp_emit_wait(xe, batch, offset); + + /* pxp on */ + emit_cmd(xe, batch, offset++, MI_FLUSH_DW | + MI_FLUSH_DW_PROTECTED_MEM_EN | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_IMM_DW); + emit_cmd(xe, batch, offset++, LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + emit_cmd(xe, batch, offset++, 0); + emit_cmd(xe, batch, offset++, 0); + + offset = pxp_emit_wait(xe, batch, offset); + + return offset; +} + +static u32 pxp_emit_inline_termination(struct xe_device *xe, + struct iosys_map *batch, u32 offset) +{ + /* session inline termination */ + emit_cmd(xe, batch, offset++, CRYPTO_KEY_EXCHANGE); + emit_cmd(xe, batch, offset++, 0); + + return offset; +} + +static u32 pxp_emit_session_termination(struct xe_device *xe, struct iosys_map *batch, + u32 offset, u32 idx) +{ + offset = pxp_emit_session_selection(xe, batch, offset, idx); + offset = pxp_emit_inline_termination(xe, batch, offset); + + return offset; +} + +/** + * xe_pxp_submit_session_termination - submits a PXP inline termination + * @pxp: the xe_pxp structure + * @id: the session to terminate + * + * Emit an inline termination via the VCS engine to terminate a session. + * + * Returns 0 if the submission is successful, an errno value otherwise. 
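+ *
+ * Call sketch (illustrative; this mirrors how the termination path in
+ * xe_pxp.c drives it for the ARB session):
+ *
+ *	ret = xe_pxp_submit_session_termination(pxp, ARB_SESSION);
+ *	if (ret)
+ *		return ret;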
+ */ +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id) +{ + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + u32 offset = 0; + u64 addr = xe_bo_ggtt_addr(pxp->vcs_exec.bo); + + offset = pxp_emit_session_termination(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, id); + offset = pxp_emit_wait(pxp->xe, &pxp->vcs_exec.bo->vmap, offset); + emit_cmd(pxp->xe, &pxp->vcs_exec.bo->vmap, offset, MI_BATCH_BUFFER_END); + + job = xe_sched_job_create(pxp->vcs_exec.q, &addr); + if (IS_ERR(job)) + return PTR_ERR(job); + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + + dma_fence_put(fence); + + if (!timeout) + return -ETIMEDOUT; + else if (timeout < 0) + return timeout; + + return 0; +} + +static bool +is_fw_err_platform_config(u32 type) +{ + switch (type) { + case PXP_STATUS_ERROR_API_VERSION: + case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF: + case PXP_STATUS_PLATFCONFIG_KF1_BAD: + case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED: + return true; + default: + break; + } + return false; +} + +static const char * +fw_err_to_string(u32 type) +{ + switch (type) { + case PXP_STATUS_ERROR_API_VERSION: + return "ERR_API_VERSION"; + case PXP_STATUS_NOT_READY: + return "ERR_NOT_READY"; + case PXP_STATUS_PLATFCONFIG_KF1_NOVERIF: + case PXP_STATUS_PLATFCONFIG_KF1_BAD: + case PXP_STATUS_PLATFCONFIG_FIXED_KF1_NOT_SUPPORTED: + return "ERR_PLATFORM_CONFIG"; + default: + break; + } + return NULL; +} + +static int pxp_pkt_submit(struct xe_exec_queue *q, u64 batch_addr) +{ + struct xe_gt *gt = q->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_sched_job *job; + struct dma_fence *fence; + long timeout; + + xe_assert(xe, q->hwe->engine_id == XE_HW_ENGINE_GSCCS0); + + job = xe_sched_job_create(q, &batch_addr); + if (IS_ERR(job)) + return PTR_ERR(job); + + xe_sched_job_arm(job); + fence = dma_fence_get(&job->drm.s_fence->finished); + xe_sched_job_push(job); + + timeout = dma_fence_wait_timeout(fence, false, HZ); + dma_fence_put(fence); + if (timeout < 0) + return timeout; + else if (!timeout) + return -ETIME; + + return 0; +} + +static void emit_pxp_heci_cmd(struct xe_device *xe, struct iosys_map *batch, + u64 addr_in, u32 size_in, u64 addr_out, u32 size_out) +{ + u32 len = 0; + + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, GSC_HECI_CMD_PKT); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_in)); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_in)); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_in); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, lower_32_bits(addr_out)); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, upper_32_bits(addr_out)); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, size_out); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, 0); + xe_map_wr(xe, batch, len++ * sizeof(u32), u32, MI_BATCH_BUFFER_END); +} + +#define GSC_PENDING_RETRY_MAXCOUNT 40 +#define GSC_PENDING_RETRY_PAUSE_MS 50 +static int gsccs_send_message(struct xe_pxp_gsc_client_resources *gsc_res, + void *msg_in, size_t msg_in_size, + void *msg_out, size_t msg_out_size_max) +{ + struct xe_device *xe = gsc_res->vm->xe; + const size_t max_msg_size = gsc_res->inout_size - sizeof(struct intel_gsc_mtl_header); + u32 wr_offset; + u32 rd_offset; + u32 reply_size; + u32 min_reply_size = 0; + int ret; + int retry = GSC_PENDING_RETRY_MAXCOUNT; + + if (msg_in_size > max_msg_size || msg_out_size_max > max_msg_size) + return -ENOSPC; + + wr_offset = 
xe_gsc_emit_header(xe, &gsc_res->msg_in, 0, + HECI_MEADDRESS_PXP, + gsc_res->host_session_handle, + msg_in_size); + + /* NOTE: zero size packets are used for session-cleanups */ + if (msg_in && msg_in_size) { + xe_map_memcpy_to(xe, &gsc_res->msg_in, wr_offset, + msg_in, msg_in_size); + min_reply_size = sizeof(struct pxp_cmd_header); + } + + /* Make sure the reply header does not contain stale data */ + xe_gsc_poison_header(xe, &gsc_res->msg_out, 0); + + /* + * The BO is mapped at address 0 of the PPGTT, so no need to add its + * base offset when calculating the in/out addresses. + */ + emit_pxp_heci_cmd(xe, &gsc_res->batch, PXP_BB_SIZE, + wr_offset + msg_in_size, PXP_BB_SIZE + gsc_res->inout_size, + wr_offset + msg_out_size_max); + + xe_device_wmb(xe); + + /* + * If the GSC needs to communicate with CSME to complete our request, + * it'll set the "pending" flag in the return header. In this scenario + * we're expected to wait 50ms to give some time to the proxy code to + * handle the GSC<->CSME communication and then try again. Note that, + * although in most cases the 50ms window is enough, the proxy flow is + * not actually guaranteed to complete within that time period, so we + * might have to try multiple times, up to a worst case of 2 seconds, + * after which the request is considered aborted. + */ + do { + ret = pxp_pkt_submit(gsc_res->q, 0); + if (ret) + break; + + if (xe_gsc_check_and_update_pending(xe, &gsc_res->msg_in, 0, + &gsc_res->msg_out, 0)) { + ret = -EAGAIN; + msleep(GSC_PENDING_RETRY_PAUSE_MS); + } + } while (--retry && ret == -EAGAIN); + + if (ret) { + drm_err(&xe->drm, "failed to submit GSC PXP message (%pe)\n", ERR_PTR(ret)); + return ret; + } + + ret = xe_gsc_read_out_header(xe, &gsc_res->msg_out, 0, + min_reply_size, &rd_offset); + if (ret) { + drm_err(&xe->drm, "invalid GSC reply for PXP (%pe)\n", ERR_PTR(ret)); + return ret; + } + + if (msg_out && min_reply_size) { + reply_size = xe_map_rd_field(xe, &gsc_res->msg_out, rd_offset, + struct pxp_cmd_header, buffer_len); + reply_size += sizeof(struct pxp_cmd_header); + + if (reply_size > msg_out_size_max) { + drm_warn(&xe->drm, "PXP reply size overflow: %u (%zu)\n", + reply_size, msg_out_size_max); + reply_size = msg_out_size_max; + } + + xe_map_memcpy_from(xe, msg_out, &gsc_res->msg_out, + rd_offset, reply_size); + } + + xe_gsc_poison_header(xe, &gsc_res->msg_in, 0); + + return ret; +} + +/** + * xe_pxp_submit_session_init - submits a PXP GSC session initialization + * @gsc_res: the pxp client resources + * @id: the session to initialize + * + * Submit a message to the GSC FW to initialize (i.e. start) a PXP session. + * + * Returns 0 if the submission is successful, an errno value otherwise. 
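+ *
+ * Call sketch (illustrative; this mirrors the ARB start path in xe_pxp.c,
+ * where the session must then be seen as "in play" by KCR):
+ *
+ *	ret = xe_pxp_submit_session_init(&pxp->gsc_res, ARB_SESSION);
+ *	if (!ret)
+ *		ret = pxp_wait_for_session_state(pxp, ARB_SESSION, true);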
+ */ +int xe_pxp_submit_session_init(struct xe_pxp_gsc_client_resources *gsc_res, u32 id) +{ + struct xe_device *xe = gsc_res->vm->xe; + struct pxp43_create_arb_in msg_in = {0}; + struct pxp43_create_arb_out msg_out = {0}; + int ret; + + msg_in.header.api_version = PXP_APIVER(4, 3); + msg_in.header.command_id = PXP43_CMDID_INIT_SESSION; + msg_in.header.stream_id = (FIELD_PREP(PXP43_INIT_SESSION_APPID, id) | + FIELD_PREP(PXP43_INIT_SESSION_VALID, 1) | + FIELD_PREP(PXP43_INIT_SESSION_APPTYPE, 0)); + msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); + + if (id == DRM_XE_PXP_HWDRM_DEFAULT_SESSION) + msg_in.protection_mode = PXP43_INIT_SESSION_PROTECTION_ARB; + + ret = gsccs_send_message(gsc_res, &msg_in, sizeof(msg_in), + &msg_out, sizeof(msg_out)); + if (ret) { + drm_err(&xe->drm, "Failed to init PXP session %u (%pe)\n", id, ERR_PTR(ret)); + } else if (msg_out.header.status != 0) { + ret = -EIO; + + if (is_fw_err_platform_config(msg_out.header.status)) + drm_info_once(&xe->drm, + "Failed to init PXP session %u due to BIOS/SOC, s=0x%x(%s)\n", + id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + else + drm_dbg(&xe->drm, "Failed to init PXP session %u, s=0x%x\n", + id, msg_out.header.status); + } + + return ret; +} + +/** + * xe_pxp_submit_session_invalidation - submits a PXP GSC invalidation + * @gsc_res: the pxp client resources + * @id: the session to invalidate + * + * Submit a message to the GSC FW to notify it that a session has been + * terminated and is therefore invalid. + * + * Returns 0 if the submission is successful, an errno value otherwise. + */ +int xe_pxp_submit_session_invalidation(struct xe_pxp_gsc_client_resources *gsc_res, u32 id) +{ + struct xe_device *xe = gsc_res->vm->xe; + struct pxp43_inv_stream_key_in msg_in = {0}; + struct pxp43_inv_stream_key_out msg_out = {0}; + int ret = 0; + + /* + * Stream key invalidation reuses the same version 4.2 input/output + * command format but firmware requires 4.3 API interaction + */ + msg_in.header.api_version = PXP_APIVER(4, 3); + msg_in.header.command_id = PXP43_CMDID_INVALIDATE_STREAM_KEY; + msg_in.header.buffer_len = sizeof(msg_in) - sizeof(msg_in.header); + + msg_in.header.stream_id = FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_VALID, 1); + msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_APP_TYPE, 0); + msg_in.header.stream_id |= FIELD_PREP(PXP_CMDHDR_EXTDATA_SESSION_ID, id); + + ret = gsccs_send_message(gsc_res, &msg_in, sizeof(msg_in), + &msg_out, sizeof(msg_out)); + if (ret) { + drm_err(&xe->drm, "Failed to invalidate PXP stream-key %u (%pe)\n", + id, ERR_PTR(ret)); + } else if (msg_out.header.status != 0) { + ret = -EIO; + + if (is_fw_err_platform_config(msg_out.header.status)) + drm_info_once(&xe->drm, + "Failed to invalidate PXP stream-key %u: BIOS/SOC 0x%08x(%s)\n", + id, msg_out.header.status, + fw_err_to_string(msg_out.header.status)); + else + drm_dbg(&xe->drm, "Failed to invalidate stream-key %u, s=0x%08x\n", + id, msg_out.header.status); + } + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h new file mode 100644 index 0000000000000..c9efda02f4b0a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2024, Intel Corporation. All rights reserved. 
+ */ + +#ifndef __XE_PXP_SUBMIT_H__ +#define __XE_PXP_SUBMIT_H__ + +#include + +struct xe_pxp; +struct xe_pxp_gsc_client_resources; + +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp); +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp); + +int xe_pxp_submit_session_init(struct xe_pxp_gsc_client_resources *gsc_res, u32 id); +int xe_pxp_submit_session_termination(struct xe_pxp *pxp, u32 id); +int xe_pxp_submit_session_invalidation(struct xe_pxp_gsc_client_resources *gsc_res, + u32 id); + +#endif /* __XE_PXP_SUBMIT_H__ */ diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h b/drivers/gpu/drm/xe/xe_pxp_types.h new file mode 100644 index 0000000000000..53e9d48d10fb8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pxp_types.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2024, Intel Corporation. All rights reserved. + */ + +#ifndef __XE_PXP_TYPES_H__ +#define __XE_PXP_TYPES_H__ + +#include +#include +#include +#include +#include +#include + +struct xe_bo; +struct xe_exec_queue; +struct xe_device; +struct xe_gt; +struct xe_vm; + +enum xe_pxp_status { + XE_PXP_ERROR = -1, + XE_PXP_NEEDS_TERMINATION = 0, /* starting status */ + XE_PXP_NEEDS_ADDITIONAL_TERMINATION, + XE_PXP_TERMINATION_IN_PROGRESS, + XE_PXP_READY_TO_START, + XE_PXP_START_IN_PROGRESS, + XE_PXP_ACTIVE, + XE_PXP_SUSPENDED, +}; + +/** + * struct xe_pxp_gsc_client_resources - resources for GSC submission by a PXP + * client. The GSC FW supports multiple GSC clients active at the same time. + */ +struct xe_pxp_gsc_client_resources { + /** + * @host_session_handle: handle used to identify the client in messages + * sent to the GSC firmware. + */ + u64 host_session_handle; + /** @vm: VM used for PXP submissions to the GSCCS */ + struct xe_vm *vm; + /** @q: GSCCS exec queue for PXP submissions */ + struct xe_exec_queue *q; + + /** + * @bo: BO used for submissions to the GSCCS and GSC FW. It includes + * space for the GSCCS batch and the input/output buffers read/written + * by the FW + */ + struct xe_bo *bo; + /** @inout_size: size of each of the msg_in/out sections individually */ + u32 inout_size; + /** @batch: iosys_map to the batch memory within the BO */ + struct iosys_map batch; + /** @msg_in: iosys_map to the input memory within the BO */ + struct iosys_map msg_in; + /** @msg_out: iosys_map to the output memory within the BO */ + struct iosys_map msg_out; +}; + +/** + * struct xe_pxp - pxp state + */ +struct xe_pxp { + /** @xe: Backpointer to the xe_device struct */ + struct xe_device *xe; + + /** + * @gt: pointer to the gt that owns the submission-side of PXP + * (VDBOX, KCR and GSC) + */ + struct xe_gt *gt; + + /** @vcs_exec: kernel-owned objects for PXP submissions to the VCS */ + struct { + /** @vcs_exec.q: kernel-owned VCS exec queue used for PXP terminations */ + struct xe_exec_queue *q; + /** @vcs_exec.bo: BO used for submissions to the VCS */ + struct xe_bo *bo; + } vcs_exec; + + /** @gsc_res: kernel-owned objects for PXP submissions to the GSCCS */ + struct xe_pxp_gsc_client_resources gsc_res; + + /** @irq: wrapper for the worker and queue used for PXP irq support */ + struct { + /** @irq.work: worker that manages irq events. */ + struct work_struct work; + /** @irq.wq: workqueue on which to queue the irq work. */ + struct workqueue_struct *wq; + /** @irq.events: pending events, protected with xe->irq.lock. 
*/ + u32 events; +#define PXP_TERMINATION_REQUEST BIT(0) +#define PXP_TERMINATION_COMPLETE BIT(1) + } irq; + + /** @mutex: protects the pxp status and the queue list */ + struct mutex mutex; + /** @status: the current pxp status */ + enum xe_pxp_status status; + /** @activation: completion struct that tracks pxp start */ + struct completion activation; + /** @termination: completion struct that tracks terminations */ + struct completion termination; + + /** @queues: management of exec_queues that use PXP */ + struct { + /** @queues.lock: spinlock protecting the queue management */ + spinlock_t lock; + /** @queues.list: list of exec_queues that use PXP */ + struct list_head list; + } queues; + + /** + * @key_instance: keep track of the current iteration of the PXP key. + * Note that, due to the time needed for PXP termination and re-start + * to complete, the minimum time between 2 subsequent increases of this + * variable is 50ms, and even that only if there is a continuous attack; + * normal behavior is for this to increase much much slower than that. + * This means that we don't expect this to ever wrap and don't implement + * that case in the code. + */ + u32 key_instance; + /** + * @last_suspend_key_instance: value of key_instance at the last + * suspend. Used to check if any PXP session has been created between + * suspend cycles. + */ + u32 last_suspend_key_instance; +}; + +#endif /* __XE_PXP_TYPES_H__ */ diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index c059639613f7b..ebfae746f8613 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -24,6 +24,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" +#include "xe_pxp.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" @@ -120,6 +121,9 @@ query_engine_cycles(struct xe_device *xe, struct xe_gt *gt; unsigned int fw_ref; + if (IS_SRIOV_VF(xe)) + return -EOPNOTSUPP; + if (query->size == 0) { query->size = size; return 0; @@ -698,6 +702,33 @@ static int query_oa_units(struct xe_device *xe, return ret ? 
-EFAULT : 0; } +static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *query) +{ + struct drm_xe_query_pxp_status __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = sizeof(struct drm_xe_query_pxp_status); + struct drm_xe_query_pxp_status resp = { 0 }; + int ret; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + ret = xe_pxp_get_readiness_status(xe->pxp); + if (ret < 0) + return ret; + + resp.status = ret; + resp.supported_session_types = BIT(DRM_XE_PXP_TYPE_HWDRM); + + if (copy_to_user(query_ptr, &resp, size)) + return -EFAULT; + + return 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -709,6 +740,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_engine_cycles, query_uc_fw_version, query_oa_units, + query_pxp_status, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 9f327f27c0726..0c230ee53bba5 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -118,7 +118,7 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i) dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; dw[i++] = ~0U; @@ -156,7 +156,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, flags &= ~mask_flags; - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0); + return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index e055bed7ae555..f8fe61e255188 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -31,47 +31,55 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) sa_manager->bo = NULL; } -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) +/** + * __xe_sa_bo_manager_init() - Create and initialize the suballocator + * @tile: the &xe_tile where to allocate + * @size: number of bytes to allocate + * @guard: number of bytes to exclude from suballocations + * @align: alignment for each suballocated chunk + * + * Prepares the suballocation manager for suballocations. + * + * Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure. 
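+ *
+ * Most callers are expected to use the xe_sa_bo_manager_init() wrapper from
+ * xe_sa.h, which passes a SZ_4K guard, e.g. (size and alignment values below
+ * are illustrative only):
+ *
+ *	sa_manager = xe_sa_bo_manager_init(tile, SZ_1M, 16);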
+ */ +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align) { struct xe_device *xe = tile_to_xe(tile); - u32 managed_size = size - SZ_4K; + struct xe_sa_manager *sa_manager; + u32 managed_size; struct xe_bo *bo; int ret; - struct xe_sa_manager *sa_manager = drmm_kzalloc(&tile_to_xe(tile)->drm, - sizeof(*sa_manager), - GFP_KERNEL); + xe_tile_assert(tile, size > guard); + managed_size = size - guard; + + sa_manager = drmm_kzalloc(&xe->drm, sizeof(*sa_manager), GFP_KERNEL); if (!sa_manager) return ERR_PTR(-ENOMEM); - sa_manager->bo = NULL; - bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { - drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", - PTR_ERR(bo)); + drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n", + size / SZ_1K, bo); return ERR_CAST(bo); } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - - drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); - if (!sa_manager->cpu_ptr) { - sa_manager->bo = NULL; + if (!sa_manager->cpu_ptr) return ERR_PTR(-ENOMEM); - } } else { sa_manager->cpu_ptr = bo->vmap.vaddr; memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size); } + drm_suballoc_manager_init(&sa_manager->base, managed_size, align); ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini, sa_manager); if (ret) @@ -80,8 +88,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 return sa_manager; } -struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, - unsigned int size) +/** + * __xe_sa_bo_new() - Make a suballocation but use custom gfp flags. + * @sa_manager: the &xe_sa_manager + * @size: number of bytes we want to suballocate + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL. + * + * Try to make a suballocation of size @size. + * + * Return: a &drm_suballoc, or an ERR_PTR. + */ +struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp) { /* * BB to large, return -ENOBUFS indicating user should split @@ -90,7 +107,7 @@ struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, if (size > sa_manager->base.size) return ERR_PTR(-ENOBUFS); - return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0); + return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo) diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 4e96483057d70..1170ee5a81a82 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -5,19 +5,37 @@ #ifndef _XE_SA_H_ #define _XE_SA_H_ +#include +#include #include "xe_sa_types.h" struct dma_fence; -struct xe_bo; struct xe_tile; -struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align); +struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align); +struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp); + +static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align) +{ + return __xe_sa_bo_manager_init(tile, size, SZ_4K, align); +} + +/** + * xe_sa_bo_new() - Make a suballocation. 
+ * @sa_manager: the &xe_sa_manager + * @size: number of bytes we want to suballocate + * + * Try to make a suballocation of size @size. + * + * Return: a &drm_suballoc, or an ERR_PTR. + */ +static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size) +{ + return __xe_sa_bo_new(sa_manager, size, GFP_KERNEL); +} -struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager, - u32 size); void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); -void xe_sa_bo_free(struct drm_suballoc *sa_bo, - struct dma_fence *fence); +void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); static inline struct xe_sa_manager * to_xe_sa_manager(struct drm_suballoc_manager *mng) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index d942b20a9f29c..dbf260dded8df 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -18,9 +18,9 @@ struct dma_fence_chain; * struct xe_job_ptrs - Per hw engine instance data */ struct xe_job_ptrs { - /** @lrc_fence: Pre-allocated uinitialized lrc fence.*/ + /** @lrc_fence: Pre-allocated uninitialized lrc fence.*/ struct dma_fence *lrc_fence; - /** @chain_fence: Pre-allocated ninitialized fence chain node. */ + /** @chain_fence: Pre-allocated uninitialized fence chain node. */ struct dma_fence_chain *chain_fence; /** @batch_addr: Batch buffer address. */ u64 batch_addr; diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 04e2f539ccd94..a0eab44c0e768 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -81,7 +81,7 @@ void xe_sriov_probe_early(struct xe_device *xe) xe->sriov.__mode = mode; xe_assert(xe, xe->sriov.__mode); - if (has_sriov) + if (IS_SRIOV(xe)) drm_info(&xe->drm, "Running in %s mode\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c new file mode 100644 index 0000000000000..02b4eadf84079 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_survivability_mode.h" +#include "xe_survivability_mode_types.h" + +#include +#include +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_heci_gsc.h" +#include "xe_mmio.h" +#include "xe_pcode_api.h" +#include "xe_vsec.h" + +#define MAX_SCRATCH_MMIO 8 + +/** + * DOC: Xe Boot Survivability + * + * Boot Survivability is a software-based workflow for recovering a system in a failed boot state. + * Here, system recoverability is concerned with recovering the firmware responsible for boot. + * + * This is implemented by loading the driver with the bare minimum (no drm card) to allow the firmware + * to be flashed through mei and to collect telemetry. The driver's probe flow is modified + * such that it enters survivability mode when pcode initialization is incomplete and boot status + * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating + * survivability mode and provides additional information required for debug. + * + * The KMD exposes the following admin-only readable sysfs file in survivability mode: + * + * device/survivability_mode: The presence of this file indicates that the card is in survivability + * mode. It also provides additional information on why the driver entered + * survivability mode.
+ * + * Capability Information - Provides boot status + * Postcode Information - Provides information about the failure + * Overflow Information - Provides history of previous failures + * Auxiliary Information - Certain failures may have information in + * addition to postcode information + */ + +static u32 aux_history_offset(u32 reg_value) +{ + return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value); +} + +static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info, + int id, char *name) +{ + strscpy(info[id].name, name, sizeof(info[id].name)); + info[id].reg = PCODE_SCRATCH(id).raw; + info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id)); +} + +static void populate_survivability_info(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + struct xe_mmio *mmio; + u32 id = 0, reg_value; + char name[NAME_MAX]; + int index; + + mmio = xe_root_tile_mmio(xe); + set_survivability_info(mmio, info, id, "Capability Info"); + reg_value = info[id].value; + + if (reg_value & HISTORY_TRACKING) { + id++; + set_survivability_info(mmio, info, id, "Postcode Info"); + + if (reg_value & OVERFLOW_SUPPORT) { + id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value); + set_survivability_info(mmio, info, id, "Overflow Info"); + } + } + + if (reg_value & AUXINFO_SUPPORT) { + id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value); + + for (index = 0; id && reg_value; index++, reg_value = info[id].value, + id = aux_history_offset(reg_value)) { + snprintf(name, NAME_MAX, "Auxiliary Info %d", index); + set_survivability_info(mmio, info, id, name); + } + } +} + +static void log_survivability_info(struct pci_dev *pdev) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + int id; + + dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n", + survivability->boot_status); + for (id = 0; id < MAX_SCRATCH_MMIO; id++) { + if (info[id].reg) + dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name, + info[id].reg, info[id].value); + } +} + +static ssize_t survivability_mode_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info = survivability->info; + int index = 0, count = 0; + + for (index = 0; index < MAX_SCRATCH_MMIO; index++) { + if (info[index].reg) + count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, + info[index].reg, info[index].value); + } + + return count; +} + +static DEVICE_ATTR_ADMIN_RO(survivability_mode); + +static void enable_survivability_mode(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + int ret = 0; + + /* set survivability mode */ + survivability->mode = true; + dev_info(dev, "In Survivability Mode\n"); + + /* create survivability mode sysfs */ + ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); + if (ret) { + dev_warn(dev, "Failed to create survivability sysfs files\n"); + return; + } + + xe_heci_gsc_init(xe); + + xe_vsec_init(xe); +} + +/** + * xe_survivability_mode_enabled - check if survivability mode is enabled + * @xe: xe device instance + * + * Returns true if 
in survivability mode, false otherwise + */ +bool xe_survivability_mode_enabled(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + + return survivability->mode; +} + +/** + * xe_survivability_mode_required - checks if survivability mode is required + * @xe: xe device instance + * + * This function reads the boot status from Pcode + * + * Return: true if boot status indicates failure, false otherwise + */ +bool xe_survivability_mode_required(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + u32 data; + + if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe)) + return false; + + data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); + survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); + + return (survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE); +} + +/** + * xe_survivability_mode_remove - remove survivability mode + * @xe: xe device instance + * + * clean up sysfs entries of survivability mode + */ +void xe_survivability_mode_remove(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct device *dev = &pdev->dev; + + sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); + xe_heci_gsc_fini(xe); + kfree(survivability->info); + pci_set_drvdata(pdev, NULL); +} + +/** + * xe_survivability_mode_init - Initialize the survivability mode + * @xe: xe device instance + * + * Initializes survivability information and enables survivability mode + */ +void xe_survivability_mode_init(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + survivability->size = MAX_SCRATCH_MMIO; + + info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL); + if (!info) + return; + + survivability->info = info; + + populate_survivability_info(xe); + + /* Only log debug information and exit if it is a critical failure */ + if (survivability->boot_status == CRITICAL_FAILURE) { + log_survivability_info(pdev); + kfree(survivability->info); + return; + } + + enable_survivability_mode(pdev); +} diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h new file mode 100644 index 0000000000000..f530507a22c62 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SURVIVABILITY_MODE_H_ +#define _XE_SURVIVABILITY_MODE_H_ + +#include + +struct xe_device; + +void xe_survivability_mode_init(struct xe_device *xe); +void xe_survivability_mode_remove(struct xe_device *xe); +bool xe_survivability_mode_enabled(struct xe_device *xe); +bool xe_survivability_mode_required(struct xe_device *xe); + +#endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h new file mode 100644 index 0000000000000..19d433e253dfe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SURVIVABILITY_MODE_TYPES_H_ +#define _XE_SURVIVABILITY_MODE_TYPES_H_ + +#include +#include + +struct xe_survivability_info { + char name[NAME_MAX]; + u32 reg; + u32 value; 
+}; + +/** + * struct xe_survivability: Contains survivability mode information + */ +struct xe_survivability { + /** @info: struct that holds survivability info from scratch registers */ + struct xe_survivability_info *info; + + /** @size: number of scratch registers */ + u32 size; + + /** @boot_status: indicates critical/non critical boot failure */ + u8 boot_status; + + /** @mode: boolean to indicate survivability mode */ + bool mode; +}; + +#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 42f5bebd09e50..f87276df18f28 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -210,6 +210,7 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, return 0; } +ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 07cf7cfe4abd5..d29658ff4dd41 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -94,10 +94,6 @@ static int xe_tile_alloc(struct xe_tile *tile) return -ENOMEM; tile->mem.ggtt->tile = tile; - tile->mem.vram_mgr = drmm_kzalloc(drm, sizeof(*tile->mem.vram_mgr), GFP_KERNEL); - if (!tile->mem.vram_mgr) - return -ENOMEM; - return 0; } @@ -139,7 +135,7 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) int err; if (tile->mem.vram.usable_size) { - err = xe_ttm_vram_mgr_init(tile, tile->mem.vram_mgr); + err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); if (err) return err; xe->info.mem_region_mask |= BIT(tile->id) << 1; @@ -170,17 +166,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile) if (err) return err; + xe_wa_apply_tile_workarounds(tile); + + return xe_tile_sysfs_init(tile); +} + +int xe_tile_init(struct xe_tile *tile) +{ tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); - xe_wa_apply_tile_workarounds(tile); - - err = xe_tile_sysfs_init(tile); - return 0; } - void xe_tile_migrate_wait(struct xe_tile *tile) { xe_migrate_wait(tile->migrate); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 1c9e42ade6b05..eb939316d55b0 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -12,6 +12,7 @@ struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); +int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ea50fee50c7de..ccebd5f0878e5 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -53,6 +53,11 @@ DEFINE_EVENT(xe_bo, xe_bo_validate, TP_ARGS(bo) ); +DEFINE_EVENT(xe_bo, xe_bo_create, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + TRACE_EVENT(xe_bo_move, TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, bool move_lacks_source), @@ -87,6 +92,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_STRUCT__entry( __string(dev, __dev_name_vma(vma)) __field(struct xe_vma *, vma) + __field(struct xe_vm *, vm) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -96,14 +102,16 @@ DECLARE_EVENT_CLASS(xe_vma, TP_fast_assign( __assign_str(dev); __entry->vma = vma; + __entry->vm = xe_vma_vm(vma); __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 
1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", - __get_str(dev), __entry->vma, __entry->asid, __entry->start, + TP_printk("dev=%s, vma=%p, vm=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx", + __get_str(dev), __entry->vma, __entry->vm, + __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -185,16 +193,19 @@ DECLARE_EVENT_CLASS(xe_vm, __string(dev, __dev_name_vm(vm)) __field(struct xe_vm *, vm) __field(u32, asid) + __field(u32, flags) ), TP_fast_assign( __assign_str(dev); __entry->vm = vm; __entry->asid = vm->usm.asid; + __entry->flags = vm->flags; ), - TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), - __entry->vm, __entry->asid) + TP_printk("dev=%s, vm=%p, asid=0x%05x, vm flags=0x%05x", + __get_str(dev), __entry->vm, __entry->asid, + __entry->flags) ); DEFINE_EVENT(xe_vm, xe_vm_kill, diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d414421f8c131..d9c9d2547aadf 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -207,17 +207,16 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) #endif } -void xe_ttm_stolen_mgr_init(struct xe_device *xe) +int xe_ttm_stolen_mgr_init(struct xe_device *xe) { - struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_ttm_stolen_mgr *mgr; u64 stolen_size, io_size; int err; - if (!mgr) { - drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n"); - return; - } + mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; if (IS_SRIOV_VF(xe)) stolen_size = 0; @@ -230,7 +229,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (!stolen_size) { drm_dbg_kms(&xe->drm, "No stolen memory support\n"); - return; + return 0; } /* @@ -246,7 +245,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); - return; + return err; } drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", @@ -254,6 +253,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); + + return 0; } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 1777245ff8101..8e877d1e839bd 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -12,7 +12,7 @@ struct ttm_resource; struct xe_bo; struct xe_device; -void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index f4a16e5fa7700..9e375a40aee90 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -340,9 +340,8 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) { struct xe_device *xe = tile_to_xe(tile); - struct xe_mem_region *vram = &tile->mem.vram; + struct xe_vram_region *vram = 
&tile->mem.vram; - mgr->vram = vram; return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, vram->usable_size, vram->io_size, PAGE_SIZE); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h index 2d75cf1262893..1144f9232ebbb 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h @@ -9,8 +9,6 @@ #include #include -struct xe_mem_region; - /** * struct xe_ttm_vram_mgr - XE TTM VRAM manager * @@ -21,8 +19,6 @@ struct xe_ttm_vram_mgr { struct ttm_resource_manager manager; /** @mm: DRM buddy allocator which manages the VRAM */ struct drm_buddy mm; - /** @vram: ptr to details of associated VRAM region */ - struct xe_mem_region *vram; /** @visible_size: Proped size of the CPU visible portion */ u64 visible_size; /** @visible_avail: CPU visible portion still unallocated */ diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0d073a9987c2e..d8167e818280b 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -288,19 +288,6 @@ int xe_uc_suspend(struct xe_uc *uc) return xe_guc_suspend(&uc->guc); } -/** - * xe_uc_remove() - Clean up the UC structures before driver removal - * @uc: the UC object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_uc_remove(struct xe_uc *uc) -{ - xe_gsc_remove(&uc->gsc); -} - /** * xe_uc_declare_wedged() - Declare UC wedged * @uc: the UC object diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 506517c113339..3813c1ede450e 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -20,7 +20,6 @@ void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); -void xe_uc_remove(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 690330352d4cd..d664f2e418b26 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -33,6 +33,7 @@ #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_pt.h" +#include "xe_pxp.h" #include "xe_res_cursor.h" #include "xe_sync.h" #include "xe_trace_bo.h" @@ -1382,6 +1383,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) struct xe_tile *tile; u8 id; + /* + * Since the GSCCS is not user-accessible, we don't expect a GSC VM to + * ever be in faulting mode. + */ + xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); + vm = kzalloc(sizeof(*vm), GFP_KERNEL); if (!vm) return ERR_PTR(-ENOMEM); @@ -1392,7 +1399,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->flags = flags; - init_rwsem(&vm->lock); + /** + * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be + * manipulated under the PXP mutex. However, the PXP mutex can be taken + * under a user-VM lock when the PXP session is started at exec_queue + * creation time. Those are different VMs and therefore there is no risk + * of deadlock, but we need to tell lockdep that this is the case or it + * will print a warning. 
+ */ + if (flags & XE_VM_FLAG_GSC) { + static struct lock_class_key gsc_vm_key; + + __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key); + } else { + init_rwsem(&vm->lock); + } mutex_init(&vm->snap_mutex); INIT_LIST_HEAD(&vm->rebind_list); @@ -2668,11 +2689,10 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); } -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops) +static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops) { struct drm_exec exec; struct dma_fence *fence; @@ -2685,21 +2705,21 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, drm_exec_until_all_locked(&exec) { err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); drm_exec_retry_on_contention(&exec); - if (err) + if (err) { + fence = ERR_PTR(err); goto unlock; + } fence = ops_execute(vm, vops); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); + if (IS_ERR(fence)) goto unlock; - } vm_bind_ioctl_ops_fini(vm, vops, fence); } unlock: drm_exec_fini(&exec); - return err; + return fence; } ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); @@ -2707,7 +2727,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); (DRM_XE_VM_BIND_FLAG_READONLY | \ DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ DRM_XE_VM_BIND_FLAG_NULL | \ - DRM_XE_VM_BIND_FLAG_DUMPABLE) + DRM_XE_VM_BIND_FLAG_DUMPABLE | \ + DRM_XE_VM_BIND_FLAG_CHECK_PXP) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -2870,7 +2891,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, u64 addr, u64 range, u64 obj_offset, - u16 pat_index) + u16 pat_index, u32 op, u32 bind_flags) { u16 coh_mode; @@ -2914,6 +2935,12 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, return -EINVAL; } + /* If a BO is protected it can only be mapped if the key is still valid */ + if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && + op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) + if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) + return -ENOEXEC; + return 0; } @@ -2931,6 +2958,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; struct xe_vma_ops vops; + struct dma_fence *fence; int err; int i; @@ -3002,6 +3030,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; u16 pat_index = bind_ops[i].pat_index; + u32 op = bind_ops[i].op; + u32 bind_flags = bind_ops[i].flags; if (!obj) continue; @@ -3014,7 +3044,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bos[i] = gem_to_xe_bo(gem_obj); err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, - obj_offset, pat_index); + obj_offset, pat_index, op, + bind_flags); if (err) goto put_obj; } @@ -3095,7 +3126,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (err) goto unwind_ops; - err = vm_bind_ioctl_ops_execute(vm, &vops); + fence = vm_bind_ioctl_ops_execute(vm, &vops); + if (IS_ERR(fence)) + err = PTR_ERR(fence); + else + dma_fence_put(fence); unwind_ops: if (err && err != -ENODATA) @@ -3129,6 
+3164,81 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return err; } +/** + * xe_vm_bind_kernel_bo - bind a kernel BO to a VM + * @vm: VM to bind the BO to + * @bo: BO to bind + * @q: exec queue to use for the bind (optional) + * @addr: address at which to bind the BO + * @cache_lvl: PAT cache level to use + * + * Execute a VM bind map operation on a kernel-owned BO to bind it into a + * kernel-owned VM. + * + * Returns a dma_fence to track the binding completion if the job to do so was + * successfully submitted, an error pointer otherwise. + */ +struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, + struct xe_exec_queue *q, u64 addr, + enum xe_cache_level cache_lvl) +{ + struct xe_vma_ops vops; + struct drm_gpuva_ops *ops = NULL; + struct dma_fence *fence; + int err; + + xe_bo_get(bo); + xe_vm_get(vm); + if (q) + xe_exec_queue_get(q); + + down_write(&vm->lock); + + xe_vma_ops_init(&vops, vm, q, NULL, 0); + + ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size, + DRM_XE_VM_BIND_OP_MAP, 0, 0, + vm->xe->pat.idx[cache_lvl]); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto release_vm_lock; + } + + err = vm_bind_ioctl_ops_parse(vm, ops, &vops); + if (err) + goto release_vm_lock; + + xe_assert(vm->xe, !list_empty(&vops.list)); + + err = xe_vma_ops_alloc(&vops, false); + if (err) + goto unwind_ops; + + fence = vm_bind_ioctl_ops_execute(vm, &vops); + if (IS_ERR(fence)) + err = PTR_ERR(fence); + +unwind_ops: + if (err && err != -ENODATA) + vm_bind_ioctl_ops_unwind(vm, &ops, 1); + + xe_vma_ops_fini(&vops); + drm_gpuva_ops_free(&vm->gpuvm, ops); + +release_vm_lock: + up_write(&vm->lock); + + if (q) + xe_exec_queue_put(q); + xe_vm_put(vm); + xe_bo_put(bo); + + if (err) + fence = ERR_PTR(err); + + return fence; +} + /** * xe_vm_lock() - Lock the vm's dma_resv object * @vm: The struct xe_vm whose lock is to be locked @@ -3235,6 +3345,35 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) return ret; } +int xe_vm_validate_protected(struct xe_vm *vm) +{ + struct drm_gpuva *gpuva; + int err = 0; + + if (!vm) + return -ENODEV; + + mutex_lock(&vm->snap_mutex); + + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + struct xe_bo *bo = vma->gpuva.gem.obj ? 
+ gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; + + if (!bo) + continue; + + if (xe_bo_is_protected(bo)) { + err = xe_pxp_bo_key_check(vm->xe->pxp, bo); + if (err) + break; + } + } + + mutex_unlock(&vm->snap_mutex); + return err; +} + struct xe_vm_snapshot { unsigned long num_snaps; struct { diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 23adb74428815..f66075f8a6fe8 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -18,6 +18,8 @@ struct drm_file; struct ttm_buffer_object; +struct dma_fence; + struct xe_exec_queue; struct xe_file; struct xe_sync_entry; @@ -213,6 +215,8 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, int xe_vm_invalidate_vma(struct xe_vma *vma); +int xe_vm_validate_protected(struct xe_vm *vm); + static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); @@ -247,6 +251,10 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, unsigned int num_fences); +struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, + struct xe_exec_queue *q, u64 addr, + enum xe_cache_level cache_lvl); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 0787869584036..1030ce214032c 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -431,7 +431,7 @@ * bind path also acquires this lock in write while the exec / compute mode * rebind worker acquires this lock in read mode. * - * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv + * VM dma-resv lock (vm->gpuvm.r_obj->resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired * during VM binds, execs, and compute mode rebind worker. This lock is also * held when private BOs are being evicted. 
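For reference, here is a minimal kernel-side sketch of how the new xe_vm_bind_kernel_bo() helper added above is intended to be consumed. This snippet is not part of the series; the wrapper name, the NULL exec queue and the XE_CACHE_WB cache level are illustrative assumptions only.

/* Not part of the patch: sketch of a synchronous kernel-BO bind using the
 * new helper. Assumes @vm and @bo were already created by the caller.
 */
#include <linux/dma-fence.h>
#include <linux/err.h>

#include "xe_bo.h"
#include "xe_vm.h"

static int example_bind_kernel_bo_sync(struct xe_vm *vm, struct xe_bo *bo, u64 addr)
{
	struct dma_fence *fence;

	/* @q is optional per the kernel-doc; pass NULL to not specify one */
	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* The returned fence signals once the bind job has completed */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}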
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7f9a303e51d89..52467b9b5348f 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -164,6 +164,7 @@ struct xe_vm { #define XE_VM_FLAG_BANNED BIT(5) #define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags) #define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id) +#define XE_VM_FLAG_GSC BIT(8) unsigned long flags; /** @composite_fence_ctx: context composite fence */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 570fe03764025..d4982799383cc 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -599,7 +599,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe3_LPG */ { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, { XE_RTP_NAME("18034896535"), @@ -613,6 +614,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, SMP_FORCE_128B_OVERFETCH)) }, + { XE_RTP_NAME("14023061436"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 40438c3d9b723..228436532282c 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -28,6 +28,7 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) + GRAPHICS_VERSION(3001) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 22019794406 GRAPHICS_VERSION(2001) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 9c3a339e26af1..4736ea525048e 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -849,19 +849,20 @@ MACRO__(0xE20B, ## __VA_ARGS__), \ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ - MACRO__(0xE212, ## __VA_ARGS__) + MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__), \ + MACRO__(0xE215, ## __VA_ARGS__), \ + MACRO__(0xE216, ## __VA_ARGS__) /* PTL */ #define INTEL_PTL_IDS(MACRO__, ...) 
\ MACRO__(0xB080, ## __VA_ARGS__), \ MACRO__(0xB081, ## __VA_ARGS__), \ MACRO__(0xB082, ## __VA_ARGS__), \ + MACRO__(0xB083, ## __VA_ARGS__), \ + MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ - MACRO__(0xB091, ## __VA_ARGS__), \ - MACRO__(0xB092, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ - MACRO__(0xB0A1, ## __VA_ARGS__), \ - MACRO__(0xB0A2, ## __VA_ARGS__), \ MACRO__(0xB0B0, ## __VA_ARGS__) #endif /* __PCIIDS_H__ */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f62689ca861a4..892f54d3aa091 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -629,6 +629,39 @@ struct drm_xe_query_uc_fw_version { __u64 reserved; }; +/** + * struct drm_xe_query_pxp_status - query if PXP is ready + * + * If PXP is enabled and no fatal error has occurred, the status will be set to + * one of the following values: + * 0: PXP init still in progress + * 1: PXP init complete + * + * If PXP is not enabled or something has gone wrong, the query will fail + * with one of the following error codes: + * -ENODEV: PXP not supported or disabled; + * -EIO: fatal error occurred during init, so PXP will never be enabled; + * -EINVAL: incorrect value provided as part of the query; + * -EFAULT: error copying the memory between kernel and userspace. + * + * The status can only be 0 in the first few seconds after driver load. If + * everything works as expected, the status will transition to init complete in + * less than 1 second, while in case of errors the driver might take longer to + * start returning an error code, but it should still take less than 10 seconds. + * + * The supported session type bitmask is based on the values in + * enum drm_xe_pxp_session_type. TYPE_NONE is always supported and therefore + * is not reported in the bitmask. + * + */ +struct drm_xe_query_pxp_status { + /** @status: current PXP status */ + __u32 status; + + /** @supported_session_types: bitmask of supported PXP session types */ + __u32 supported_session_types; +}; + /** * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main * structure to query device information @@ -648,6 +681,7 @@ struct drm_xe_query_uc_fw_version { * attributes. * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS * * If size is set to 0, the driver fills it with the required size for * the requested type of data to query. If size is equal to the required @@ -700,6 +734,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 #define DRM_XE_DEVICE_QUERY_OA_UNITS 8 +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9 /** @query: The type of data to query */ __u32 query; @@ -743,8 +778,23 @@ struct drm_xe_device_query { * - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This * is uncached. Scanout surfaces should likely use this. All objects * that can be placed in VRAM must use this. + * + * This ioctl supports setting the following properties via the + * %DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this object will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that.
Objects used with a session of type + * %DRM_XE_PXP_TYPE_HWDRM will be marked as invalid if a PXP invalidation + * event occurs after their creation. Attempting to flip an invalid object + * will cause a black frame to be displayed instead. Submissions with invalid + * objects mapped in the VM will be rejected. */ struct drm_xe_gem_create { +#define DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE 0 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -811,6 +861,32 @@ struct drm_xe_gem_create { /** * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * + * The @flags can be: + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For userspace to query the special offset + * for use in the mmap ioctl. Writing to the returned mmap address will generate a + * PCI memory barrier with low overhead (avoiding an IOCTL call as well as writing + * to VRAM, which would also add overhead), acting like an MI_MEM_FENCE + * instruction. + * + * Note: The mmap size can be at most 4K, due to HW limitations. As a result + * this interface is only supported on CPU architectures that support 4K page + * size. The mmap_offset ioctl will detect this and gracefully return an + * error, where userspace is expected to have a different fallback method for + * triggering a barrier. + * + * Roughly the usage would be as follows: + * + * .. code-block:: C + * + * struct drm_xe_gem_mmap_offset mmo = { + * .handle = 0, // must be set to 0 + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, + * }; + * + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset); + * map[i] = 0xdeadbeaf; // issue barrier */ struct drm_xe_gem_mmap_offset { /** @extensions: Pointer to the first extension struct, if any */ @@ -819,7 +895,8 @@ struct drm_xe_gem_mmap_offset { /** @handle: Handle for the object being mapped. */ __u32 handle; - /** @flags: Must be zero */ +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0) + /** @flags: Flags */ __u32 flags; /** @offset: The fake offset to use for subsequent mmap call */ @@ -906,6 +983,9 @@ struct drm_xe_vm_destroy { * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. This flag is intended to * implement VK sparse bindings. + * - %DRM_XE_VM_BIND_FLAG_CHECK_PXP - If the object is encrypted via PXP, + * reject the binding if the encryption key is no longer valid. This + * flag has no effect on BOs that are not marked as using PXP. */ struct drm_xe_vm_bind_op { /** @extensions: Pointer to the first extension struct, if any */ @@ -996,6 +1076,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) +#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) /** @flags: Bind flags */ __u32 flags; @@ -1087,6 +1168,24 @@ struct drm_xe_vm_bind { /** * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE * + * This ioctl supports setting the following properties via the + * %DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY - set the queue priority. + * CAP_SYS_NICE is required to set a value above normal. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE - set the queue timeslice + * duration in microseconds.
+ * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this queue will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. When a queue of type + * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session + * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if it isn't already running. + * Given that going into a power-saving state kills PXP HWDRM sessions, + * runtime PM will be blocked while queues of this type are alive. + * All PXP queues will be killed if a PXP invalidation event occurs. + * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class * &DRM_XE_ENGINE_CLASS_RENDER. @@ -1110,7 +1209,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 - +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; @@ -1729,6 +1828,26 @@ struct drm_xe_oa_stream_info { __u64 reserved[3]; }; +/** + * enum drm_xe_pxp_session_type - Supported PXP session types. + * + * We currently only support HWDRM sessions, which are used for protected + * content that ends up being displayed, but the HW supports multiple types, so + * we might extend support in the future. + */ +enum drm_xe_pxp_session_type { + /** @DRM_XE_PXP_TYPE_NONE: PXP not used */ + DRM_XE_PXP_TYPE_NONE = 0, + /** + * @DRM_XE_PXP_TYPE_HWDRM: HWDRM sessions are used for content that ends + * up on the display. + */ + DRM_XE_PXP_TYPE_HWDRM = 1, +}; + +/* ID of the protected content session managed by Xe when PXP is active */ +#define DRM_XE_PXP_HWDRM_DEFAULT_SESSION 0xf + #if defined(__cplusplus) } #endif
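To illustrate how the new uAPI pieces fit together, below is a hedged userspace sketch (not part of the series): it exercises the two-call size/data pattern of DRM_XE_DEVICE_QUERY_PXP_STATUS and then creates a render exec queue tied to the HWDRM session type via the set_property extension. The helper names, the include path and the choice of the render engine are assumptions made purely for the example.

/* Illustrative userspace sketch, not taken from the series. Assumes an open
 * xe DRM fd and an existing VM id; error handling is minimal.
 */
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>

#include <drm/xe_drm.h>	/* exact include path depends on how the uAPI headers are installed */

static int query_pxp_ready(int fd, struct drm_xe_query_pxp_status *resp)
{
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_PXP_STATUS,
	};

	/* First call with size == 0: the kernel fills in the required size */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return -errno;

	if (query.size != sizeof(*resp))
		return -EINVAL;

	/* Second call: fetch the status and supported session types */
	query.data = (uintptr_t)resp;
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return -errno;

	return 0; /* resp->status: 0 = init in progress, 1 = init complete */
}

static int create_hwdrm_queue(int fd, uint32_t vm_id, uint32_t *queue_id)
{
	struct drm_xe_engine_class_instance instance = {
		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
	};
	struct drm_xe_ext_set_property pxp_type = {
		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE,
		.value = DRM_XE_PXP_TYPE_HWDRM,
	};
	struct drm_xe_exec_queue_create create = {
		.extensions = (uintptr_t)&pxp_type,
		.width = 1,
		.num_placements = 1,
		.vm_id = vm_id,
		.instances = (uintptr_t)&instance,
	};

	/* Creating the queue starts the default HWDRM session if needed */
	if (ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		return -errno;

	*queue_id = create.exec_queue_id;
	return 0;
}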