Diffstat (limited to 'drivers/gpu')
53 files changed, 2157 insertions, 348 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 5ce65ccb3c08..856b14fe1c4d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -33,6 +33,7 @@ xe-y += xe_bb.o \ xe_device_sysfs.o \ xe_dma_buf.o \ xe_drm_client.o \ + xe_eu_stall.o \ xe_exec.o \ xe_exec_queue.o \ xe_execlist.o \ @@ -60,6 +61,7 @@ xe-y += xe_bb.o \ xe_guc_capture.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ + xe_guc_engine_activity.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index fee385532fb0..ec516e838ee8 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -140,6 +140,7 @@ enum xe_guc_action { XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 02a413a07382..d5d453dc927a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -170,6 +170,7 @@ static void xe_display_fini(void *arg) intel_hpd_poll_fini(xe); intel_hdcp_component_fini(display); intel_audio_deinit(display); + intel_display_driver_remove(display); } int xe_display_init(struct xe_device *xe) @@ -184,7 +185,7 @@ int xe_display_init(struct xe_device *xe) if (err) return err; - return xe_device_add_action_or_reset(xe, xe_display_fini, xe); + return devm_add_action_or_reset(xe->drm.dev, xe_display_fini, xe); } void xe_display_register(struct xe_device *xe) @@ -209,16 +210,6 @@ void xe_display_unregister(struct xe_device *xe) intel_display_driver_unregister(display); } -void xe_display_driver_remove(struct xe_device *xe) -{ - struct intel_display *display = &xe->display; - - if (!xe->info.probe_display) - return; - - intel_display_driver_remove(display); -} - /* IRQ-related functions */ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 685dc74402fb..46e14f8dee28 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -14,7 +14,6 @@ struct drm_driver; bool xe_display_driver_probe_defer(struct pci_dev *pdev); void xe_display_driver_set_hooks(struct drm_driver *driver); -void xe_display_driver_remove(struct xe_device *xe); int xe_display_create(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c8fd3d5ca502..4f372dc2cb89 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -53,7 +53,6 @@ #define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_START_UDW(base) XE_REG((base) + 0x48) diff --git a/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h b/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h new file mode 100644 index 000000000000..c53f57fdde65 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef 
_XE_EU_STALL_REGS_H_ +#define _XE_EU_STALL_REGS_H_ + +#include "regs/xe_reg_defs.h" + +#define XEHPC_EUSTALL_BASE XE_REG_MCR(0xe520) +#define XEHPC_EUSTALL_BASE_BUF_ADDR REG_GENMASK(31, 6) +#define XEHPC_EUSTALL_BASE_XECORE_BUF_SZ REG_GENMASK(5, 3) +#define XEHPC_EUSTALL_BASE_ENABLE_SAMPLING REG_BIT(1) + +#define XEHPC_EUSTALL_BASE_UPPER XE_REG_MCR(0xe524) + +#define XEHPC_EUSTALL_REPORT XE_REG_MCR(0xe528, XE_REG_OPTION_MASKED) +#define XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK REG_GENMASK(15, 2) +#define XEHPC_EUSTALL_REPORT_OVERFLOW_DROP REG_BIT(1) + +#define XEHPC_EUSTALL_REPORT1 XE_REG_MCR(0xe52c, XE_REG_OPTION_MASKED) +#define XEHPC_EUSTALL_REPORT1_READ_PTR_MASK REG_GENMASK(15, 2) + +#define XEHPC_EUSTALL_CTRL XE_REG_MCR(0xe53c, XE_REG_OPTION_MASKED) +#define EUSTALL_MOCS REG_GENMASK(9, 3) +#define EUSTALL_SAMPLE_RATE REG_GENMASK(2, 0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 096859072396..d08dd437172f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -358,6 +358,8 @@ #define RENDER_AWAKE_STATUS REG_BIT(1) #define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0) +#define MISC_STATUS_0 XE_REG(0xa500) + #define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) #define FORCEWAKE_GSC XE_REG(0xa618) @@ -478,6 +480,7 @@ #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) #define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) +#define RES_CHK_SPR_DIS REG_BIT(6) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) #define UGM_BACKUP_MODE REG_BIT(13) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 6cf282618836..3abb17d2ca33 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,10 +7,6 @@ #include "regs/xe_reg_defs.h" -#define TIMESTAMP_OVERRIDE XE_REG(0x44074) -#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12) -#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0) - #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 39fe485d2085..81b9d9bb3f57 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -237,7 +237,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) /* * NB: Despite passing a GFP_ flags parameter here, more allocations are done - * internally using GFP_KERNEL expliictly. Hence this call must be in the worker + * internally using GFP_KERNEL explicitly. Hence this call must be in the worker * thread and not in the initial capture call. 
*/ dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, @@ -423,11 +423,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); if (offset & 3) - drm_printf(p, "Offset not word aligned: %zu", size); + drm_printf(p, "Offset not word aligned: %zu", offset); line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); - if (IS_ERR_OR_NULL(line_buff)) { - drm_printf(p, "Failed to allocate line buffer: %pe", line_buff); + if (!line_buff) { + drm_printf(p, "Failed to allocate line buffer\n"); return; } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 64d3a26ad4a3..9454b51f7ad8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -53,7 +53,6 @@ #include "xe_pxp.h" #include "xe_query.h" #include "xe_sriov.h" -#include "xe_survivability_mode.h" #include "xe_tile.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" @@ -65,12 +64,6 @@ #include <generated/xe_wa_oob.h> -struct xe_device_remove_action { - struct list_head node; - void (*action)(void *); - void *data; -}; - static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -667,7 +660,7 @@ static int wait_for_lmem_ready(struct xe_device *xe) } ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ -static void update_device_info(struct xe_device *xe) +static void sriov_update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { @@ -698,15 +691,11 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); - update_device_info(xe); + sriov_update_device_info(xe); err = xe_pcode_probe_early(xe); - if (err) { - if (xe_survivability_mode_required(xe)) - xe_survivability_mode_init(xe); - + if (err) return err; - } err = wait_for_lmem_ready(xe); if (err) @@ -752,9 +741,6 @@ int xe_device_probe(struct xe_device *xe) int err; u8 id; - xe->probing = true; - INIT_LIST_HEAD(&xe->remove_action_list); - xe_pat_init_early(xe); err = xe_sriov_init(xe); @@ -762,6 +748,7 @@ int xe_device_probe(struct xe_device *xe) return err; xe->info.mem_region_mask = 1; + err = xe_set_dma_info(xe); if (err) return err; @@ -770,7 +757,9 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - xe_ttm_sys_mgr_init(xe); + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); @@ -865,7 +854,9 @@ int xe_device_probe(struct xe_device *xe) return err; } - xe_heci_gsc_init(xe); + err = xe_heci_gsc_init(xe); + if (err) + return err; err = xe_oa_init(xe); if (err) @@ -877,11 +868,11 @@ int xe_device_probe(struct xe_device *xe) err = xe_pxp_init(xe); if (err) - goto err_remove_display; + return err; err = drm_dev_register(&xe->drm, 0); if (err) - goto err_remove_display; + return err; xe_display_register(xe); @@ -904,84 +895,19 @@ int xe_device_probe(struct xe_device *xe) xe_vsec_init(xe); - xe->probing = false; - return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: xe_display_unregister(xe); -err_remove_display: - xe_display_driver_remove(xe); return err; } -/** - * xe_device_call_remove_actions - Call the remove actions - * @xe: xe device instance - * - * This is only to be used by xe_pci and xe_device to call the remove actions - * while removing the driver or handling probe failures. 
- */ -void xe_device_call_remove_actions(struct xe_device *xe) -{ - struct xe_device_remove_action *ra, *tmp; - - list_for_each_entry_safe(ra, tmp, &xe->remove_action_list, node) { - ra->action(ra->data); - list_del(&ra->node); - kfree(ra); - } - - xe->probing = false; -} - -/** - * xe_device_add_action_or_reset - Add an action to run on driver removal - * @xe: xe device instance - * @action: Function that should be called on device remove - * @data: Pointer to data passed to @action implementation - * - * This adds a custom action to the list of remove callbacks executed on device - * remove, before any dev or drm managed resources are removed. This is only - * needed if the action leads to component_del()/component_master_del() since - * that is not compatible with devres cleanup. - * - * Returns: 0 on success or a negative error code on failure, in which case - * @action is already called. - */ -int xe_device_add_action_or_reset(struct xe_device *xe, - void (*action)(void *), void *data) -{ - struct xe_device_remove_action *ra; - - drm_WARN_ON(&xe->drm, !xe->probing); - - ra = kmalloc(sizeof(*ra), GFP_KERNEL); - if (!ra) { - action(data); - return -ENOMEM; - } - - INIT_LIST_HEAD(&ra->node); - ra->action = action; - ra->data = data; - list_add(&ra->node, &xe->remove_action_list); - - return 0; -} - void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); drm_dev_unplug(&xe->drm); - - xe_display_driver_remove(xe); - - xe_heci_gsc_fini(xe); - - xe_device_call_remove_actions(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 079dad32a6f5..0bc3bc8e6803 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -45,9 +45,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, const struct pci_device_id *ent); int xe_device_probe_early(struct xe_device *xe); int xe_device_probe(struct xe_device *xe); -int xe_device_add_action_or_reset(struct xe_device *xe, - void (*action)(void *), void *data); -void xe_device_call_remove_actions(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 7375937934fa..7efbd4c52791 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -32,9 +32,6 @@ vram_d3cold_threshold_show(struct device *dev, struct xe_device *xe = pdev_to_xe_device(pdev); int ret; - if (!xe) - return -EINVAL; - xe_pm_runtime_get(xe); ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); xe_pm_runtime_put(xe); @@ -51,9 +48,6 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, u32 vram_d3cold_threshold; int ret; - if (!xe) - return -EINVAL; - ret = kstrtou32(buff, 0, &vram_d3cold_threshold); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 4656305dd45a..833c29fed3a3 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -429,20 +429,6 @@ struct xe_device { struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; /** - * @remove_action_list: list of actions to execute on device remove. - * Use xe_device_add_remove_action() for that. Actions can only be added - * during probe and are executed during the call from PCI subsystem to - * remove the driver from the device. 
- */ - struct list_head remove_action_list; - - /** - * @probing: cover the section in which @remove_action_list can be used - * to post cleaning actions - */ - bool probing; - - /** * @mem_access: keep track of memory access in the device, possibly * triggering additional actions when they occur. */ diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c new file mode 100644 index 000000000000..88a92baf5c95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -0,0 +1,960 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/anon_inodes.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/types.h> + +#include <drm/drm_drv.h> +#include <generated/xe_wa_oob.h> +#include <uapi/drm/xe_drm.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_eu_stall.h" +#include "xe_force_wake.h" +#include "xe_gt_mcr.h" +#include "xe_gt_printk.h" +#include "xe_gt_topology.h" +#include "xe_macros.h" +#include "xe_observation.h" +#include "xe_pm.h" +#include "xe_trace.h" +#include "xe_wa.h" + +#include "regs/xe_eu_stall_regs.h" +#include "regs/xe_gt_regs.h" + +#define POLL_PERIOD_MS 5 + +static size_t per_xecore_buf_size = SZ_512K; + +struct per_xecore_buf { + /* Buffer vaddr */ + u8 *vaddr; + /* Write pointer */ + u32 write; + /* Read pointer */ + u32 read; +}; + +struct xe_eu_stall_data_stream { + bool pollin; + bool enabled; + int wait_num_reports; + int sampling_rate_mult; + wait_queue_head_t poll_wq; + size_t data_record_size; + size_t per_xecore_buf_size; + + struct xe_gt *gt; + struct xe_bo *bo; + struct per_xecore_buf *xecore_buf; + struct { + bool reported_to_user; + xe_dss_mask_t mask; + } data_drop; + struct delayed_work buf_poll_work; +}; + +struct xe_eu_stall_gt { + /* Lock to protect stream */ + struct mutex stream_lock; + /* EU stall data stream */ + struct xe_eu_stall_data_stream *stream; + /* Workqueue to schedule buffer pointers polling work */ + struct workqueue_struct *buf_ptr_poll_wq; +}; + +/** + * struct eu_stall_open_properties - EU stall sampling properties received + * from user space at open. + * @sampling_rate_mult: EU stall sampling rate multiplier. + * HW will sample every (sampling_rate_mult x 251) cycles. + * @wait_num_reports: Minimum number of EU stall data reports to unblock poll(). + * @gt: GT on which EU stall data will be captured. + */ +struct eu_stall_open_properties { + int sampling_rate_mult; + int wait_num_reports; + struct xe_gt *gt; +}; + +/* + * EU stall data format for PVC + */ +struct xe_eu_stall_data_pvc { + __u64 ip_addr:29; /* Bits 0 to 28 */ + __u64 active_count:8; /* Bits 29 to 36 */ + __u64 other_count:8; /* Bits 37 to 44 */ + __u64 control_count:8; /* Bits 45 to 52 */ + __u64 pipestall_count:8; /* Bits 53 to 60 */ + __u64 send_count:8; /* Bits 61 to 68 */ + __u64 dist_acc_count:8; /* Bits 69 to 76 */ + __u64 sbid_count:8; /* Bits 77 to 84 */ + __u64 sync_count:8; /* Bits 85 to 92 */ + __u64 inst_fetch_count:8; /* Bits 93 to 100 */ + __u64 unused_bits:27; + __u64 unused[6]; +} __packed; + +/* + * EU stall data format for Xe2 arch GPUs (LNL, BMG). 
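 + * As on PVC above, each record is 64 bytes (one 64B buffer row): the IP and
 + * stall counters occupy bits 0 to 112 and the remainder is padding.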
*/ +struct xe_eu_stall_data_xe2 { + __u64 ip_addr:29; /* Bits 0 to 28 */ + __u64 tdr_count:8; /* Bits 29 to 36 */ + __u64 other_count:8; /* Bits 37 to 44 */ + __u64 control_count:8; /* Bits 45 to 52 */ + __u64 pipestall_count:8; /* Bits 53 to 60 */ + __u64 send_count:8; /* Bits 61 to 68 */ + __u64 dist_acc_count:8; /* Bits 69 to 76 */ + __u64 sbid_count:8; /* Bits 77 to 84 */ + __u64 sync_count:8; /* Bits 85 to 92 */ + __u64 inst_fetch_count:8; /* Bits 93 to 100 */ + __u64 active_count:8; /* Bits 101 to 108 */ + __u64 ex_id:3; /* Bits 109 to 111 */ + __u64 end_flag:1; /* Bit 112 */ + __u64 unused_bits:15; + __u64 unused[6]; +} __packed; + +const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; + +/** + * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information. + * + * @num_rates: Pointer to a u32 to return the number of sampling rates. + * @rates: Double u64 pointer set to point to the array of sampling rates. + * + * Stores the number of sampling rates and a pointer to the array of + * sampling rates in the input pointers. + * + * Returns: Size of the EU stall sampling rates array. + */ +size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates) +{ + *num_rates = ARRAY_SIZE(eu_stall_sampling_rates); + *rates = eu_stall_sampling_rates; + + return sizeof(eu_stall_sampling_rates); +} + +/** + * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size. + * + * Returns: The per XeCore buffer size used to allocate the per GT + * EU stall data buffer. + */ +size_t xe_eu_stall_get_per_xecore_buf_size(void) +{ + return per_xecore_buf_size; +} + +/** + * xe_eu_stall_data_record_size - get EU stall data record size. + * + * @xe: Pointer to a Xe device. + * + * Returns: EU stall data record size. + */ +size_t xe_eu_stall_data_record_size(struct xe_device *xe) +{ + size_t record_size = 0; + + if (xe->info.platform == XE_PVC) + record_size = sizeof(struct xe_eu_stall_data_pvc); + else if (GRAPHICS_VER(xe) >= 20) + record_size = sizeof(struct xe_eu_stall_data_xe2); + + xe_assert(xe, is_power_of_2(record_size)); + + return record_size; +} + +/** + * num_data_rows - Return the number of EU stall data rows of 64B each + * for a given data size. + * + * @data_size: EU stall data size + */ +static u32 num_data_rows(u32 data_size) +{ + return data_size >> 6; +} + +static void xe_eu_stall_fini(void *arg) +{ + struct xe_gt *gt = arg; + + destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); + mutex_destroy(&gt->eu_stall->stream_lock); + kfree(gt->eu_stall); +} + +/** + * xe_eu_stall_init() - Allocate and initialize GT level EU stall data + * structure xe_eu_stall_gt within struct xe_gt. + * + * @gt: GT being initialized. + * + * Returns: zero on success or a negative error code. 
*/ +int xe_eu_stall_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int ret; + + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); + if (!gt->eu_stall) { + ret = -ENOMEM; + goto exit; + } + + mutex_init(&gt->eu_stall->stream_lock); + + gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0); + if (!gt->eu_stall->buf_ptr_poll_wq) { + ret = -ENOMEM; + goto exit_free; + } + + ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); + if (ret) + goto exit_destroy; + + return 0; +exit_destroy: + destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); +exit_free: + mutex_destroy(&gt->eu_stall->stream_lock); + kfree(gt->eu_stall); +exit: + return ret; +} + +static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value, + struct eu_stall_open_properties *props) +{ + value = div_u64(value, 251); + if (value == 0 || value > 7) { + drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value); + return -EINVAL; + } + props->sampling_rate_mult = value; + return 0; +} + +static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, + struct eu_stall_open_properties *props) +{ + props->wait_num_reports = value; + + return 0; +} + +static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, + struct eu_stall_open_properties *props) +{ + if (value >= xe->info.gt_count) { + drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); + return -EINVAL; + } + props->gt = xe_device_get_gt(xe, value); + return 0; +} + +typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value, + struct eu_stall_open_properties *props); + +static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = { + [DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate, + [DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports, + [DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id, +}; + +static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension, + struct eu_stall_open_properties *props) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) || + XE_IOCTL_DBG(xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs)); + return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props); +} + +typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension, + struct eu_stall_open_properties *props); +static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = { + [DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 5 +static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, + int ext_number, struct eu_stall_open_properties *props) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs)); + err = 
xe_eu_stall_user_extension_funcs[idx](xe, extension, props); + if (XE_IOCTL_DBG(xe, err)) + return err; + + if (ext.next_extension) + return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props); + + return 0; +} + +/** + * buf_data_size - Calculate the number of bytes in a circular buffer + * given the read and write pointers and the size of + * the buffer. + * + * @buf_size: Size of the circular buffer + * @read_ptr: Read pointer with an additional overflow bit + * @write_ptr: Write pointer with an additional overflow bit + * + * Since the read and write pointers have an additional overflow bit, + * this function calculates the offsets from the pointers and uses the + * offsets to calculate the data size in the buffer. + * + * Returns: number of bytes of data in the buffer + */ +static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr) +{ + u32 read_offset, write_offset, size = 0; + + if (read_ptr == write_ptr) + goto exit; + + read_offset = read_ptr & (buf_size - 1); + write_offset = write_ptr & (buf_size - 1); + + if (write_offset > read_offset) + size = write_offset - read_offset; + else + size = buf_size - read_offset + write_offset; +exit: + return size; +} + +/** + * eu_stall_data_buf_poll - Poll for EU stall data in the buffer. + * + * @stream: xe EU stall data stream instance + * + * Returns: true if the EU stall buffer contains minimum stall data as + * specified by the event report count, else false. + */ +static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) +{ + u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0; + u32 buf_size = stream->per_xecore_buf_size; + struct per_xecore_buf *xecore_buf; + struct xe_gt *gt = stream->gt; + bool min_data_present = false; + u16 group, instance; + unsigned int xecore; + + mutex_lock(&gt->eu_stall->stream_lock); + for_each_dss_steering(xecore, gt, group, instance) { + xecore_buf = &stream->xecore_buf[xecore]; + read_ptr = xecore_buf->read; + write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, + group, instance); + write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); + write_ptr <<= 6; + write_ptr &= ((buf_size << 1) - 1); + if (!min_data_present) { + total_data += buf_data_size(buf_size, read_ptr, write_ptr); + if (num_data_rows(total_data) >= stream->wait_num_reports) + min_data_present = true; + } + if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) + set_bit(xecore, stream->data_drop.mask); + xecore_buf->write = write_ptr; + } + mutex_unlock(&gt->eu_stall->stream_lock); + + return min_data_present; +} + +static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 write_ptr_reg; + + /* On PVC, the overflow bit has to be cleared by writing 1 to it. + * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it. 
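 + * XEHPC_EUSTALL_REPORT is a masked register, so the _MASKED_BIT_ENABLE/ + * _MASKED_BIT_DISABLE macros also set the corresponding bit in the upper + * 16-bit write mask, making the write update only this bit.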
+ */ + if (GRAPHICS_VER(xe) >= 20) + write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); + else + write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); + + xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance); +} + +static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream, + char __user *buf, size_t count, + size_t *total_data_size, struct xe_gt *gt, + u16 group, u16 instance, unsigned int xecore) +{ + size_t read_data_size, copy_size, buf_size; + u32 read_ptr_reg, read_ptr, write_ptr; + u8 *xecore_start_vaddr, *read_vaddr; + struct per_xecore_buf *xecore_buf; + u32 read_offset, write_offset; + + /* Hardware increments the read and write pointers such that they can + * overflow into one additional bit. For example, a 256KB size buffer + * offset pointer needs 18 bits. But HW uses 19 bits for the read and + * write pointers. This technique avoids wasting a slot in the buffer. + * Read and write offsets are calculated from the pointers in order to + * check if the write pointer has wrapped around the array. + */ + xecore_buf = &stream->xecore_buf[xecore]; + xecore_start_vaddr = xecore_buf->vaddr; + read_ptr = xecore_buf->read; + write_ptr = xecore_buf->write; + buf_size = stream->per_xecore_buf_size; + + read_data_size = buf_data_size(buf_size, read_ptr, write_ptr); + /* Read only the data that the user space buffer can accommodate */ + read_data_size = min_t(size_t, count - *total_data_size, read_data_size); + if (read_data_size == 0) + goto exit_drop; + + read_offset = read_ptr & (buf_size - 1); + write_offset = write_ptr & (buf_size - 1); + read_vaddr = xecore_start_vaddr + read_offset; + + if (write_offset > read_offset) { + if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size)) + return -EFAULT; + } else { + if (read_data_size >= buf_size - read_offset) + copy_size = buf_size - read_offset; + else + copy_size = read_data_size; + if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size)) + return -EFAULT; + if (copy_to_user(buf + *total_data_size + copy_size, + xecore_start_vaddr, read_data_size - copy_size)) + return -EFAULT; + } + + *total_data_size += read_data_size; + read_ptr += read_data_size; + + /* Read pointer can overflow into one additional bit */ + read_ptr &= (buf_size << 1) - 1; + read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6)); + read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); + xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); + xecore_buf->read = read_ptr; + trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr, + read_data_size, *total_data_size); +exit_drop: + /* Clear drop bit (if set) after any data was read or if the buffer was empty. + * Drop bit can be set even if the buffer is empty as the buffer may have been emptied + * in the previous read() and the data drop bit was set during the previous read(). 
*/ + if (test_bit(xecore, stream->data_drop.mask)) { + clear_dropped_eviction_line_bit(gt, group, instance); + clear_bit(xecore, stream->data_drop.mask); + } + return 0; +} + +/** + * xe_eu_stall_stream_read_locked - copy EU stall counters data from the + * per xecore buffers to the userspace buffer + * @stream: A stream opened for EU stall count metrics + * @file: An xe EU stall data stream file + * @buf: destination buffer given by userspace + * @count: the number of bytes userspace wants to read + * + * Returns: Number of bytes copied or a negative error code + * If we've successfully copied any data then reporting that takes + * precedence over any internal error status, so the data isn't lost. + */ +static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream, + struct file *file, char __user *buf, + size_t count) +{ + struct xe_gt *gt = stream->gt; + size_t total_size = 0; + u16 group, instance; + unsigned int xecore; + int ret = 0; + + if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { + if (!stream->data_drop.reported_to_user) { + stream->data_drop.reported_to_user = true; + xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", + XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + return -EIO; + } + stream->data_drop.reported_to_user = false; + } + + for_each_dss_steering(xecore, gt, group, instance) { + ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size, + gt, group, instance, xecore); + if (ret || count == total_size) + break; + } + return total_size ?: (ret ?: -EAGAIN); +} + +/* + * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE + * before calling read(). + * + * Returns: The number of bytes copied or a negative error code on failure. + * -EIO if HW drops any EU stall data when the buffer is full. + */ +static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct xe_eu_stall_data_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + ssize_t ret, aligned_count; + + aligned_count = ALIGN_DOWN(count, stream->data_record_size); + if (aligned_count == 0) + return -EINVAL; + + if (!stream->enabled) { + xe_gt_dbg(gt, "EU stall data stream not enabled to read\n"); + return -EINVAL; + } + + if (!(file->f_flags & O_NONBLOCK)) { + do { + ret = wait_event_interruptible(stream->poll_wq, stream->pollin); + if (ret) + return -EINTR; + + mutex_lock(&gt->eu_stall->stream_lock); + ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); + mutex_unlock(&gt->eu_stall->stream_lock); + } while (ret == -EAGAIN); + } else { + mutex_lock(&gt->eu_stall->stream_lock); + ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); + mutex_unlock(&gt->eu_stall->stream_lock); + } + + /* + * This may not work correctly if the user buffer is very small. + * We don't want to block the next read() when there is data in the buffer + * now, but couldn't be accommodated in the small user buffer. 
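 + * A subsequent poll() will not report EPOLLIN again until the buffer + * pointer poll worker finds at least wait_num_reports new records and + * sets pollin.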
+ */ + stream->pollin = false; + + return ret; +} + +static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) +{ + struct xe_gt *gt = stream->gt; + + gt->eu_stall->stream = NULL; + kfree(stream); +} + +static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream) +{ + xe_bo_unpin_map_no_vm(stream->bo); + kfree(stream->xecore_buf); +} + +static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, + u16 last_xecore) +{ + struct xe_tile *tile = stream->gt->tile; + struct xe_bo *bo; + u32 size; + + stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL); + if (!stream->xecore_buf) + return -ENOMEM; + + size = stream->per_xecore_buf_size * last_xecore; + + bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL, + size, ~0ull, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64); + if (IS_ERR(bo)) { + kfree(stream->xecore_buf); + return PTR_ERR(bo); + } + + XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64)); + stream->bo = bo; + + return 0; +} + +static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) +{ + u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value; + struct per_xecore_buf *xecore_buf; + struct xe_gt *gt = stream->gt; + u16 group, instance; + unsigned int fw_ref; + int xecore; + + /* Take runtime pm ref and forcewake to disable RC6 */ + xe_pm_runtime_get(gt_to_xe(gt)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) { + xe_gt_err(gt, "Failed to get RENDER forcewake\n"); + xe_pm_runtime_put(gt_to_xe(gt)); + return -ETIMEDOUT; + } + + if (XE_WA(gt, 22016596838)) + xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); + + for_each_dss_steering(xecore, gt, group, instance) { + write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance); + /* Clear any drop bits set and not cleared in the previous session. 
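 + * This way a stale drop from an earlier session is not reported in this one. 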
*/ + if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) + clear_dropped_eviction_line_bit(gt, group, instance); + write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); + read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr); + read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); + /* Initialize the read pointer to the write pointer */ + xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); + write_ptr <<= 6; + write_ptr &= (stream->per_xecore_buf_size << 1) - 1; + xecore_buf = &stream->xecore_buf[xecore]; + xecore_buf->write = write_ptr; + xecore_buf->read = write_ptr; + } + stream->data_drop.reported_to_user = false; + bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS); + + reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE, + REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) | + REG_FIELD_PREP(EUSTALL_SAMPLE_RATE, + stream->sampling_rate_mult)); + xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value); + /* GGTT addresses can never be > 32 bits */ + xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0); + reg_value = xe_bo_ggtt_addr(stream->bo); + reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ, + stream->per_xecore_buf_size / SZ_256K); + reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING; + xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value); + + return 0; +} + +static void eu_stall_data_buf_poll_work_fn(struct work_struct *work) +{ + struct xe_eu_stall_data_stream *stream = + container_of(work, typeof(*stream), buf_poll_work.work); + struct xe_gt *gt = stream->gt; + + if (eu_stall_data_buf_poll(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, + &stream->buf_poll_work, + msecs_to_jiffies(POLL_PERIOD_MS)); +} + +static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, + struct eu_stall_open_properties *props) +{ + unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores; + struct per_xecore_buf *xecore_buf; + struct xe_gt *gt = stream->gt; + xe_dss_mask_t all_xecores; + u16 group, instance; + u32 vaddr_offset; + int ret; + + bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS); + last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1; + + max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores); + if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) { + xe_gt_dbg(gt, "Invalid EU stall event report count %u\n", + props->wait_num_reports); + xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n", + max_wait_num_reports); + return -EINVAL; + } + + init_waitqueue_head(&stream->poll_wq); + INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); + stream->per_xecore_buf_size = per_xecore_buf_size; + stream->sampling_rate_mult = props->sampling_rate_mult; + stream->wait_num_reports = props->wait_num_reports; + stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt)); + + ret = xe_eu_stall_data_buf_alloc(stream, last_xecore); + if (ret) + return ret; + + for_each_dss_steering(xecore, gt, group, instance) { + xecore_buf = &stream->xecore_buf[xecore]; + vaddr_offset = xecore * stream->per_xecore_buf_size; + xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset; + } + return 0; +} + +static __poll_t xe_eu_stall_stream_poll_locked(struct 
xe_eu_stall_data_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait) +{ + struct xe_eu_stall_data_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + __poll_t ret; + + mutex_lock(&gt->eu_stall->stream_lock); + ret = xe_eu_stall_stream_poll_locked(stream, file, wait); + mutex_unlock(&gt->eu_stall->stream_lock); + + return ret; +} + +static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream) +{ + struct xe_gt *gt = stream->gt; + int ret = 0; + + if (stream->enabled) + return ret; + + stream->enabled = true; + + ret = xe_eu_stall_stream_enable(stream); + + queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, + &stream->buf_poll_work, + msecs_to_jiffies(POLL_PERIOD_MS)); + return ret; +} + +static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) +{ + struct xe_gt *gt = stream->gt; + + if (!stream->enabled) + return 0; + + stream->enabled = false; + + xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0); + + cancel_delayed_work_sync(&stream->buf_poll_work); + + if (XE_WA(gt, 22016596838)) + xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, + _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + +static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case DRM_XE_OBSERVATION_IOCTL_ENABLE: + return xe_eu_stall_enable_locked(stream); + case DRM_XE_OBSERVATION_IOCTL_DISABLE: + return xe_eu_stall_disable_locked(stream); + } + + return -EINVAL; +} + +static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct xe_eu_stall_data_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + long ret; + + mutex_lock(&gt->eu_stall->stream_lock); + ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg); + mutex_unlock(&gt->eu_stall->stream_lock); + + return ret; +} + +static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) +{ + struct xe_eu_stall_data_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + drm_dev_put(&gt->tile->xe->drm); + + mutex_lock(&gt->eu_stall->stream_lock); + xe_eu_stall_disable_locked(stream); + xe_eu_stall_data_buf_destroy(stream); + xe_eu_stall_stream_free(stream); + mutex_unlock(&gt->eu_stall->stream_lock); + + return 0; +} + +static const struct file_operations fops_eu_stall = { + .owner = THIS_MODULE, + .llseek = noop_llseek, + .release = xe_eu_stall_stream_close, + .poll = xe_eu_stall_stream_poll, + .read = xe_eu_stall_stream_read, + .unlocked_ioctl = xe_eu_stall_stream_ioctl, + .compat_ioctl = xe_eu_stall_stream_ioctl, +}; + +static int xe_eu_stall_stream_open_locked(struct drm_device *dev, + struct eu_stall_open_properties *props, + struct drm_file *file) +{ + struct xe_eu_stall_data_stream *stream; + struct xe_gt *gt = props->gt; + unsigned long f_flags = 0; + int ret, stream_fd; + + /* Only one session can be active at any time */ + if (gt->eu_stall->stream) { + xe_gt_dbg(gt, "EU stall sampling session already active\n"); + return -EBUSY; + } + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + return -ENOMEM; + + gt->eu_stall->stream = stream; + stream->gt = gt; + + ret = xe_eu_stall_stream_init(stream, props); + if (ret) { + 
xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret); + goto err_free; + } + + stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags); + if (stream_fd < 0) { + ret = stream_fd; + xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret); + goto err_destroy; + } + + /* Take a reference on the driver that will be kept with stream_fd + * until its release. + */ + drm_dev_get(&gt->tile->xe->drm); + + return stream_fd; + +err_destroy: + xe_eu_stall_data_buf_destroy(stream); +err_free: + xe_eu_stall_stream_free(stream); + return ret; +} + +/** + * xe_eu_stall_stream_open - Open a xe EU stall data stream fd + * + * @dev: DRM device pointer + * @data: pointer to first struct @drm_xe_ext_set_property in + * the chain of input properties from the user space. + * @file: DRM file pointer + * + * This function opens an EU stall data stream with input properties from + * the user space. + * + * Returns: EU stall data stream fd on success or a negative error code. + */ +int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct eu_stall_open_properties props = {}; + int ret; + + if (!xe_eu_stall_supported_on_platform(xe)) { + drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); + return -ENODEV; + } + + if (xe_observation_paranoid && !perfmon_capable()) { + drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n"); + return -EACCES; + } + + /* Initialize and set default values */ + props.wait_num_reports = 1; + props.sampling_rate_mult = 4; + + ret = xe_eu_stall_user_extensions(xe, data, 0, &props); + if (ret) + return ret; + + if (!props.gt) { + drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n"); + return -EINVAL; + } + + mutex_lock(&props.gt->eu_stall->stream_lock); + ret = xe_eu_stall_stream_open_locked(dev, &props, file); + mutex_unlock(&props.gt->eu_stall->stream_lock); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h new file mode 100644 index 000000000000..ed9d0f233566 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __XE_EU_STALL_H__ +#define __XE_EU_STALL_H__ + +#include "xe_gt_types.h" + +size_t xe_eu_stall_get_per_xecore_buf_size(void); +size_t xe_eu_stall_data_record_size(struct xe_device *xe); +size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates); + +int xe_eu_stall_init(struct xe_gt *gt); +int xe_eu_stall_stream_open(struct drm_device *dev, + u64 data, + struct drm_file *file); + +static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) +{ + return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; +} +#endif diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index 904cf47925aa..ed9183599e31 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -28,10 +28,10 @@ "\n" \ "#endif\n" -static void print_usage(FILE *f) +static void print_usage(FILE *f, const char *progname) { fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n", - program_invocation_short_name); + progname); } static void print_parse_error(const char *err_msg, const char *line, @@ -144,7 +144,7 @@ int main(int argc, const char *argv[]) if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); - print_usage(stderr); + print_usage(stderr, argv[0]); return 1; 
} diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 31c90577faf0..8cf70b228ff3 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -490,7 +490,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) gsc->proxy.component_added = true; - return xe_device_add_action_or_reset(xe, xe_gsc_proxy_remove, gsc); + return devm_add_action_or_reset(xe->drm.dev, xe_gsc_proxy_remove, gsc); } /** diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 650a0ee56e97..5bd8dfdce300 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -19,6 +19,7 @@ #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_eu_stall.h" #include "xe_exec_queue.h" #include "xe_execlist.h" #include "xe_force_wake.h" @@ -613,6 +614,10 @@ int xe_gt_init(struct xe_gt *gt) xe_gt_record_user_engines(gt); + err = xe_eu_stall_init(gt); + if (err) + return err; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index cc2ae159298e..2a958c92d8ea 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -12,25 +12,10 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" -static u32 read_reference_ts_freq(struct xe_gt *gt) -{ - u32 ts_override = xe_mmio_read32(&gt->mmio, TIMESTAMP_OVERRIDE); - u32 base_freq, frac_freq; - - base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK, - ts_override) + 1; - base_freq *= 1000000; - - frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK, - ts_override); - frac_freq = 1000000 / (frac_freq + 1); - - return base_freq + frac_freq; -} - static u32 get_crystal_clock_freq(u32 rpm_config_reg) { const u32 f19_2_mhz = 19200000; @@ -57,26 +42,30 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg) int xe_gt_clock_init(struct xe_gt *gt) { - u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE); + u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0); u32 freq = 0; - /* Assuming gen11+ so assert this assumption is correct */ - xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11); - - if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) { - freq = read_reference_ts_freq(gt); - } else { - u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0); - - freq = get_crystal_clock_freq(c0); - - /* - * Now figure out how the command stream's timestamp - * register increments from this frequency (it might - * increment only every few clock cycle). - */ - freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); - } + /* + * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later + * platforms. In theory it could be a valid setting for pre-Xe2 + * platforms, but there's no documentation on how to properly handle + * this case. Reading TIMESTAMP_OVERRIDE, as the driver attempted in + * the past, has been confirmed as incorrect by the hardware architects. + * + * For now just warn if we ever encounter hardware in the wild that + * has this setting and move on as if it hadn't been set. + */ + if (xe_mmio_read32(&gt->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC) + xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n"); + + freq = get_crystal_clock_freq(c0); + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). 
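 + * For example, with a CTC shift parameter of 0 the timestamp register + * ticks at 1/8th of the crystal clock frequency (freq >> 3).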
+ */ + freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); gt->info.reference_clock = freq; return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 46701ca11ce0..17d69039b866 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -137,7 +137,7 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf, bool atomic; xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, xe_vma_size(vma)); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); trace_xe_vma_pagefault(vma); atomic = access_is_atomic(pf->access_type); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 6b5f849a0722..4efde5f46b43 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -114,7 +114,6 @@ static const struct xe_reg tgl_runtime_regs[] = { GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ - TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; static const struct xe_reg ats_m_runtime_regs[] = { @@ -127,7 +126,6 @@ static const struct xe_reg ats_m_runtime_regs[] = { XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ - TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; static const struct xe_reg pvc_runtime_regs[] = { @@ -140,7 +138,6 @@ static const struct xe_reg pvc_runtime_regs[] = { XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ CTC_MODE, /* _MMIO(0xA26C) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ - TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; static const struct xe_reg ver_1270_runtime_regs[] = { @@ -155,7 +152,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = { XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ - TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; static const struct xe_reg ver_2000_runtime_regs[] = { @@ -173,7 +169,6 @@ static const struct xe_reg ver_2000_runtime_regs[] = { XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ CTC_MODE, /* _MMIO(0xa26c) */ HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ - TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ }; static const struct xe_reg ver_3000_runtime_regs[] = { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 4831549da319..a439261bf4d7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -47,12 +47,19 @@ static int guc_action_vf_reset(struct xe_guc *guc) return ret > 0 ? 
-EPROTO : ret; } +#define GUC_RESET_VF_STATE_RETRY_MAX 10 static int vf_reset_guc_state(struct xe_gt *gt) { + unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX; struct xe_guc *guc = &gt->uc.guc; int err; - err = guc_action_vf_reset(guc); + do { + err = guc_action_vf_reset(guc); + if (!err || err != -ETIMEDOUT) + break; + } while (--retry); + if (unlikely(err)) xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); return err; } diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 2e9879ea4674..6155ea354432 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -23,13 +23,13 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) if (id >= __XE_GT_STATS_NUM_IDS) return; - atomic_add(incr, &gt->stats.counters[id]); + atomic64_add(incr, &gt->stats.counters[id]); } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { "tlb_inval_count", "vma_pagefault_count", - "vma_pagefault_bytes", + "vma_pagefault_kb", }; /** @@ -44,8 +44,8 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) enum xe_gt_stats_id id; for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) - drm_printf(p, "%s: %d\n", stat_description[id], - atomic_read(&gt->stats.counters[id])); + drm_printf(p, "%s: %lld\n", stat_description[id], + atomic64_read(&gt->stats.counters[id])); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index b072bd80c4b9..d556771f99d6 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -9,7 +9,7 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_TLB_INVAL, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, - XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, + XE_GT_STATS_ID_VMA_PAGEFAULT_KB, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index 746b325bbf6e..a72d26ba0653 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -25,6 +25,19 @@ void xe_gt_topology_init(struct xe_gt *gt); void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p); +/** + * xe_gt_topology_mask_last_dss() - Returns the index of the last DSS in a mask. + * @mask: Input DSS mask + * + * Return: Index of the last DSS in the input DSS mask, + * XE_MAX_DSS_FUSE_BITS if DSS mask is empty. 
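 + * (find_last_bit() returns the bitmap size, XE_MAX_DSS_FUSE_BITS here, + * when no bits are set in the mask.)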
+ */ +static inline unsigned int +xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) +{ + return find_last_bit(mask, XE_MAX_DSS_FUSE_BITS); +} + unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6e66bf0e8b3f..f67474e06fb3 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -139,7 +139,7 @@ struct xe_gt { /** @stats: GT stats */ struct { /** @stats.counters: counters for various GT stats */ - atomic_t counters[__XE_GT_STATS_NUM_IDS]; + atomic64_t counters[__XE_GT_STATS_NUM_IDS]; } stats; #endif @@ -430,6 +430,9 @@ struct xe_gt { /** @oa: oa observation subsystem per gt info */ struct xe_oa_gt oa; + + /** @eu_stall: EU stall counters subsystem per gt info */ + struct xe_eu_stall_gt *eu_stall; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 1619c0a52db9..bc1ff0a4e1e7 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -27,6 +27,7 @@ #include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" +#include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" @@ -744,6 +745,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_engine_activity_init(guc); + if (ret) + return ret; + ret = xe_guc_buf_cache_init(&guc->buf); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index fab259adc380..e7c9e095a19f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -342,7 +342,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads) offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - if (XE_WA(gt, 14019882105)) + if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562)) guc_waklv_enable_simple(ads, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, &offset, &remain); diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c new file mode 100644 index 000000000000..2a457dcf31d5 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "regs/xe_gt_regs.h" + +#include "xe_bo.h" +#include "xe_force_wake.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_engine_activity.h" +#include "xe_guc_ct.h" +#include "xe_hw_engine.h" +#include "xe_map.h" +#include "xe_mmio.h" +#include "xe_trace_guc.h" + +#define TOTAL_QUANTA 0x8000 + +static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + u16 guc_class = xe_engine_class_to_guc_class(hwe->class); + size_t offset; + + offset = offsetof(struct guc_engine_activity_data, + engine_activity[guc_class][hwe->logical_instance]); + + return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset); +} + +static struct iosys_map engine_metadata_map(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + + return buffer->metadata_bo->vmap; +} + +static int allocate_engine_activity_group(struct xe_guc *guc) +{ + 
struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct xe_device *xe = guc_to_xe(guc); + u32 num_activity_group = 1; /* Will be modified for VF */ + + engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group, + sizeof(struct engine_activity_group), GFP_KERNEL); + + if (!engine_activity->eag) + return -ENOMEM; + + engine_activity->num_activity_group = num_activity_group; + + return 0; +} + +static int allocate_engine_activity_buffers(struct xe_guc *guc, + struct engine_activity_buffer *buffer) +{ + u32 metadata_size = sizeof(struct guc_engine_activity_metadata); + u32 size = sizeof(struct guc_engine_activity_data); + struct xe_gt *gt = guc_to_gt(guc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo, *metadata_bo; + + metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size), + ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + + if (IS_ERR(metadata_bo)) + return PTR_ERR(metadata_bo); + + bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size), + ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + + if (IS_ERR(bo)) { + xe_bo_unpin_map_no_vm(metadata_bo); + return PTR_ERR(bo); + } + + buffer->metadata_bo = metadata_bo; + buffer->activity_bo = bo; + return 0; +} + +static void free_engine_activity_buffers(struct engine_activity_buffer *buffer) +{ + xe_bo_unpin_map_no_vm(buffer->metadata_bo); + xe_bo_unpin_map_no_vm(buffer->activity_bo); +} + +static bool is_engine_activity_supported(struct xe_guc *guc) +{ + struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; + struct xe_uc_fw_version required = { 1, 14, 1 }; + struct xe_gt *gt = guc_to_gt(guc); + + if (IS_SRIOV_VF(gt_to_xe(gt))) { + xe_gt_info(gt, "engine activity stats not supported on VFs\n"); + return false; + } + + /* engine activity stats is supported from GuC interface version (1.14.1) */ + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER_STRUCT(required)) { + xe_gt_info(gt, + "engine activity stats unsupported in GuC interface v%u.%u.%u, need v%u.%u.%u or higher\n", + version->major, version->minor, version->patch, required.major, + required.minor, required.patch); + return false; + } + + return true; +} + +static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe) +{ + struct xe_guc *guc = &hwe->gt->uc.guc; + struct engine_activity_group *eag = &guc->engine_activity.eag[0]; + u16 guc_class = xe_engine_class_to_guc_class(hwe->class); + + return &eag->engine[guc_class][hwe->logical_instance]; +} + +static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq) +{ + return mul_u64_u32_div(ns, freq, NSEC_PER_SEC); +} + +#define read_engine_activity_record(xe_, map_, field_) \ + xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity, field_) + +#define read_metadata_record(xe_, map_, field_) \ + xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_) + +static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct guc_engine_activity *cached_activity = &ea->activity; + struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct iosys_map activity_map, metadata_map; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 last_update_tick, global_change_num; + u64 active_ticks, gpm_ts; + 
+ +static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct guc_engine_activity *cached_activity = &ea->activity; + struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct iosys_map activity_map, metadata_map; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 last_update_tick, global_change_num; + u64 active_ticks, gpm_ts; + u16 change_num; + + activity_map = engine_activity_map(guc, hwe); + metadata_map = engine_metadata_map(guc); + global_change_num = read_metadata_record(xe, &metadata_map, global_change_num); + + /* GuC has not initialized activity data yet, return 0 */ + if (!global_change_num) + goto update; + + if (global_change_num == cached_metadata->global_change_num) + goto update; + + cached_metadata->global_change_num = global_change_num; + change_num = read_engine_activity_record(xe, &activity_map, change_num); + + if (!change_num || change_num == cached_activity->change_num) + goto update; + + /* read engine activity values */ + last_update_tick = read_engine_activity_record(xe, &activity_map, last_update_tick); + active_ticks = read_engine_activity_record(xe, &activity_map, active_ticks); + + /* activity calculations */ + ea->running = !!last_update_tick; + ea->total += active_ticks - cached_activity->active_ticks; + ea->active = 0; + + /* cache the counter */ + cached_activity->change_num = change_num; + cached_activity->last_update_tick = last_update_tick; + cached_activity->active_ticks = active_ticks; + +update: + if (ea->running) { + gpm_ts = xe_mmio_read64_2x32(&gt->mmio, MISC_STATUS_0) >> + engine_activity->gpm_timestamp_shift; + ea->active = lower_32_bits(gpm_ts) - cached_activity->last_update_tick; + } + + trace_xe_guc_engine_activity(xe, ea, hwe->name, hwe->instance); + + return ea->total + ea->active; +} + +static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + struct engine_activity *ea = hw_engine_to_engine_activity(hwe); + struct guc_engine_activity_metadata *cached_metadata = &ea->metadata; + struct guc_engine_activity *cached_activity = &ea->activity; + struct iosys_map activity_map, metadata_map; + struct xe_device *xe = guc_to_xe(guc); + ktime_t now, cpu_delta; + u64 numerator; + u16 quanta_ratio; + + activity_map = engine_activity_map(guc, hwe); + metadata_map = engine_metadata_map(guc); + + if (!cached_metadata->guc_tsc_frequency_hz) + cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map, + guc_tsc_frequency_hz); + + quanta_ratio = read_engine_activity_record(xe, &activity_map, quanta_ratio); + cached_activity->quanta_ratio = quanta_ratio; + + /* Total ticks calculations */ + now = ktime_get(); + cpu_delta = now - ea->last_cpu_ts; + ea->last_cpu_ts = now; + numerator = (ea->quanta_remainder_ns + cpu_delta) * cached_activity->quanta_ratio; + ea->quanta_ns += numerator / TOTAL_QUANTA; + ea->quanta_remainder_ns = numerator % TOTAL_QUANTA; + ea->quanta = cpu_ns_to_guc_tsc_tick(ea->quanta_ns, cached_metadata->guc_tsc_frequency_hz); + + trace_xe_guc_engine_activity(xe, ea, hwe->name, hwe->instance); + + return ea->quanta; +} + +static int enable_engine_activity_stats(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + u32 action[] = { + XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER, + xe_bo_ggtt_addr(buffer->metadata_bo), + 0, + xe_bo_ggtt_addr(buffer->activity_bo), + 0, + }; + + /* Blocking here to ensure the buffers are ready before reading them */ + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static void engine_activity_set_cpu_ts(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct engine_activity_group *eag = &engine_activity->eag[0]; + int i, j; + + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; j++) + eag->engine[i][j].last_cpu_ts = ktime_get(); +}
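
The quanta accounting in get_engine_total_ticks() above scales each CPU-time delta by the per-engine quanta_ratio and carries the division remainder into the next sample, so repeated small deltas do not drop fractional time. A standalone sketch of that accumulation, illustrative only and not part of the patch, assuming TOTAL_QUANTA is the 0x8000 fixed-point divisor defined earlier in this file:

#include <stdint.h>

#define TOTAL_QUANTA 0x8000	/* assumed U0.15 fixed-point divisor */

struct quanta_acc {
	uint64_t quanta_ns;	/* accumulated scaled time */
	uint64_t remainder_ns;	/* carry between samples */
};

/* Mirror of the driver's arithmetic: scale (carry + delta) by
 * quanta_ratio/TOTAL_QUANTA and keep the modulo as the new carry. */
static void quanta_accumulate(struct quanta_acc *acc, uint64_t cpu_delta_ns,
			      uint16_t quanta_ratio)
{
	uint64_t numerator = (acc->remainder_ns + cpu_delta_ns) * quanta_ratio;

	acc->quanta_ns += numerator / TOTAL_QUANTA;
	acc->remainder_ns = numerator % TOTAL_QUANTA;
}
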
+ +static u32 gpm_timestamp_shift(struct xe_gt *gt) +{ + u32 reg; + + reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0); + + return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); +} + +/** + * xe_guc_engine_activity_active_ticks - Get engine active ticks + * @guc: The GuC object + * @hwe: The hw_engine object + * + * Return: accumulated ticks @hwe was active since engine activity stats were enabled. + */ +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + if (!xe_guc_engine_activity_supported(guc)) + return 0; + + return get_engine_active_ticks(guc, hwe); +} + +/** + * xe_guc_engine_activity_total_ticks - Get engine total ticks + * @guc: The GuC object + * @hwe: The hw_engine object + * + * Return: accumulated quanta of ticks allocated for the engine + */ +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe) +{ + if (!xe_guc_engine_activity_supported(guc)) + return 0; + + return get_engine_total_ticks(guc, hwe); +} + +/** + * xe_guc_engine_activity_supported - Check support for engine activity stats + * @guc: The GuC object + * + * Engine activity stats are supported from GuC interface version 1.14.1 onwards + * + * Return: true if engine activity stats are supported, false otherwise + */ +bool xe_guc_engine_activity_supported(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + + return engine_activity->supported; +} + +/** + * xe_guc_engine_activity_enable_stats - Enable engine activity stats + * @guc: The GuC object + * + * Enable engine activity stats and set initial timestamps + */ +void xe_guc_engine_activity_enable_stats(struct xe_guc *guc) +{ + int ret; + + if (!xe_guc_engine_activity_supported(guc)) + return; + + ret = enable_engine_activity_stats(guc); + if (ret) + xe_gt_err(guc_to_gt(guc), "failed to enable activity stats: %d\n", ret); + else + engine_activity_set_cpu_ts(guc); +} + +static void engine_activity_fini(void *arg) +{ + struct xe_guc_engine_activity *engine_activity = arg; + struct engine_activity_buffer *buffer = &engine_activity->device_buffer; + + free_engine_activity_buffers(buffer); +} + +/** + * xe_guc_engine_activity_init - Initialize the engine activity data + * @guc: The GuC object + * + * Return: 0 on success, negative error code otherwise.
+ */ +int xe_guc_engine_activity_init(struct xe_guc *guc) +{ + struct xe_guc_engine_activity *engine_activity = &guc->engine_activity; + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + engine_activity->supported = is_engine_activity_supported(guc); + if (!engine_activity->supported) + return 0; + + ret = allocate_engine_activity_group(guc); + if (ret) { + xe_gt_err(gt, "failed to allocate engine activity group (%pe)\n", ERR_PTR(ret)); + return ret; + } + + ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer); + if (ret) { + xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret)); + return ret; + } + + engine_activity->gpm_timestamp_shift = gpm_timestamp_shift(gt); + + return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, engine_activity_fini, + engine_activity); +} diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h new file mode 100644 index 000000000000..a042d4cb404c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_ACTIVITY_H_ +#define _XE_GUC_ENGINE_ACTIVITY_H_ + +#include <linux/types.h> + +struct xe_hw_engine; +struct xe_guc; + +int xe_guc_engine_activity_init(struct xe_guc *guc); +bool xe_guc_engine_activity_supported(struct xe_guc *guc); +void xe_guc_engine_activity_enable_stats(struct xe_guc *guc); +u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); +u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe); +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h new file mode 100644 index 000000000000..5cdd034b6b70 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_ENGINE_ACTIVITY_TYPES_H_ +#define _XE_GUC_ENGINE_ACTIVITY_TYPES_H_ + +#include <linux/types.h> + +#include "xe_guc_fwif.h" +/** + * struct engine_activity - Engine specific activity data + * + * Contains engine specific activity data and snapshot of the + * structures from GuC + */ +struct engine_activity { + /** @active: current activity */ + u64 active; + + /** @last_cpu_ts: cpu timestamp in nsec of previous sample */ + u64 last_cpu_ts; + + /** @quanta: total quanta used on HW */ + u64 quanta; + + /** @quanta_ns: total quanta_ns used on HW */ + u64 quanta_ns; + + /** + * @quanta_remainder_ns: remainder when the CPU time is scaled as + * per the quanta_ratio. This remainder is used in subsequent + * quanta calculations. 
+ */ + u64 quanta_remainder_ns; + + /** @total: total engine activity */ + u64 total; + + /** @running: true if engine is running some work */ + bool running; + + /** @metadata: snapshot of engine activity metadata */ + struct guc_engine_activity_metadata metadata; + + /** @activity: snapshot of engine activity counter */ + struct guc_engine_activity activity; +}; + +/** + * struct engine_activity_group - Activity data for all engines + */ +struct engine_activity_group { + /** @engine: engine specific activity data */ + struct engine_activity engine[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +}; + +/** + * struct engine_activity_buffer - engine activity buffers + * + * This contains the buffers allocated for metadata and activity data + */ +struct engine_activity_buffer { + /** @activity_bo: object allocated to hold activity data */ + struct xe_bo *activity_bo; + + /** @metadata_bo: object allocated to hold activity metadata */ + struct xe_bo *metadata_bo; +}; + +/** + * struct xe_guc_engine_activity - Data used by engine activity implementation + */ +struct xe_guc_engine_activity { + /** @gpm_timestamp_shift: Right shift value for the gpm timestamp */ + u32 gpm_timestamp_shift; + + /** @num_activity_group: number of activity groups */ + u32 num_activity_group; + + /** @supported: indicates support for engine activity stats */ + bool supported; + + /** @eag: holds the device level engine activity data */ + struct engine_activity_group *eag; + + /** @device_buffer: buffer object for global engine activity */ + struct engine_activity_buffer device_buffer; +}; +#endif + diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 057153f89b30..6f57578b07cb 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -208,6 +208,25 @@ struct guc_engine_usage { struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; } __packed; +/* Engine Activity stats */ +struct guc_engine_activity { + u16 change_num; + u16 quanta_ratio; + u32 last_update_tick; + u64 active_ticks; +} __packed; + +struct guc_engine_activity_data { + struct guc_engine_activity engine_activity[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; +} __packed; + +struct guc_engine_activity_metadata { + u32 guc_tsc_frequency_hz; + u32 lag_latency_usec; + u32 global_change_num; + u32 reserved; +} __packed; + /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum xe_guc_recv_message { XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 913c74d6e2ae..b6a2dd742ebd 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1248,6 +1248,8 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 573aa6308380..63bac64429a5 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -13,6 +13,7 @@ #include "xe_guc_ads_types.h" #include "xe_guc_buf_types.h" #include "xe_guc_ct_types.h" +#include "xe_guc_engine_activity_types.h" #include "xe_guc_fwif.h" #include "xe_guc_log_types.h" #include "xe_guc_pc_types.h" @@ -103,6 +104,9 
@@ struct xe_guc { /** @relay: GuC Relay Communication used in SR-IOV */ struct xe_guc_relay relay; + /** @engine_activity: Device specific engine activity */ + struct xe_guc_engine_activity engine_activity; + /** * @notify_reg: Register which is written to notify GuC of H2G messages */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 06dc78d3a812..27d11e06a82b 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -89,12 +89,9 @@ static void heci_gsc_release_dev(struct device *dev) kfree(adev); } -void xe_heci_gsc_fini(struct xe_device *xe) +static void xe_heci_gsc_fini(void *arg) { - struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; - - if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) - return; + struct xe_heci_gsc *heci_gsc = arg; if (heci_gsc->adev) { struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev; @@ -106,6 +103,7 @@ void xe_heci_gsc_fini(struct xe_device *xe) if (heci_gsc->irq >= 0) irq_free_desc(heci_gsc->irq); + heci_gsc->irq = -1; } @@ -172,14 +170,14 @@ static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def * return ret; } -void xe_heci_gsc_init(struct xe_device *xe) +int xe_heci_gsc_init(struct xe_device *xe) { struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; - const struct heci_gsc_def *def; + const struct heci_gsc_def *def = NULL; int ret; if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) - return; + return 0; heci_gsc->irq = -1; @@ -191,29 +189,24 @@ void xe_heci_gsc_init(struct xe_device *xe) def = &heci_gsc_def_dg2; } else if (xe->info.platform == XE_DG1) { def = &heci_gsc_def_dg1; - } else { - drm_warn_once(&xe->drm, "Unknown platform\n"); - return; } - if (!def->name) { - drm_warn_once(&xe->drm, "HECI is not implemented!\n"); - return; + if (!def || !def->name) { + drm_warn(&xe->drm, "HECI is not implemented!\n"); + return 0; } - if (!def->use_polling && !xe_survivability_mode_enabled(xe)) { + ret = devm_add_action_or_reset(xe->drm.dev, xe_heci_gsc_fini, heci_gsc); + if (ret) + return ret; + + if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) - goto fail; + return ret; } - ret = heci_gsc_add_device(xe, def); - if (ret) - goto fail; - - return; -fail: - xe_heci_gsc_fini(xe); + return heci_gsc_add_device(xe, def); } void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h index 48b3b1838045..745eb6783942 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.h +++ b/drivers/gpu/drm/xe/xe_heci_gsc.h @@ -33,8 +33,7 @@ struct xe_heci_gsc { int irq; }; -void xe_heci_gsc_init(struct xe_device *xe); -void xe_heci_gsc_fini(struct xe_device *xe); +int xe_heci_gsc_init(struct xe_device *xe); void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir); void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir); diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index 089834467880..2e4ae61567d8 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -166,13 +166,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, { unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + unsigned long *pfns; struct xe_userptr *userptr; struct xe_vma *vma = &uvma->vma; u64 userptr_start = xe_vma_userptr(vma); u64 userptr_end = userptr_start + xe_vma_size(vma); struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range; + 
struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; bool write = !xe_vma_read_only(vma); unsigned long notifier_seq; u64 npages; @@ -199,19 +206,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, return -ENOMEM; if (write) - flags |= HMM_PFN_REQ_WRITE; + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto free_pfns; } - hmm_range.default_flags = flags; hmm_range.hmm_pfns = pfns; - hmm_range.notifier = &userptr->notifier; - hmm_range.start = userptr_start; - hmm_range.end = userptr_end; - hmm_range.dev_private_owner = vm->xe; while (true) { hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2c5a24a13e87..6f185632da14 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -12,6 +12,8 @@ #include <drm/drm_managed.h> #include <uapi/drm/xe_drm.h> +#include <generated/xe_wa_oob.h> + #include "abi/guc_actions_slpc_abi.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" @@ -35,6 +37,7 @@ #include "xe_sched_job.h" #include "xe_sriov.h" #include "xe_sync.h" +#include "xe_wa.h" #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) @@ -812,11 +815,8 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) struct xe_mmio *mmio = &stream->gt->mmio; u32 sqcnt1; - /* - * Wa_1508761755:xehpsdv, dg2 - * Enable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { + /* Enable thread stall DOP gating and EU DOP gating. */ + if (XE_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1065,11 +1065,10 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) int ret; /* - * Wa_1508761755:xehpsdv, dg2 * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating. */ - if (stream->oa->xe->info.platform == XE_DG2) { + if (XE_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1690,7 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; stream->wait_num_reports = param->wait_num_reports; @@ -1720,12 +1719,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, } /* - * Wa_1509372804:pvc - * * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode. 
*/ - if (stream->oa->xe->info.platform == XE_PVC) { + if (XE_WA(stream->gt, 1509372804)) { ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) @@ -1857,23 +1854,14 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) { u32 reg, shift; - /* - * Wa_18013179988:dg2 - * Wa_14015568240:pvc - * Wa_14015846243:mtl - */ - switch (gt_to_xe(gt)->info.platform) { - case XE_DG2: - case XE_PVC: - case XE_METEORLAKE: + if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) { xe_pm_runtime_get(gt_to_xe(gt)); reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); return gt->info.reference_clock << (3 - shift); - - default: + } else { return gt->info.reference_clock; } } @@ -1971,6 +1959,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f } param.xef = xef; + param.period_exponent = -1; ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param); if (ret) return ret; @@ -2025,7 +2014,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index 57cf01efc07f..e3f9b546207e 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -8,6 +8,7 @@ #include <uapi/drm/xe_drm.h> +#include "xe_eu_stall.h" #include "xe_oa.h" #include "xe_observation.h" @@ -29,6 +30,17 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_observation_param * } } +static int xe_eu_stall_ioctl(struct drm_device *dev, struct drm_xe_observation_param *arg, + struct drm_file *file) +{ + switch (arg->observation_op) { + case DRM_XE_OBSERVATION_OP_STREAM_OPEN: + return xe_eu_stall_stream_open(dev, arg->param, file); + default: + return -EINVAL; + } +} + /** * xe_observation_ioctl - The top level observation layer ioctl * @dev: @drm_device @@ -51,6 +63,8 @@ int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *fi switch (arg->observation_type) { case DRM_XE_OBSERVATION_TYPE_OA: return xe_oa_ioctl(dev, arg, file); + case DRM_XE_OBSERVATION_TYPE_EU_STALL: + return xe_eu_stall_ioctl(dev, arg, file); default: return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f8417f4d8ce6..8b6658b214be 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -765,21 +765,16 @@ static int xe_info_init(struct xe_device *xe, static void xe_pci_remove(struct pci_dev *pdev) { - struct xe_device *xe; - - xe = pdev_to_xe_device(pdev); - if (!xe) /* driver load aborted, nothing to cleanup */ - return; + struct xe_device *xe = pdev_to_xe_device(pdev); if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); - if (xe_survivability_mode_enabled(xe)) - return xe_survivability_mode_remove(xe); + if (xe_survivability_mode_is_enabled(xe)) + return; xe_device_remove(xe); xe_pm_runtime_fini(xe); - pci_set_drvdata(pdev, NULL); } /* @@ -851,13 +846,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = xe_device_probe_early(xe); /* - * In Boot Survivability mode, no drm card is exposed - * and driver is loaded with bare minimum to allow - * for firmware to be flashed through mei. Return - * success if survivability mode is enabled.
+ * In Boot Survivability mode, no drm card is exposed and the driver is + * loaded with the bare minimum to allow firmware to be flashed through + * mei. If early probe fails, check if survivability mode is flagged by + * HW to be enabled. In that case enable it and return success. */ if (err) { - if (xe_survivability_mode_enabled(xe)) + if (xe_survivability_mode_required(xe) && + !xe_survivability_mode_enable(xe)) return 0; return err; @@ -900,10 +896,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe(xe); - if (err) { - xe_device_call_remove_actions(xe); + if (err) return err; - } err = xe_pm_init(xe); if (err) @@ -953,7 +947,7 @@ static int xe_pci_suspend(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); int err; - if (xe_survivability_mode_enabled(xe)) + if (xe_survivability_mode_is_enabled(xe)) return -EBUSY; err = xe_pm_suspend(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index aaceee748287..09ee8a06fe2e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -62,6 +62,55 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_control_trigger_flr(gt, n); } +static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* caller must use pci_dev_put() */ + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vf_id)); +} + +static void pf_link_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); + struct device_link *link; + struct pci_dev *pdev_vf; + unsigned int n; + + /* + * When both PF and VF devices are enabled on the host, they resume + * in parallel during system resume. + * + * But the PF has to complete provisioning of the VFs first to allow + * any VF to resume successfully. + * + * Create a parent-child device link between PF and VF devices that will + * enforce correct resume order.
+ */ + for (n = 1; n <= num_vfs; n++) { + pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + + /* unlikely, something weird is happening, abort */ + if (!pdev_vf) { + xe_sriov_err(xe, "Cannot find VF%u device, aborting link%s creation!\n", + n, str_plural(num_vfs)); + break; + } + + link = device_link_add(&pdev_vf->dev, &pdev_pf->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + /* unlikely and harmless, continue with other VFs */ + if (!link) + xe_sriov_notice(xe, "Failed linking VF%u\n", n); + + pci_dev_put(pdev_vf); + } +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -92,6 +141,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + pf_link_vfs(xe, num_vfs); + xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); return num_vfs; diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 3910a82328ee..4f62a6e515d6 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -7,16 +7,18 @@ #include <linux/device.h> #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt_idle.h" +#include "xe_guc_engine_activity.h" +#include "xe_hw_engine.h" #include "xe_pm.h" #include "xe_pmu.h" /** * DOC: Xe PMU (Performance Monitoring Unit) * - * Expose events/counters like GT-C6 residency and GT frequency to user land via - * the perf interface. Events are per device. The GT can be selected with an - * extra config sub-field (bits 60-63). + * Expose events/counters like GT-C6 residency, GT frequency and per-class-engine + * activity to user land via the perf interface. Events are per device. * * All events are listed in sysfs: * @@ -24,7 +26,18 @@ * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/events/ * $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/format/ * - * The format directory has info regarding the configs that can be used. + * The following format parameters are available to read events, + * but only a few are valid with each event: + * + * gt[60:63] Selects gt for the event + * engine_class[20:27] Selects engine-class for event + * engine_instance[12:19] Selects the engine-instance for the event + * + * For engine-specific events (engine-*), the gt, engine_class and engine_instance + * parameters must be set as populated by DRM_XE_DEVICE_QUERY_ENGINES. + * + * For gt-specific events (gt-*), the gt parameter must be passed; all other + * parameters must be 0. + * * The standard perf tool can be used to grep for a certain event as well.
* Example: * @@ -35,20 +48,34 @@ * $ perf stat -e <event_name,gt=> -I <interval> */ -#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) -#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) +#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60) +#define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20) +#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12) +#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0) static unsigned int config_to_event_id(u64 config) { return FIELD_GET(XE_PMU_EVENT_ID_MASK, config); } +static unsigned int config_to_engine_class(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config); +} + +static unsigned int config_to_engine_instance(u64 config) +{ + return FIELD_GET(XE_PMU_EVENT_ENGINE_INSTANCE_MASK, config); +} + static unsigned int config_to_gt_id(u64 config) { return FIELD_GET(XE_PMU_EVENT_GT_MASK, config); } -#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 +#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01 +#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02 +#define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03 static struct xe_gt *event_to_gt(struct perf_event *event) { @@ -58,6 +85,59 @@ static struct xe_gt *event_to_gt(struct perf_event *event) return xe_device_get_gt(xe, gt); } +static struct xe_hw_engine *event_to_hwe(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + struct drm_xe_engine_class_instance eci; + u64 config = event->attr.config; + struct xe_hw_engine *hwe; + + eci.engine_class = config_to_engine_class(config); + eci.engine_instance = config_to_engine_instance(config); + eci.gt_id = config_to_gt_id(config); + + hwe = xe_hw_engine_lookup(xe, eci); + if (!hwe || xe_hw_engine_is_reserved(hwe)) + return NULL; + + return hwe; +} + +static bool is_engine_event(u64 config) +{ + unsigned int event_id = config_to_event_id(config); + + return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS || + event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); +} + +static bool event_gt_forcewake(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + u64 config = event->attr.config; + struct xe_gt *gt; + unsigned int *fw_ref; + + if (!is_engine_event(config)) + return true; + + gt = xe_device_get_gt(xe, config_to_gt_id(config)); + + fw_ref = kzalloc(sizeof(*fw_ref), GFP_KERNEL); + if (!fw_ref) + return false; + + *fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!*fw_ref) { + kfree(fw_ref); + return false; + } + + event->pmu_private = fw_ref; + + return true; +} + static bool event_supported(struct xe_pmu *pmu, unsigned int gt, unsigned int id) { @@ -68,9 +148,47 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt, pmu->supported_events & BIT_ULL(id); } +static bool event_param_valid(struct perf_event *event) +{ + struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base); + unsigned int engine_class, engine_instance; + u64 config = event->attr.config; + struct xe_gt *gt; + + gt = xe_device_get_gt(xe, config_to_gt_id(config)); + if (!gt) + return false; + + engine_class = config_to_engine_class(config); + engine_instance = config_to_engine_instance(config); + + switch (config_to_event_id(config)) { + case XE_PMU_EVENT_GT_C6_RESIDENCY: + if (engine_class || engine_instance) + return false; + break; + case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: + case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: + if (!event_to_hwe(event)) + return false; + break; + } + + return true; +} + static void xe_pmu_event_destroy(struct perf_event *event) { struct xe_device *xe = 
container_of(event->pmu, typeof(*xe), pmu.base); + struct xe_gt *gt; + unsigned int *fw_ref = event->pmu_private; + + if (fw_ref) { + gt = xe_device_get_gt(xe, config_to_gt_id(event->attr.config)); + xe_force_wake_put(gt_to_fw(gt), *fw_ref); + kfree(fw_ref); + event->pmu_private = NULL; + } drm_WARN_ON(&xe->drm, event->parent); xe_pm_runtime_put(xe); @@ -104,15 +222,37 @@ static int xe_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) return -EOPNOTSUPP; + if (!event_param_valid(event)) + return -ENOENT; + if (!event->parent) { drm_dev_get(&xe->drm); xe_pm_runtime_get(xe); + if (!event_gt_forcewake(event)) { + xe_pm_runtime_put(xe); + drm_dev_put(&xe->drm); + return -EINVAL; + } event->destroy = xe_pmu_event_destroy; } return 0; } +static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event) +{ + struct xe_hw_engine *hwe; + u64 val = 0; + + hwe = event_to_hwe(event); + if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS) + val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe); + else + val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe); + + return val; +} + static u64 __xe_pmu_event_read(struct perf_event *event) { struct xe_gt *gt = event_to_gt(event); @@ -123,6 +263,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event) switch (config_to_event_id(event->attr.config)) { case XE_PMU_EVENT_GT_C6_RESIDENCY: return xe_gt_idle_residency_msec(&gt->gtidle); + case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS: + case XE_PMU_EVENT_ENGINE_TOTAL_TICKS: + return read_engine_events(gt, event); } return 0; @@ -207,11 +350,15 @@ static void xe_pmu_event_del(struct perf_event *event, int flags) xe_pmu_event_stop(event, PERF_EF_UPDATE); } -PMU_FORMAT_ATTR(gt, "config:60-63"); -PMU_FORMAT_ATTR(event, "config:0-11"); +PMU_FORMAT_ATTR(gt, "config:60-63"); +PMU_FORMAT_ATTR(engine_class, "config:20-27"); +PMU_FORMAT_ATTR(engine_instance, "config:12-19"); +PMU_FORMAT_ATTR(event, "config:0-11"); static struct attribute *pmu_format_attrs[] = { &format_attr_event.attr, + &format_attr_engine_class.attr, + &format_attr_engine_instance.attr, &format_attr_gt.attr, NULL, }; @@ -270,6 +417,8 @@ static ssize_t event_attr_show(struct device *dev, XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr) XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms"); +XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); +XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS); static struct attribute *pmu_empty_event_attrs[] = { /* Empty - all events are added as groups with .attr_update() */ @@ -283,15 +432,23 @@ static const struct attribute_group pmu_events_attr_group = { static const struct attribute_group *pmu_events_attr_update[] = { &pmu_group_gt_c6_residency, + &pmu_group_engine_active_ticks, + &pmu_group_engine_total_ticks, NULL, }; static void set_supported_events(struct xe_pmu *pmu) { struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, 0); if (!xe->info.skip_guc_pc) pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY); + + if (xe_guc_engine_activity_supported(&gt->uc.guc)) { + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS); + pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS); + } } /**
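
Userspace reads these PMU events through perf_event_open(2) with the config bit layout exposed by the format attributes above (event in config:0-11, engine_instance in 12-19, engine_class in 20-27, gt in 60-63). A sketch of an open-and-read sequence; the sysfs device name and the zeroed gt/class/instance values are illustrative assumptions, not fixed paths:

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02

/* Pack the config fields the same way the format attributes describe. */
static uint64_t xe_pmu_config(uint64_t gt, uint64_t engine_class,
			      uint64_t engine_instance, uint64_t event_id)
{
	return (gt << 60) | (engine_class << 20) |
	       (engine_instance << 12) | event_id;
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* The PMU type id is assigned at registration; read it from sysfs.
	 * The device name below is an example for one PCI address. */
	f = fopen("/sys/bus/event_source/devices/xe_0000_00_02.0/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%u", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = xe_pmu_config(0, 0, 0, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);

	/* Device PMUs are system-wide: pid == -1 and a valid cpu. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("engine-active-ticks: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}
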
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 3cd3f83e86b0..47499ca02693 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -132,14 +132,6 @@ static int pxp_wait_for_session_state(struct xe_pxp *pxp, u32 id, bool in_play) static void pxp_invalidate_queues(struct xe_pxp *pxp); -static void pxp_invalidate_state(struct xe_pxp *pxp) -{ - pxp_invalidate_queues(pxp); - - if (pxp->status == XE_PXP_ACTIVE) - pxp->key_instance++; -} - static int pxp_terminate_hw(struct xe_pxp *pxp) { struct xe_gt *gt = pxp->gt; @@ -193,7 +185,8 @@ static void pxp_terminate(struct xe_pxp *pxp) mutex_lock(&pxp->mutex); - pxp_invalidate_state(pxp); + if (pxp->status == XE_PXP_ACTIVE) + pxp->key_instance++; /* * we'll mark the status as needing termination on resume, so no need to @@ -220,6 +213,8 @@ static void pxp_terminate(struct xe_pxp *pxp) mutex_unlock(&pxp->mutex); + pxp_invalidate_queues(pxp); + ret = pxp_terminate_hw(pxp); if (ret) { drm_err(&xe->drm, "PXP termination failed: %pe\n", ERR_PTR(ret)); @@ -665,23 +660,15 @@ out: return ret; } -/** - * xe_pxp_exec_queue_remove - remove a queue from the PXP list - * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) - * @q: the queue to remove from the list - * - * If PXP is enabled and the exec_queue is in the list, the queue will be - * removed from the list and its PM reference will be released. It is safe to - * call this function multiple times for the same queue. - */ -void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q) +static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) { bool need_pm_put = false; if (!xe_pxp_is_enabled(pxp)) return; - spin_lock_irq(&pxp->queues.lock); + if (lock) + spin_lock_irq(&pxp->queues.lock); if (!list_empty(&q->pxp.link)) { list_del_init(&q->pxp.link); @@ -690,36 +677,54 @@ void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q) q->pxp.type = DRM_XE_PXP_TYPE_NONE; - spin_unlock_irq(&pxp->queues.lock); + if (lock) + spin_unlock_irq(&pxp->queues.lock); if (need_pm_put) xe_pm_runtime_put(pxp->xe); } +/** + * xe_pxp_exec_queue_remove - remove a queue from the PXP list + * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled) + * @q: the queue to remove from the list + * + * If PXP is enabled and the exec_queue is in the list, the queue will be + * removed from the list and its PM reference will be released. It is safe to + * call this function multiple times for the same queue. + */ +void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q) +{ + __pxp_exec_queue_remove(pxp, q, true); +} + static void pxp_invalidate_queues(struct xe_pxp *pxp) { struct xe_exec_queue *tmp, *q; + LIST_HEAD(to_clean); spin_lock_irq(&pxp->queues.lock); - /* - * Removing a queue from the PXP list requires a put of the RPM ref that - * the queue holds to keep the PXP session alive, which can't be done - * under spinlock. Since it is safe to kill a queue multiple times, we - * can leave the invalid queue in the list for now and postpone the - * removal and associated RPM put to when the queue is destroyed. - */ - list_for_each_entry(tmp, &pxp->queues.list, pxp.link) { - q = xe_exec_queue_get_unless_zero(tmp); - + list_for_each_entry_safe(q, tmp, &pxp->queues.list, pxp.link) { + q = xe_exec_queue_get_unless_zero(q); if (!q) continue; + list_move_tail(&q->pxp.link, &to_clean); + } + spin_unlock_irq(&pxp->queues.lock); + + list_for_each_entry_safe(q, tmp, &to_clean, pxp.link) { xe_exec_queue_kill(q); + + /* + * We hold a ref to the queue so there is no risk of racing with + * the calls to exec_queue_remove coming from exec_queue_destroy.
+ */ + __pxp_exec_queue_remove(pxp, q, false); + xe_exec_queue_put(q); } - - spin_unlock_irq(&pxp->queues.lock); } /** @@ -816,6 +821,7 @@ int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj) */ int xe_pxp_pm_suspend(struct xe_pxp *pxp) { + bool needs_queue_inval = false; int ret = 0; if (!xe_pxp_is_enabled(pxp)) @@ -848,7 +854,8 @@ wait_for_activation: break; fallthrough; case XE_PXP_ACTIVE: - pxp_invalidate_state(pxp); + pxp->key_instance++; + needs_queue_inval = true; break; default: drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u", @@ -865,6 +872,9 @@ wait_for_activation: mutex_unlock(&pxp->mutex); + if (needs_queue_inval) + pxp_invalidate_queues(pxp); + /* * if there is a termination in progress, wait for it. * We need to wait outside the lock because the completion is done from diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index ebfae746f861..781dd21682e5 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -16,6 +16,7 @@ #include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_eu_stall.h" #include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_ggtt.h" @@ -729,6 +730,47 @@ static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *qu return 0; } +static int query_eu_stall(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + struct drm_xe_query_eu_stall *info; + size_t size, array_size; + const u64 *rates; + u32 num_rates; + int ret; + + if (!xe_eu_stall_supported_on_platform(xe)) { + drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); + return -ENODEV; + } + + array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates); + size = sizeof(struct drm_xe_query_eu_stall) + array_size; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + info = kzalloc(size, GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->num_sampling_rates = num_rates; + info->capabilities = DRM_XE_EU_STALL_CAPS_BASE; + info->record_size = xe_eu_stall_data_record_size(xe); + info->per_xecore_buf_size = xe_eu_stall_get_per_xecore_buf_size(); + memcpy(info->sampling_rates, rates, array_size); + + ret = copy_to_user(query_ptr, info, size); + kfree(info); + + return ret ? 
-EFAULT : 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -741,6 +783,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_uc_fw_version, query_oa_units, query_pxp_status, + query_eu_stall, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 0c230ee53bba..d2f604aa96fa 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -177,6 +177,10 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 flags; + if (XE_WA(gt, 14016712196)) + i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, + LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); + flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 02b4eadf8407..d939ce70e6fa 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -127,40 +127,55 @@ static ssize_t survivability_mode_show(struct device *dev, static DEVICE_ATTR_ADMIN_RO(survivability_mode); -static void enable_survivability_mode(struct pci_dev *pdev) +static void xe_survivability_mode_fini(void *arg) +{ + struct xe_device *xe = arg; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct device *dev = &pdev->dev; + + sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); +} + +static int enable_survivability_mode(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct xe_device *xe = pdev_to_xe_device(pdev); struct xe_survivability *survivability = &xe->survivability; int ret = 0; - /* set survivability mode */ - survivability->mode = true; - dev_info(dev, "In Survivability Mode\n"); - /* create survivability mode sysfs */ ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); if (ret) { dev_warn(dev, "Failed to create survivability sysfs files\n"); - return; + return ret; } - xe_heci_gsc_init(xe); + ret = devm_add_action_or_reset(xe->drm.dev, + xe_survivability_mode_fini, xe); + if (ret) + return ret; + + ret = xe_heci_gsc_init(xe); + if (ret) + return ret; xe_vsec_init(xe); + + survivability->mode = true; + dev_err(dev, "In Survivability Mode\n"); + + return 0; } /** - * xe_survivability_mode_enabled - check if survivability mode is enabled + * xe_survivability_mode_is_enabled - check if survivability mode is enabled * @xe: xe device instance * * Returns true if in survivability mode, false otherwise */ -bool xe_survivability_mode_enabled(struct xe_device *xe) +bool xe_survivability_mode_is_enabled(struct xe_device *xe) { - struct xe_survivability *survivability = &xe->survivability; - - return survivability->mode; + return xe->survivability.mode; } /** @@ -183,35 +198,19 @@ bool xe_survivability_mode_required(struct xe_device *xe) data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); - return (survivability->boot_status == NON_CRITICAL_FAILURE || - survivability->boot_status == CRITICAL_FAILURE); + return survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE; } /** - * xe_survivability_mode_remove - remove survivability mode + * xe_survivability_mode_enable - Initialize and enable the survivability 
mode * @xe: xe device instance * - * clean up sysfs entries of survivability mode - */ -void xe_survivability_mode_remove(struct xe_device *xe) -{ - struct xe_survivability *survivability = &xe->survivability; - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - struct device *dev = &pdev->dev; - - sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); - xe_heci_gsc_fini(xe); - kfree(survivability->info); - pci_set_drvdata(pdev, NULL); -} - -/** - * xe_survivability_mode_init - Initialize the survivability mode - * @xe: xe device instance + * Initialize survivability information and enable survivability mode * - * Initializes survivability information and enables survivability mode + * Return: 0 for success, negative error code otherwise. */ -void xe_survivability_mode_init(struct xe_device *xe) +int xe_survivability_mode_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; struct xe_survivability_info *info; @@ -219,9 +218,10 @@ void xe_survivability_mode_init(struct xe_device *xe) survivability->size = MAX_SCRATCH_MMIO; - info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL); + info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), + GFP_KERNEL); if (!info) - return; + return -ENOMEM; survivability->info = info; @@ -230,9 +230,8 @@ void xe_survivability_mode_init(struct xe_device *xe) /* Only log debug information and exit if it is a critical failure */ if (survivability->boot_status == CRITICAL_FAILURE) { log_survivability_info(pdev); - kfree(survivability->info); - return; + return -ENXIO; } - enable_survivability_mode(pdev); + return enable_survivability_mode(pdev); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index f530507a22c6..f4df5f9025ce 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -10,9 +10,8 @@ struct xe_device; -void xe_survivability_mode_init(struct xe_device *xe); -void xe_survivability_mode_remove(struct xe_device *xe); -bool xe_survivability_mode_enabled(struct xe_device *xe); +int xe_survivability_mode_enable(struct xe_device *xe); +bool xe_survivability_mode_is_enabled(struct xe_device *xe); bool xe_survivability_mode_required(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index d5281de04d54..b4a3577df70c 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -427,6 +427,36 @@ DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl, TP_ARGS(xe, caller) ); +TRACE_EVENT(xe_eu_stall_data_read, + TP_PROTO(u8 slice, u8 subslice, + u32 read_ptr, u32 write_ptr, + size_t read_size, size_t total_size), + TP_ARGS(slice, subslice, + read_ptr, write_ptr, + read_size, total_size), + + TP_STRUCT__entry(__field(u8, slice) + __field(u8, subslice) + __field(u32, read_ptr) + __field(u32, write_ptr) + __field(size_t, read_size) + __field(size_t, total_size) + ), + + TP_fast_assign(__entry->slice = slice; + __entry->subslice = subslice; + __entry->read_ptr = read_ptr; + __entry->write_ptr = write_ptr; + __entry->read_size = read_size; + __entry->total_size = total_size; + ), + + TP_printk("slice: %u subslice: %u read ptr: 0x%x write ptr: 0x%x read size: %zu total read size: %zu", + __entry->slice, __entry->subslice, + __entry->read_ptr, __entry->write_ptr, + __entry->read_size, __entry->total_size) +); + #endif /* This part must be outside protection */ diff --git 
a/drivers/gpu/drm/xe/xe_trace_guc.h b/drivers/gpu/drm/xe/xe_trace_guc.h index 23abdd55dc62..78949db9cfce 100644 --- a/drivers/gpu/drm/xe/xe_trace_guc.h +++ b/drivers/gpu/drm/xe/xe_trace_guc.h @@ -14,6 +14,7 @@ #include "xe_device_types.h" #include "xe_guc_exec_queue_types.h" +#include "xe_guc_engine_activity_types.h" #define __dev_name_xe(xe) dev_name((xe)->drm.dev) @@ -100,6 +101,54 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, ); +TRACE_EVENT(xe_guc_engine_activity, + TP_PROTO(struct xe_device *xe, struct engine_activity *ea, const char *name, + u16 instance), + TP_ARGS(xe, ea, name, instance), + + TP_STRUCT__entry( + __string(dev, __dev_name_xe(xe)) + __string(name, name) + __field(u32, global_change_num) + __field(u32, guc_tsc_frequency_hz) + __field(u32, lag_latency_usec) + __field(u16, instance) + __field(u16, change_num) + __field(u16, quanta_ratio) + __field(u32, last_update_tick) + __field(u64, active_ticks) + __field(u64, active) + __field(u64, total) + __field(u64, quanta) + __field(u64, last_cpu_ts) + ), + + TP_fast_assign( + __assign_str(dev); + __assign_str(name); + __entry->global_change_num = ea->metadata.global_change_num; + __entry->guc_tsc_frequency_hz = ea->metadata.guc_tsc_frequency_hz; + __entry->lag_latency_usec = ea->metadata.lag_latency_usec; + __entry->instance = instance; + __entry->change_num = ea->activity.change_num; + __entry->quanta_ratio = ea->activity.quanta_ratio; + __entry->last_update_tick = ea->activity.last_update_tick; + __entry->active_ticks = ea->activity.active_ticks; + __entry->active = ea->active; + __entry->total = ea->total; + __entry->quanta = ea->quanta; + __entry->last_cpu_ts = ea->last_cpu_ts; + ), + + TP_printk("dev=%s engine %s:%d Active=%llu, quanta=%llu, last_cpu_ts=%llu\n" + "Activity metadata: global_change_num=%u, guc_tsc_frequency_hz=%u lag_latency_usec=%u\n" + "Activity data: change_num=%u, quanta_ratio=0x%x, last_update_tick=%u, active_ticks=%llu\n", + __get_str(dev), __get_str(name), __entry->instance, + (__entry->active + __entry->total), __entry->quanta, __entry->last_cpu_ts, + __entry->global_change_num, __entry->guc_tsc_frequency_hz, + __entry->lag_latency_usec, __entry->change_num, __entry->quanta_ratio, + __entry->last_update_tick, __entry->active_ticks) +); #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index d8167e818280..c14bd2282044 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -14,6 +14,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_guc_pc.h" +#include "xe_guc_engine_activity.h" #include "xe_huc.h" #include "xe_sriov.h" #include "xe_uc_fw.h" @@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc) if (ret) return ret; + xe_guc_engine_activity_enable_stats(&uc->guc); + /* We don't fail the driver load if HuC fails to auth, but let's warn */ ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC); xe_gt_assert(uc_to_gt(uc), !ret); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d664f2e418b2..996000f2424e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -660,27 +660,39 @@ int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; int err = 0; - LIST_HEAD(tmp_evict); xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); lockdep_assert_held_write(&vm->lock); /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); list_for_each_entry_safe(uvma, next, 
&vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); - /* Pin and move to temporary list */ + /* Pin and move to bind list */ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); if (err == -EFAULT) { list_del_init(&uvma->userptr.repin_link); + /* + * We might have done the pin once already, but + * then had to retry before the re-bind happened, due + * to some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); /* Wait for pending binds */ xe_vm_lock(vm, false); @@ -691,10 +703,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vm_invalidate_vma(&uvma->vma); xe_vm_unlock(vm); if (err) - return err; + break; } else { - if (err < 0) - return err; + if (err) + break; list_del_init(&uvma->userptr.repin_link); list_move_tail(&uvma->vma.combined_links.rebind, @@ -702,7 +714,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } } - return 0; + if (err) { + down_write(&vm->userptr.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->userptr.notifier_lock); + } + return err; } /** @@ -1067,6 +1091,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d4982799383c..b797c863bbe5 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -619,6 +619,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 228436532282..e0c5fa460487 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -5,6 +5,7 @@ 22011391025 PLATFORM(DG2) 22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) +22016596838 PLATFORM(PVC) 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) @@ -43,3 +44,12 @@ no_media_l3 MEDIA_VERSION(3000) 14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) +16021333562 GRAPHICS_VERSION_RANGE(1200, 1274) +
MEDIA_VERSION(1300) +14016712196 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 1274) +14015568240 GRAPHICS_VERSION_RANGE(1255, 1260) +18013179988 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 1274) +1508761755 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
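
The query_eu_stall() function added in xe_query.c above follows the standard two-call xe query contract: userspace first passes size == 0 to learn the required buffer size, then calls again with an allocated buffer. A sketch of the userspace side, assuming the uapi id is named DRM_XE_DEVICE_QUERY_EU_STALL and that <drm/xe_drm.h> (as installed by libdrm) provides the drm_xe_query_eu_stall layout filled in by the kernel code shown above:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

/* Query EU stall sampling parameters from an open xe DRM fd. Returns a
 * malloc'd buffer the caller must free, or NULL on failure. */
static struct drm_xe_query_eu_stall *query_eu_stall_info(int fd)
{
	struct drm_xe_device_query q = {
		.query = DRM_XE_DEVICE_QUERY_EU_STALL,	/* assumed uapi id */
	};
	struct drm_xe_query_eu_stall *info;

	/* First call: size == 0, the kernel reports the full size. */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q) || !q.size)
		return NULL;

	info = malloc(q.size);
	if (!info)
		return NULL;

	/* Second call: same size, the kernel copies the data out. */
	q.data = (uintptr_t)info;
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q)) {
		free(info);
		return NULL;
	}

	return info;
}

The returned record_size, per_xecore_buf_size and sampling_rates[] values then feed directly into sizing the read buffers for an EU stall stream opened via DRM_XE_OBSERVATION_TYPE_EU_STALL.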