Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 2
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_abi.h | 1
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c | 13
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h | 29
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_regs.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 98
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_device_sysfs.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_eu_stall.c | 960
-rw-r--r--  drivers/gpu/drm/xe/xe_eu_stall.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_gen_wa_oob.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_clock.c | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats_types.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_engine_activity.c | 373
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_engine_activity.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_engine_activity_types.h | 92
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_types.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_heci_gsc.c | 39
-rw-r--r--  drivers/gpu/drm/xe/xe_heci_gsc.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_hmm.c | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_oa.c | 35
-rw-r--r--  drivers/gpu/drm/xe/xe_observation.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_sriov.c | 51
-rw-r--r--  drivers/gpu/drm/xe/xe_pmu.c | 175
-rw-r--r--  drivers/gpu/drm/xe/xe_pxp.c | 78
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 43
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_survivability_mode.c | 77
-rw-r--r--  drivers/gpu/drm/xe/xe_survivability_mode.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_trace_guc.h | 49
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 41
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 10
53 files changed, 2157 insertions(+), 348 deletions(-)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5ce65ccb3c08..856b14fe1c4d 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -33,6 +33,7 @@ xe-y += xe_bb.o \
xe_device_sysfs.o \
xe_dma_buf.o \
xe_drm_client.o \
+ xe_eu_stall.o \
xe_exec.o \
xe_exec_queue.o \
xe_execlist.o \
@@ -60,6 +61,7 @@ xe-y += xe_bb.o \
xe_guc_capture.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
+ xe_guc_engine_activity.o \
xe_guc_hwconfig.o \
xe_guc_id_mgr.o \
xe_guc_klv_helpers.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index fee385532fb0..ec516e838ee8 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -140,6 +140,7 @@ enum xe_guc_action {
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 02a413a07382..d5d453dc927a 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -170,6 +170,7 @@ static void xe_display_fini(void *arg)
intel_hpd_poll_fini(xe);
intel_hdcp_component_fini(display);
intel_audio_deinit(display);
+ intel_display_driver_remove(display);
}
int xe_display_init(struct xe_device *xe)
@@ -184,7 +185,7 @@ int xe_display_init(struct xe_device *xe)
if (err)
return err;
- return xe_device_add_action_or_reset(xe, xe_display_fini, xe);
+ return devm_add_action_or_reset(xe->drm.dev, xe_display_fini, xe);
}
void xe_display_register(struct xe_device *xe)
@@ -209,16 +210,6 @@ void xe_display_unregister(struct xe_device *xe)
intel_display_driver_unregister(display);
}
-void xe_display_driver_remove(struct xe_device *xe)
-{
- struct intel_display *display = &xe->display;
-
- if (!xe->info.probe_display)
- return;
-
- intel_display_driver_remove(display);
-}
-
/* IRQ-related functions */
void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 685dc74402fb..46e14f8dee28 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -14,7 +14,6 @@ struct drm_driver;
bool xe_display_driver_probe_defer(struct pci_dev *pdev);
void xe_display_driver_set_hooks(struct drm_driver *driver);
-void xe_display_driver_remove(struct xe_device *xe);
int xe_display_create(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index c8fd3d5ca502..4f372dc2cb89 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -53,7 +53,6 @@
#define RING_CTL(base) XE_REG((base) + 0x3c)
#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
-#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */
#define RING_START_UDW(base) XE_REG((base) + 0x48)
diff --git a/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h b/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h
new file mode 100644
index 000000000000..c53f57fdde65
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_EU_STALL_REGS_H_
+#define _XE_EU_STALL_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XEHPC_EUSTALL_BASE XE_REG_MCR(0xe520)
+#define XEHPC_EUSTALL_BASE_BUF_ADDR REG_GENMASK(31, 6)
+#define XEHPC_EUSTALL_BASE_XECORE_BUF_SZ REG_GENMASK(5, 3)
+#define XEHPC_EUSTALL_BASE_ENABLE_SAMPLING REG_BIT(1)
+
+#define XEHPC_EUSTALL_BASE_UPPER XE_REG_MCR(0xe524)
+
+#define XEHPC_EUSTALL_REPORT XE_REG_MCR(0xe528, XE_REG_OPTION_MASKED)
+#define XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK REG_GENMASK(15, 2)
+#define XEHPC_EUSTALL_REPORT_OVERFLOW_DROP REG_BIT(1)
+
+#define XEHPC_EUSTALL_REPORT1 XE_REG_MCR(0xe52c, XE_REG_OPTION_MASKED)
+#define XEHPC_EUSTALL_REPORT1_READ_PTR_MASK REG_GENMASK(15, 2)
+
+#define XEHPC_EUSTALL_CTRL XE_REG_MCR(0xe53c, XE_REG_OPTION_MASKED)
+#define EUSTALL_MOCS REG_GENMASK(9, 3)
+#define EUSTALL_SAMPLE_RATE REG_GENMASK(2, 0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 096859072396..d08dd437172f 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -358,6 +358,8 @@
#define RENDER_AWAKE_STATUS REG_BIT(1)
#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0)
+#define MISC_STATUS_0 XE_REG(0xa500)
+
#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4)
#define FORCEWAKE_GSC XE_REG(0xa618)
@@ -478,6 +480,7 @@
#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
#define STK_ID_RESTRICT REG_BIT(12)
#define SLM_WMTP_RESTORE REG_BIT(11)
+#define RES_CHK_SPR_DIS REG_BIT(6)
#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
#define UGM_BACKUP_MODE REG_BIT(13)
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 6cf282618836..3abb17d2ca33 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,10 +7,6 @@
#include "regs/xe_reg_defs.h"
-#define TIMESTAMP_OVERRIDE XE_REG(0x44074)
-#define TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK REG_GENMASK(15, 12)
-#define TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK REG_GENMASK(9, 0)
-
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 39fe485d2085..81b9d9bb3f57 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -237,7 +237,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
/*
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
- * internally using GFP_KERNEL expliictly. Hence this call must be in the worker
+ * internally using GFP_KERNEL explicitly. Hence this call must be in the worker
* thread and not in the initial capture call.
*/
dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
@@ -423,11 +423,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi
if (size & 3)
drm_printf(p, "Size not word aligned: %zu", size);
if (offset & 3)
- drm_printf(p, "Offset not word aligned: %zu", size);
+ drm_printf(p, "Offset not word aligned: %zu", offset);
line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
- if (IS_ERR_OR_NULL(line_buff)) {
- drm_printf(p, "Failed to allocate line buffer: %pe", line_buff);
+ if (!line_buff) {
+ drm_printf(p, "Failed to allocate line buffer\n");
return;
}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 64d3a26ad4a3..9454b51f7ad8 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -53,7 +53,6 @@
#include "xe_pxp.h"
#include "xe_query.h"
#include "xe_sriov.h"
-#include "xe_survivability_mode.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
@@ -65,12 +64,6 @@
#include <generated/xe_wa_oob.h>
-struct xe_device_remove_action {
- struct list_head node;
- void (*action)(void *);
- void *data;
-};
-
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
@@ -667,7 +660,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
-static void update_device_info(struct xe_device *xe)
+static void sriov_update_device_info(struct xe_device *xe)
{
/* disable features that are not available/applicable to VFs */
if (IS_SRIOV_VF(xe)) {
@@ -698,15 +691,11 @@ int xe_device_probe_early(struct xe_device *xe)
xe_sriov_probe_early(xe);
- update_device_info(xe);
+ sriov_update_device_info(xe);
err = xe_pcode_probe_early(xe);
- if (err) {
- if (xe_survivability_mode_required(xe))
- xe_survivability_mode_init(xe);
-
+ if (err)
return err;
- }
err = wait_for_lmem_ready(xe);
if (err)
@@ -752,9 +741,6 @@ int xe_device_probe(struct xe_device *xe)
int err;
u8 id;
- xe->probing = true;
- INIT_LIST_HEAD(&xe->remove_action_list);
-
xe_pat_init_early(xe);
err = xe_sriov_init(xe);
@@ -762,6 +748,7 @@ int xe_device_probe(struct xe_device *xe)
return err;
xe->info.mem_region_mask = 1;
+
err = xe_set_dma_info(xe);
if (err)
return err;
@@ -770,7 +757,9 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
- xe_ttm_sys_mgr_init(xe);
+ err = xe_ttm_sys_mgr_init(xe);
+ if (err)
+ return err;
for_each_gt(gt, xe, id) {
err = xe_gt_init_early(gt);
@@ -865,7 +854,9 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
- xe_heci_gsc_init(xe);
+ err = xe_heci_gsc_init(xe);
+ if (err)
+ return err;
err = xe_oa_init(xe);
if (err)
@@ -877,11 +868,11 @@ int xe_device_probe(struct xe_device *xe)
err = xe_pxp_init(xe);
if (err)
- goto err_remove_display;
+ return err;
err = drm_dev_register(&xe->drm, 0);
if (err)
- goto err_remove_display;
+ return err;
xe_display_register(xe);
@@ -904,84 +895,19 @@ int xe_device_probe(struct xe_device *xe)
xe_vsec_init(xe);
- xe->probing = false;
-
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_unregister_display:
xe_display_unregister(xe);
-err_remove_display:
- xe_display_driver_remove(xe);
return err;
}
-/**
- * xe_device_call_remove_actions - Call the remove actions
- * @xe: xe device instance
- *
- * This is only to be used by xe_pci and xe_device to call the remove actions
- * while removing the driver or handling probe failures.
- */
-void xe_device_call_remove_actions(struct xe_device *xe)
-{
- struct xe_device_remove_action *ra, *tmp;
-
- list_for_each_entry_safe(ra, tmp, &xe->remove_action_list, node) {
- ra->action(ra->data);
- list_del(&ra->node);
- kfree(ra);
- }
-
- xe->probing = false;
-}
-
-/**
- * xe_device_add_action_or_reset - Add an action to run on driver removal
- * @xe: xe device instance
- * @action: Function that should be called on device remove
- * @data: Pointer to data passed to @action implementation
- *
- * This adds a custom action to the list of remove callbacks executed on device
- * remove, before any dev or drm managed resources are removed. This is only
- * needed if the action leads to component_del()/component_master_del() since
- * that is not compatible with devres cleanup.
- *
- * Returns: 0 on success or a negative error code on failure, in which case
- * @action is already called.
- */
-int xe_device_add_action_or_reset(struct xe_device *xe,
- void (*action)(void *), void *data)
-{
- struct xe_device_remove_action *ra;
-
- drm_WARN_ON(&xe->drm, !xe->probing);
-
- ra = kmalloc(sizeof(*ra), GFP_KERNEL);
- if (!ra) {
- action(data);
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD(&ra->node);
- ra->action = action;
- ra->data = data;
- list_add(&ra->node, &xe->remove_action_list);
-
- return 0;
-}
-
void xe_device_remove(struct xe_device *xe)
{
xe_display_unregister(xe);
drm_dev_unplug(&xe->drm);
-
- xe_display_driver_remove(xe);
-
- xe_heci_gsc_fini(xe);
-
- xe_device_call_remove_actions(xe);
}
void xe_device_shutdown(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 079dad32a6f5..0bc3bc8e6803 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -45,9 +45,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
const struct pci_device_id *ent);
int xe_device_probe_early(struct xe_device *xe);
int xe_device_probe(struct xe_device *xe);
-int xe_device_add_action_or_reset(struct xe_device *xe,
- void (*action)(void *), void *data);
-void xe_device_call_remove_actions(struct xe_device *xe);
void xe_device_remove(struct xe_device *xe);
void xe_device_shutdown(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 7375937934fa..7efbd4c52791 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -32,9 +32,6 @@ vram_d3cold_threshold_show(struct device *dev,
struct xe_device *xe = pdev_to_xe_device(pdev);
int ret;
- if (!xe)
- return -EINVAL;
-
xe_pm_runtime_get(xe);
ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
xe_pm_runtime_put(xe);
@@ -51,9 +48,6 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
u32 vram_d3cold_threshold;
int ret;
- if (!xe)
- return -EINVAL;
-
ret = kstrtou32(buff, 0, &vram_d3cold_threshold);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 4656305dd45a..833c29fed3a3 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -429,20 +429,6 @@ struct xe_device {
struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
/**
- * @remove_action_list: list of actions to execute on device remove.
- * Use xe_device_add_remove_action() for that. Actions can only be added
- * during probe and are executed during the call from PCI subsystem to
- * remove the driver from the device.
- */
- struct list_head remove_action_list;
-
- /**
- * @probing: cover the section in which @remove_action_list can be used
- * to post cleaning actions
- */
- bool probing;
-
- /**
* @mem_access: keep track of memory access in the device, possibly
* triggering additional actions when they occur.
*/
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
new file mode 100644
index 000000000000..88a92baf5c95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -0,0 +1,960 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/types.h>
+
+#include <drm/drm_drv.h>
+#include <generated/xe_wa_oob.h>
+#include <uapi/drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_eu_stall.h"
+#include "xe_force_wake.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_topology.h"
+#include "xe_macros.h"
+#include "xe_observation.h"
+#include "xe_pm.h"
+#include "xe_trace.h"
+#include "xe_wa.h"
+
+#include "regs/xe_eu_stall_regs.h"
+#include "regs/xe_gt_regs.h"
+
+#define POLL_PERIOD_MS 5
+
+static size_t per_xecore_buf_size = SZ_512K;
+
+struct per_xecore_buf {
+ /* Buffer vaddr */
+ u8 *vaddr;
+ /* Write pointer */
+ u32 write;
+ /* Read pointer */
+ u32 read;
+};
+
+struct xe_eu_stall_data_stream {
+ bool pollin;
+ bool enabled;
+ int wait_num_reports;
+ int sampling_rate_mult;
+ wait_queue_head_t poll_wq;
+ size_t data_record_size;
+ size_t per_xecore_buf_size;
+
+ struct xe_gt *gt;
+ struct xe_bo *bo;
+ struct per_xecore_buf *xecore_buf;
+ struct {
+ bool reported_to_user;
+ xe_dss_mask_t mask;
+ } data_drop;
+ struct delayed_work buf_poll_work;
+};
+
+struct xe_eu_stall_gt {
+ /* Lock to protect stream */
+ struct mutex stream_lock;
+ /* EU stall data stream */
+ struct xe_eu_stall_data_stream *stream;
+ /* Workqueue to schedule buffer pointers polling work */
+ struct workqueue_struct *buf_ptr_poll_wq;
+};
+
+/**
+ * struct eu_stall_open_properties - EU stall sampling properties received
+ * from user space at open.
+ * @sampling_rate_mult: EU stall sampling rate multiplier.
+ * HW will sample every (sampling_rate_mult x 251) cycles.
+ * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
+ * @gt: GT on which EU stall data will be captured.
+ */
+struct eu_stall_open_properties {
+ int sampling_rate_mult;
+ int wait_num_reports;
+ struct xe_gt *gt;
+};
+
+/*
+ * EU stall data format for PVC
+ */
+struct xe_eu_stall_data_pvc {
+ __u64 ip_addr:29; /* Bits 0 to 28 */
+ __u64 active_count:8; /* Bits 29 to 36 */
+ __u64 other_count:8; /* Bits 37 to 44 */
+ __u64 control_count:8; /* Bits 45 to 52 */
+ __u64 pipestall_count:8; /* Bits 53 to 60 */
+ __u64 send_count:8; /* Bits 61 to 68 */
+ __u64 dist_acc_count:8; /* Bits 69 to 76 */
+ __u64 sbid_count:8; /* Bits 77 to 84 */
+ __u64 sync_count:8; /* Bits 85 to 92 */
+ __u64 inst_fetch_count:8; /* Bits 93 to 100 */
+ __u64 unused_bits:27;
+ __u64 unused[6];
+} __packed;
+
+/*
+ * EU stall data format for Xe2 arch GPUs (LNL, BMG).
+ */
+struct xe_eu_stall_data_xe2 {
+ __u64 ip_addr:29; /* Bits 0 to 28 */
+ __u64 tdr_count:8; /* Bits 29 to 36 */
+ __u64 other_count:8; /* Bits 37 to 44 */
+ __u64 control_count:8; /* Bits 45 to 52 */
+ __u64 pipestall_count:8; /* Bits 53 to 60 */
+ __u64 send_count:8; /* Bits 61 to 68 */
+ __u64 dist_acc_count:8; /* Bits 69 to 76 */
+ __u64 sbid_count:8; /* Bits 77 to 84 */
+ __u64 sync_count:8; /* Bits 85 to 92 */
+ __u64 inst_fetch_count:8; /* Bits 93 to 100 */
+ __u64 active_count:8; /* Bits 101 to 108 */
+ __u64 ex_id:3; /* Bits 109 to 111 */
+ __u64 end_flag:1; /* Bit 112 */
+ __u64 unused_bits:15;
+ __u64 unused[6];
+} __packed;
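+
+/*
+ * Note that both record layouts above pack 128 bits of counter fields
+ * followed by six unused u64 words, so a single EU stall data record is
+ * 64 bytes on both PVC and Xe2+; num_data_rows() below relies on this
+ * when converting a byte count into a number of 64 B records.
+ */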
+
+const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
+
+/**
+ * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
+ *
+ * @num_rates: Pointer to a u32 to return the number of sampling rates.
+ * @rates: Double u64 pointer which is set to point at the array of sampling rates.
+ *
+ * Stores the number of sampling rates and a pointer to the array of
+ * sampling rates in the input pointers.
+ *
+ * Returns: Size of the EU stall sampling rates array.
+ */
+size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
+{
+ *num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
+ *rates = eu_stall_sampling_rates;
+
+ return sizeof(eu_stall_sampling_rates);
+}
+
+/**
+ * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
+ *
+ * Returns: The per XeCore buffer size used to allocate the per GT
+ * EU stall data buffer.
+ */
+size_t xe_eu_stall_get_per_xecore_buf_size(void)
+{
+ return per_xecore_buf_size;
+}
+
+/**
+ * xe_eu_stall_data_record_size - get EU stall data record size.
+ *
+ * @xe: Pointer to a Xe device.
+ *
+ * Returns: EU stall data record size.
+ */
+size_t xe_eu_stall_data_record_size(struct xe_device *xe)
+{
+ size_t record_size = 0;
+
+ if (xe->info.platform == XE_PVC)
+ record_size = sizeof(struct xe_eu_stall_data_pvc);
+ else if (GRAPHICS_VER(xe) >= 20)
+ record_size = sizeof(struct xe_eu_stall_data_xe2);
+
+ xe_assert(xe, is_power_of_2(record_size));
+
+ return record_size;
+}
+
+/**
+ * num_data_rows - Return the number of EU stall data rows of 64B each
+ * for a given data size.
+ *
+ * @data_size: EU stall data size
+ */
+static u32 num_data_rows(u32 data_size)
+{
+ return data_size >> 6;
+}
+
+static void xe_eu_stall_fini(void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
+ mutex_destroy(&gt->eu_stall->stream_lock);
+ kfree(gt->eu_stall);
+}
+
+/**
+ * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
+ * structure xe_eu_stall_gt within struct xe_gt.
+ *
+ * @gt: GT being initialized.
+ *
+ * Returns: zero on success or a negative error code.
+ */
+int xe_eu_stall_init(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int ret;
+
+ gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
+ if (!gt->eu_stall) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ mutex_init(&gt->eu_stall->stream_lock);
+
+ gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
+ if (!gt->eu_stall->buf_ptr_poll_wq) {
+ ret = -ENOMEM;
+ goto exit_free;
+ }
+
+ ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
+ if (ret)
+ goto exit_destroy;
+
+ return 0;
+exit_destroy:
+ destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
+exit_free:
+ mutex_destroy(&gt->eu_stall->stream_lock);
+ kfree(gt->eu_stall);
+exit:
+ return ret;
+}
+
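+/*
+ * The sampling rate property is passed in cycles and converted to the
+ * hardware multiplier by dividing by 251 (e.g. a requested 1004 cycles
+ * becomes a multiplier of 4). Rates that divide down to 0 or to more
+ * than 7 are rejected; anything else is silently rounded down to the
+ * nearest multiple of 251.
+ */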
+static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
+ struct eu_stall_open_properties *props)
+{
+ value = div_u64(value, 251);
+ if (value == 0 || value > 7) {
+ drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
+ return -EINVAL;
+ }
+ props->sampling_rate_mult = value;
+ return 0;
+}
+
+static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
+ struct eu_stall_open_properties *props)
+{
+ props->wait_num_reports = value;
+
+ return 0;
+}
+
+static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
+ struct eu_stall_open_properties *props)
+{
+ if (value >= xe->info.gt_count) {
+ drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
+ return -EINVAL;
+ }
+ props->gt = xe_device_get_gt(xe, value);
+ return 0;
+}
+
+typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
+ struct eu_stall_open_properties *props);
+
+static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
+ [DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
+ [DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
+ [DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
+};
+
+static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
+ struct eu_stall_open_properties *props)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_set_property ext;
+ int err;
+ u32 idx;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
+ XE_IOCTL_DBG(xe, ext.pad))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
+ return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
+}
+
+typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
+ struct eu_stall_open_properties *props);
+static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
+ [DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 5
+static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
+ int ext_number, struct eu_stall_open_properties *props)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(xe, ext.pad) ||
+ XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
+ err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
+ if (XE_IOCTL_DBG(xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);
+
+ return 0;
+}
+
+/**
+ * buf_data_size - Calculate the number of bytes in a circular buffer
+ * given the read and write pointers and the size of
+ * the buffer.
+ *
+ * @buf_size: Size of the circular buffer
+ * @read_ptr: Read pointer with an additional overflow bit
+ * @write_ptr: Write pointer with an additional overflow bit
+ *
+ * Since the read and write pointers have an additional overflow bit,
+ * this function calculates the offsets from the pointers and uses the
+ * offsets to calculate the data size in the buffer.
+ *
+ * Returns: number of bytes of data in the buffer
+ */
+static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
+{
+ u32 read_offset, write_offset, size = 0;
+
+ if (read_ptr == write_ptr)
+ goto exit;
+
+ read_offset = read_ptr & (buf_size - 1);
+ write_offset = write_ptr & (buf_size - 1);
+
+ if (write_offset > read_offset)
+ size = write_offset - read_offset;
+ else
+ size = buf_size - read_offset + write_offset;
+exit:
+ return size;
+}
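+
+/*
+ * Worked example of the arithmetic above (illustrative numbers only):
+ * with a 256 KiB buffer the offsets occupy bits [17:0] and the hardware
+ * pointers carry one extra overflow bit (bit 18). If read_ptr == 0x0 and
+ * write_ptr == 0x40000 (the hardware has written exactly one full buffer
+ * and set the overflow bit), the pointers differ while both offsets are
+ * zero, so the function returns buf_size - 0 + 0 = 256 KiB, i.e. a
+ * completely full buffer rather than an empty one. This is how the extra
+ * bit avoids wasting a slot.
+ */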
+
+/**
+ * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
+ *
+ * @stream: xe EU stall data stream instance
+ *
+ * Returns: true if the EU stall buffer contains minimum stall data as
+ * specified by the event report count, else false.
+ */
+static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
+{
+ u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
+ u32 buf_size = stream->per_xecore_buf_size;
+ struct per_xecore_buf *xecore_buf;
+ struct xe_gt *gt = stream->gt;
+ bool min_data_present = false;
+ u16 group, instance;
+ unsigned int xecore;
+
+ mutex_lock(&gt->eu_stall->stream_lock);
+ for_each_dss_steering(xecore, gt, group, instance) {
+ xecore_buf = &stream->xecore_buf[xecore];
+ read_ptr = xecore_buf->read;
+ write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
+ group, instance);
+ write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
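+ /*
+ * The report register gives the write pointer in 64 B units; convert
+ * it to a byte offset and keep the extra overflow bit by masking with
+ * twice the buffer size minus one.
+ */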
+ write_ptr <<= 6;
+ write_ptr &= ((buf_size << 1) - 1);
+ if (!min_data_present) {
+ total_data += buf_data_size(buf_size, read_ptr, write_ptr);
+ if (num_data_rows(total_data) >= stream->wait_num_reports)
+ min_data_present = true;
+ }
+ if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
+ set_bit(xecore, stream->data_drop.mask);
+ xecore_buf->write = write_ptr;
+ }
+ mutex_unlock(&gt->eu_stall->stream_lock);
+
+ return min_data_present;
+}
+
+static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 write_ptr_reg;
+
+ /* On PVC, the overflow bit has to be cleared by writing 1 to it.
+ * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
+ */
+ if (GRAPHICS_VER(xe) >= 20)
+ write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+ else
+ write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
+
+ xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
+}
+
+static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
+ char __user *buf, size_t count,
+ size_t *total_data_size, struct xe_gt *gt,
+ u16 group, u16 instance, unsigned int xecore)
+{
+ size_t read_data_size, copy_size, buf_size;
+ u32 read_ptr_reg, read_ptr, write_ptr;
+ u8 *xecore_start_vaddr, *read_vaddr;
+ struct per_xecore_buf *xecore_buf;
+ u32 read_offset, write_offset;
+
+ /* Hardware increments the read and write pointers such that they can
+ * overflow into one additional bit. For example, a 256KB size buffer
+ * offset pointer needs 18 bits. But HW uses 19 bits for the read and
+ * write pointers. This technique avoids wasting a slot in the buffer.
+ * Read and write offsets are calculated from the pointers in order to
+ * check if the write pointer has wrapped around the array.
+ */
+ xecore_buf = &stream->xecore_buf[xecore];
+ xecore_start_vaddr = xecore_buf->vaddr;
+ read_ptr = xecore_buf->read;
+ write_ptr = xecore_buf->write;
+ buf_size = stream->per_xecore_buf_size;
+
+ read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
+ /* Read only the data that the user space buffer can accommodate */
+ read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
+ if (read_data_size == 0)
+ goto exit_drop;
+
+ read_offset = read_ptr & (buf_size - 1);
+ write_offset = write_ptr & (buf_size - 1);
+ read_vaddr = xecore_start_vaddr + read_offset;
+
+ if (write_offset > read_offset) {
+ if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
+ return -EFAULT;
+ } else {
+ if (read_data_size >= buf_size - read_offset)
+ copy_size = buf_size - read_offset;
+ else
+ copy_size = read_data_size;
+ if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
+ return -EFAULT;
+ if (copy_to_user(buf + *total_data_size + copy_size,
+ xecore_start_vaddr, read_data_size - copy_size))
+ return -EFAULT;
+ }
+
+ *total_data_size += read_data_size;
+ read_ptr += read_data_size;
+
+ /* Read pointer can overflow into one additional bit */
+ read_ptr &= (buf_size << 1) - 1;
+ read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
+ read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
+ xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
+ xecore_buf->read = read_ptr;
+ trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
+ read_data_size, *total_data_size);
+exit_drop:
+ /* Clear drop bit (if set) after any data was read or if the buffer was empty.
+ * Drop bit can be set even if the buffer is empty as the buffer may have been emptied
+ * in the previous read() and the data drop bit was set during the previous read().
+ */
+ if (test_bit(xecore, stream->data_drop.mask)) {
+ clear_dropped_eviction_line_bit(gt, group, instance);
+ clear_bit(xecore, stream->data_drop.mask);
+ }
+ return 0;
+}
+
+/**
+ * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
+ * per xecore buffers to the userspace buffer
+ * @stream: A stream opened for EU stall count metrics
+ * @file: An xe EU stall data stream file
+ * @buf: destination buffer given by userspace
+ * @count: the number of bytes userspace wants to read
+ *
+ * Returns: Number of bytes copied or a negative error code
+ * If we've successfully copied any data then reporting that takes
+ * precedence over any internal error status, so the data isn't lost.
+ */
+static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
+ struct file *file, char __user *buf,
+ size_t count)
+{
+ struct xe_gt *gt = stream->gt;
+ size_t total_size = 0;
+ u16 group, instance;
+ unsigned int xecore;
+ int ret = 0;
+
+ if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
+ if (!stream->data_drop.reported_to_user) {
+ stream->data_drop.reported_to_user = true;
+ xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
+ XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
+ return -EIO;
+ }
+ stream->data_drop.reported_to_user = false;
+ }
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
+ gt, group, instance, xecore);
+ if (ret || count == total_size)
+ break;
+ }
+ return total_size ?: (ret ?: -EAGAIN);
+}
+
+/*
+ * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
+ * before calling read().
+ *
+ * Returns: The number of bytes copied or a negative error code on failure.
+ * -EIO if HW drops any EU stall data when the buffer is full.
+ */
+static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+ ssize_t ret, aligned_count;
+
+ aligned_count = ALIGN_DOWN(count, stream->data_record_size);
+ if (aligned_count == 0)
+ return -EINVAL;
+
+ if (!stream->enabled) {
+ xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
+ return -EINVAL;
+ }
+
+ if (!(file->f_flags & O_NONBLOCK)) {
+ do {
+ ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
+ if (ret)
+ return -EINTR;
+
+ mutex_lock(&gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+ mutex_unlock(&gt->eu_stall->stream_lock);
+ } while (ret == -EAGAIN);
+ } else {
+ mutex_lock(&gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+ mutex_unlock(&gt->eu_stall->stream_lock);
+ }
+
+ /*
+ * This may not work correctly if the user buffer is very small.
+ * We don't want to block the next read() when there is data in the buffer
+ * now, but couldn't be accommodated in the small user buffer.
+ */
+ stream->pollin = false;
+
+ return ret;
+}
+
+static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
+{
+ struct xe_gt *gt = stream->gt;
+
+ gt->eu_stall->stream = NULL;
+ kfree(stream);
+}
+
+static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
+{
+ xe_bo_unpin_map_no_vm(stream->bo);
+ kfree(stream->xecore_buf);
+}
+
+static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
+ u16 last_xecore)
+{
+ struct xe_tile *tile = stream->gt->tile;
+ struct xe_bo *bo;
+ u32 size;
+
+ stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
+ if (!stream->xecore_buf)
+ return -ENOMEM;
+
+ size = stream->per_xecore_buf_size * last_xecore;
+
+ bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
+ size, ~0ull, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
+ if (IS_ERR(bo)) {
+ kfree(stream->xecore_buf);
+ return PTR_ERR(bo);
+ }
+
+ XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
+ stream->bo = bo;
+
+ return 0;
+}
+
+static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
+{
+ u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
+ struct per_xecore_buf *xecore_buf;
+ struct xe_gt *gt = stream->gt;
+ u16 group, instance;
+ unsigned int fw_ref;
+ int xecore;
+
+ /* Take runtime pm ref and forcewake to disable RC6 */
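+ /* The matching puts are in xe_eu_stall_disable_locked() once sampling stops. */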
+ xe_pm_runtime_get(gt_to_xe(gt));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
+ xe_gt_err(gt, "Failed to get RENDER forcewake\n");
+ xe_pm_runtime_put(gt_to_xe(gt));
+ return -ETIMEDOUT;
+ }
+
+ if (XE_WA(gt, 22016596838))
+ xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
+ /* Clear any drop bits set and not cleared in the previous session. */
+ if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
+ clear_dropped_eviction_line_bit(gt, group, instance);
+ write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
+ read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
+ read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
+ /* Initialize the read pointer to the write pointer */
+ xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
+ write_ptr <<= 6;
+ write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
+ xecore_buf = &stream->xecore_buf[xecore];
+ xecore_buf->write = write_ptr;
+ xecore_buf->read = write_ptr;
+ }
+ stream->data_drop.reported_to_user = false;
+ bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
+
+ reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
+ REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
+ REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
+ stream->sampling_rate_mult));
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
+ /* GGTT addresses can never be > 32 bits */
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
+ reg_value = xe_bo_ggtt_addr(stream->bo);
+ reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
+ stream->per_xecore_buf_size / SZ_256K);
+ reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
+
+ return 0;
+}
+
+static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
+{
+ struct xe_eu_stall_data_stream *stream =
+ container_of(work, typeof(*stream), buf_poll_work.work);
+ struct xe_gt *gt = stream->gt;
+
+ if (eu_stall_data_buf_poll(stream)) {
+ stream->pollin = true;
+ wake_up(&stream->poll_wq);
+ }
+ queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+ &stream->buf_poll_work,
+ msecs_to_jiffies(POLL_PERIOD_MS));
+}
+
+static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
+ struct eu_stall_open_properties *props)
+{
+ unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
+ struct per_xecore_buf *xecore_buf;
+ struct xe_gt *gt = stream->gt;
+ xe_dss_mask_t all_xecores;
+ u16 group, instance;
+ u32 vaddr_offset;
+ int ret;
+
+ bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+ XE_MAX_DSS_FUSE_BITS);
+ num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
+ last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
+
+ max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
+ if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
+ xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
+ props->wait_num_reports);
+ xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
+ max_wait_num_reports);
+ return -EINVAL;
+ }
+
+ init_waitqueue_head(&stream->poll_wq);
+ INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
+ stream->per_xecore_buf_size = per_xecore_buf_size;
+ stream->sampling_rate_mult = props->sampling_rate_mult;
+ stream->wait_num_reports = props->wait_num_reports;
+ stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
+
+ ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
+ if (ret)
+ return ret;
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ xecore_buf = &stream->xecore_buf[xecore];
+ vaddr_offset = xecore * stream->per_xecore_buf_size;
+ xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
+ }
+ return 0;
+}
+
+static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
+ struct file *file, poll_table *wait)
+{
+ __poll_t events = 0;
+
+ poll_wait(file, &stream->poll_wq, wait);
+
+ if (stream->pollin)
+ events |= EPOLLIN;
+
+ return events;
+}
+
+static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
+{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+ __poll_t ret;
+
+ mutex_lock(&gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
+ mutex_unlock(&gt->eu_stall->stream_lock);
+
+ return ret;
+}
+
+static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
+{
+ struct xe_gt *gt = stream->gt;
+ int ret = 0;
+
+ if (stream->enabled)
+ return ret;
+
+ stream->enabled = true;
+
+ ret = xe_eu_stall_stream_enable(stream);
+
+ queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+ &stream->buf_poll_work,
+ msecs_to_jiffies(POLL_PERIOD_MS));
+ return ret;
+}
+
+static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
+{
+ struct xe_gt *gt = stream->gt;
+
+ if (!stream->enabled)
+ return 0;
+
+ stream->enabled = false;
+
+ xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
+
+ cancel_delayed_work_sync(&stream->buf_poll_work);
+
+ if (XE_WA(gt, 22016596838))
+ xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
+ _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
+
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
+ xe_pm_runtime_put(gt_to_xe(gt));
+
+ return 0;
+}
+
+static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case DRM_XE_OBSERVATION_IOCTL_ENABLE:
+ return xe_eu_stall_enable_locked(stream);
+ case DRM_XE_OBSERVATION_IOCTL_DISABLE:
+ return xe_eu_stall_disable_locked(stream);
+ }
+
+ return -EINVAL;
+}
+
+static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+ long ret;
+
+ mutex_lock(&gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
+ mutex_unlock(&gt->eu_stall->stream_lock);
+
+ return ret;
+}
+
+static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
+{
+ struct xe_eu_stall_data_stream *stream = file->private_data;
+ struct xe_gt *gt = stream->gt;
+
+ drm_dev_put(&gt->tile->xe->drm);
+
+ mutex_lock(&gt->eu_stall->stream_lock);
+ xe_eu_stall_disable_locked(stream);
+ xe_eu_stall_data_buf_destroy(stream);
+ xe_eu_stall_stream_free(stream);
+ mutex_unlock(&gt->eu_stall->stream_lock);
+
+ return 0;
+}
+
+static const struct file_operations fops_eu_stall = {
+ .owner = THIS_MODULE,
+ .llseek = noop_llseek,
+ .release = xe_eu_stall_stream_close,
+ .poll = xe_eu_stall_stream_poll,
+ .read = xe_eu_stall_stream_read,
+ .unlocked_ioctl = xe_eu_stall_stream_ioctl,
+ .compat_ioctl = xe_eu_stall_stream_ioctl,
+};
+
+static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
+ struct eu_stall_open_properties *props,
+ struct drm_file *file)
+{
+ struct xe_eu_stall_data_stream *stream;
+ struct xe_gt *gt = props->gt;
+ unsigned long f_flags = 0;
+ int ret, stream_fd;
+
+ /* Only one session can be active at any time */
+ if (gt->eu_stall->stream) {
+ xe_gt_dbg(gt, "EU stall sampling session already active\n");
+ return -EBUSY;
+ }
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ return -ENOMEM;
+
+ gt->eu_stall->stream = stream;
+ stream->gt = gt;
+
+ ret = xe_eu_stall_stream_init(stream, props);
+ if (ret) {
+ xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
+ goto err_free;
+ }
+
+ stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
+ if (stream_fd < 0) {
+ ret = stream_fd;
+ xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
+ goto err_destroy;
+ }
+
+ /* Take a reference on the driver that will be kept with stream_fd
+ * until its release.
+ */
+ drm_dev_get(&gt->tile->xe->drm);
+
+ return stream_fd;
+
+err_destroy:
+ xe_eu_stall_data_buf_destroy(stream);
+err_free:
+ xe_eu_stall_stream_free(stream);
+ return ret;
+}
+
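+/*
+ * Rough userspace usage sketch (illustrative only; the property and
+ * extension names below come from the xe uAPI added alongside this code,
+ * and the ioctl plumbing lives in xe_observation.c):
+ *
+ *	struct drm_xe_ext_set_property gt_prop = {
+ *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
+ *		.property = DRM_XE_EU_STALL_PROP_GT_ID,
+ *		.value = 0,
+ *	};
+ *	struct drm_xe_ext_set_property rate_prop = {
+ *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
+ *		.base.next_extension = (uintptr_t)&gt_prop,
+ *		.property = DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
+ *		.value = 251 * 4,
+ *	};
+ *
+ * The head of the chain is what arrives here as @data via the xe
+ * observation ioctl stream-open path; the returned fd is then enabled
+ * with DRM_XE_OBSERVATION_IOCTL_ENABLE before read()/poll().
+ */
+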
+/**
+ * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
+ *
+ * @dev: DRM device pointer
+ * @data: pointer to first struct @drm_xe_ext_set_property in
+ * the chain of input properties from the user space.
+ * @file: DRM file pointer
+ *
+ * This function opens an EU stall data stream with input properties from
+ * the user space.
+ *
+ * Returns: EU stall data stream fd on success or a negative error code.
+ */
+int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct eu_stall_open_properties props = {};
+ int ret;
+
+ if (!xe_eu_stall_supported_on_platform(xe)) {
+ drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
+ return -ENODEV;
+ }
+
+ if (xe_observation_paranoid && !perfmon_capable()) {
+ drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
+ return -EACCES;
+ }
+
+ /* Initialize and set default values */
+ props.wait_num_reports = 1;
+ props.sampling_rate_mult = 4;
+
+ ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
+ if (ret)
+ return ret;
+
+ if (!props.gt) {
+ drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&props.gt->eu_stall->stream_lock);
+ ret = xe_eu_stall_stream_open_locked(dev, &props, file);
+ mutex_unlock(&props.gt->eu_stall->stream_lock);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h
new file mode 100644
index 000000000000..ed9d0f233566
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_eu_stall.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef __XE_EU_STALL_H__
+#define __XE_EU_STALL_H__
+
+#include "xe_gt_types.h"
+
+size_t xe_eu_stall_get_per_xecore_buf_size(void);
+size_t xe_eu_stall_data_record_size(struct xe_device *xe);
+size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates);
+
+int xe_eu_stall_init(struct xe_gt *gt);
+int xe_eu_stall_stream_open(struct drm_device *dev,
+ u64 data,
+ struct drm_file *file);
+
+static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe)
+{
+ return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20;
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
index 904cf47925aa..ed9183599e31 100644
--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -28,10 +28,10 @@
"\n" \
"#endif\n"
-static void print_usage(FILE *f)
+static void print_usage(FILE *f, const char *progname)
{
fprintf(f, "usage: %s <input-rule-file> <generated-c-source-file> <generated-c-header-file>\n",
- program_invocation_short_name);
+ progname);
}
static void print_parse_error(const char *err_msg, const char *line,
@@ -144,7 +144,7 @@ int main(int argc, const char *argv[])
if (argc < 3) {
fprintf(stderr, "ERROR: wrong arguments\n");
- print_usage(stderr);
+ print_usage(stderr, argv[0]);
return 1;
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 31c90577faf0..8cf70b228ff3 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -490,7 +490,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc)
gsc->proxy.component_added = true;
- return xe_device_add_action_or_reset(xe, xe_gsc_proxy_remove, gsc);
+ return devm_add_action_or_reset(xe->drm.dev, xe_gsc_proxy_remove, gsc);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 650a0ee56e97..5bd8dfdce300 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -19,6 +19,7 @@
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_eu_stall.h"
#include "xe_exec_queue.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
@@ -613,6 +614,10 @@ int xe_gt_init(struct xe_gt *gt)
xe_gt_record_user_engines(gt);
+ err = xe_eu_stall_init(gt);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index cc2ae159298e..2a958c92d8ea 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -12,25 +12,10 @@
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_macros.h"
#include "xe_mmio.h"
-static u32 read_reference_ts_freq(struct xe_gt *gt)
-{
- u32 ts_override = xe_mmio_read32(&gt->mmio, TIMESTAMP_OVERRIDE);
- u32 base_freq, frac_freq;
-
- base_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK,
- ts_override) + 1;
- base_freq *= 1000000;
-
- frac_freq = REG_FIELD_GET(TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK,
- ts_override);
- frac_freq = 1000000 / (frac_freq + 1);
-
- return base_freq + frac_freq;
-}
-
static u32 get_crystal_clock_freq(u32 rpm_config_reg)
{
const u32 f19_2_mhz = 19200000;
@@ -57,26 +42,30 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
int xe_gt_clock_init(struct xe_gt *gt)
{
- u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE);
+ u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
u32 freq = 0;
- /* Assuming gen11+ so assert this assumption is correct */
- xe_gt_assert(gt, GRAPHICS_VER(gt_to_xe(gt)) >= 11);
-
- if (ctc_reg & CTC_SOURCE_DIVIDE_LOGIC) {
- freq = read_reference_ts_freq(gt);
- } else {
- u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
-
- freq = get_crystal_clock_freq(c0);
-
- /*
- * Now figure out how the command stream's timestamp
- * register increments from this frequency (it might
- * increment only every few clock cycle).
- */
- freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
- }
+ /*
+ * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later
+ * platforms. In theory it could be a valid setting for pre-Xe2
+ * platforms, but there's no documentation on how to properly handle
+ * this case. Reading TIMESTAMP_OVERRIDE, as the driver attempted in
+ * the past, has been confirmed as incorrect by the hardware architects.
+ *
+ * For now just warn if we ever encounter hardware in the wild that
+ * has this setting and move on as if it hadn't been set.
+ */
+ if (xe_mmio_read32(&gt->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC)
+ xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n");
+
+ freq = get_crystal_clock_freq(c0);
+
+ /*
+ * Now figure out how the command stream's timestamp
+ * register increments from this frequency (it might
+ * increment only every few clock cycles).
+ */
+ freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
gt->info.reference_clock = freq;
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 46701ca11ce0..17d69039b866 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -137,7 +137,7 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf,
bool atomic;
xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES, xe_vma_size(vma));
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024);
trace_xe_vma_pagefault(vma);
atomic = access_is_atomic(pf->access_type);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 6b5f849a0722..4efde5f46b43 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -114,7 +114,6 @@ static const struct xe_reg tgl_runtime_regs[] = {
GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
- TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
};
static const struct xe_reg ats_m_runtime_regs[] = {
@@ -127,7 +126,6 @@ static const struct xe_reg ats_m_runtime_regs[] = {
XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
- TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
};
static const struct xe_reg pvc_runtime_regs[] = {
@@ -140,7 +138,6 @@ static const struct xe_reg pvc_runtime_regs[] = {
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
CTC_MODE, /* _MMIO(0xA26C) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
- TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
};
static const struct xe_reg ver_1270_runtime_regs[] = {
@@ -155,7 +152,6 @@ static const struct xe_reg ver_1270_runtime_regs[] = {
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
- TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
};
static const struct xe_reg ver_2000_runtime_regs[] = {
@@ -173,7 +169,6 @@ static const struct xe_reg ver_2000_runtime_regs[] = {
XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
CTC_MODE, /* _MMIO(0xa26c) */
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
- TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */
};
static const struct xe_reg ver_3000_runtime_regs[] = {
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4831549da319..a439261bf4d7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -47,12 +47,19 @@ static int guc_action_vf_reset(struct xe_guc *guc)
return ret > 0 ? -EPROTO : ret;
}
+#define GUC_RESET_VF_STATE_RETRY_MAX 10
static int vf_reset_guc_state(struct xe_gt *gt)
{
+ unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX;
struct xe_guc *guc = &gt->uc.guc;
int err;
- err = guc_action_vf_reset(guc);
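+ /* Retry only while the action keeps timing out, at most GUC_RESET_VF_STATE_RETRY_MAX times. */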
+ do {
+ err = guc_action_vf_reset(guc);
+ if (!err || err != -ETIMEDOUT)
+ break;
+ } while (--retry);
+
if (unlikely(err))
xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err));
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 2e9879ea4674..6155ea354432 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -23,13 +23,13 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
if (id >= __XE_GT_STATS_NUM_IDS)
return;
- atomic_add(incr, &gt->stats.counters[id]);
+ atomic64_add(incr, &gt->stats.counters[id]);
}
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
"tlb_inval_count",
"vma_pagefault_count",
- "vma_pagefault_bytes",
+ "vma_pagefault_kb",
};
/**
@@ -44,8 +44,8 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
enum xe_gt_stats_id id;
for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id)
- drm_printf(p, "%s: %d\n", stat_description[id],
- atomic_read(&gt->stats.counters[id]));
+ drm_printf(p, "%s: %lld\n", stat_description[id],
+ atomic64_read(&gt->stats.counters[id]));
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index b072bd80c4b9..d556771f99d6 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -9,7 +9,7 @@
enum xe_gt_stats_id {
XE_GT_STATS_ID_TLB_INVAL,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
- XE_GT_STATS_ID_VMA_PAGEFAULT_BYTES,
+ XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index 746b325bbf6e..a72d26ba0653 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -25,6 +25,19 @@ void xe_gt_topology_init(struct xe_gt *gt);
void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+/**
+ * xe_gt_topology_mask_last_dss() - Returns the index of the last DSS in a mask.
+ * @mask: Input DSS mask
+ *
+ * Return: Index of the last DSS in the input DSS mask,
+ * XE_MAX_DSS_FUSE_BITS if DSS mask is empty.
+ */
+static inline unsigned int
+xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask)
+{
+ return find_last_bit(mask, XE_MAX_DSS_FUSE_BITS);
+}
+
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 6e66bf0e8b3f..f67474e06fb3 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -139,7 +139,7 @@ struct xe_gt {
/** @stats: GT stats */
struct {
/** @stats.counters: counters for various GT stats */
- atomic_t counters[__XE_GT_STATS_NUM_IDS];
+ atomic64_t counters[__XE_GT_STATS_NUM_IDS];
} stats;
#endif
@@ -430,6 +430,9 @@ struct xe_gt {
/** @oa: oa observation subsystem per gt info */
struct xe_oa_gt oa;
+
+ /** @eu_stall: EU stall counters subsystem per gt info */
+ struct xe_eu_stall_gt *eu_stall;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 1619c0a52db9..bc1ff0a4e1e7 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -27,6 +27,7 @@
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_db_mgr.h"
+#include "xe_guc_engine_activity.h"
#include "xe_guc_hwconfig.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
@@ -744,6 +745,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_guc_engine_activity_init(guc);
+ if (ret)
+ return ret;
+
ret = xe_guc_buf_cache_init(&guc->buf);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index fab259adc380..e7c9e095a19f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -342,7 +342,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
offset = guc_ads_waklv_offset(ads);
remain = guc_ads_waklv_size(ads);
- if (XE_WA(gt, 14019882105))
+ if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562))
guc_waklv_enable_simple(ads,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
&offset, &remain);
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
new file mode 100644
index 000000000000..2a457dcf31d5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "regs/xe_gt_regs.h"
+
+#include "xe_bo.h"
+#include "xe_force_wake.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_engine_activity.h"
+#include "xe_guc_ct.h"
+#include "xe_hw_engine.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_trace_guc.h"
+
+#define TOTAL_QUANTA 0x8000
+
+static struct iosys_map engine_activity_map(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+ u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
+ size_t offset;
+
+ offset = offsetof(struct guc_engine_activity_data,
+ engine_activity[guc_class][hwe->logical_instance]);
+
+ return IOSYS_MAP_INIT_OFFSET(&buffer->activity_bo->vmap, offset);
+}
+
+static struct iosys_map engine_metadata_map(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+
+ return buffer->metadata_bo->vmap;
+}
+
+static int allocate_engine_activity_group(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct xe_device *xe = guc_to_xe(guc);
+ u32 num_activity_group = 1; /* Will be modified for VF */
+
+ engine_activity->eag = drmm_kcalloc(&xe->drm, num_activity_group,
+ sizeof(struct engine_activity_group), GFP_KERNEL);
+
+ if (!engine_activity->eag)
+ return -ENOMEM;
+
+ engine_activity->num_activity_group = num_activity_group;
+
+ return 0;
+}
+
+static int allocate_engine_activity_buffers(struct xe_guc *guc,
+ struct engine_activity_buffer *buffer)
+{
+ u32 metadata_size = sizeof(struct guc_engine_activity_metadata);
+ u32 size = sizeof(struct guc_engine_activity_data);
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo, *metadata_bo;
+
+ metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size),
+ ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+
+ if (IS_ERR(metadata_bo))
+ return PTR_ERR(metadata_bo);
+
+ bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size),
+ ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+
+ if (IS_ERR(bo)) {
+ xe_bo_unpin_map_no_vm(metadata_bo);
+ return PTR_ERR(bo);
+ }
+
+ buffer->metadata_bo = metadata_bo;
+ buffer->activity_bo = bo;
+ return 0;
+}
+
+static void free_engine_activity_buffers(struct engine_activity_buffer *buffer)
+{
+ xe_bo_unpin_map_no_vm(buffer->metadata_bo);
+ xe_bo_unpin_map_no_vm(buffer->activity_bo);
+}
+
+static bool is_engine_activity_supported(struct xe_guc *guc)
+{
+ struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+ struct xe_uc_fw_version required = { 1, 14, 1 };
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ if (IS_SRIOV_VF(gt_to_xe(gt))) {
+ xe_gt_info(gt, "engine activity stats not supported on VFs\n");
+ return false;
+ }
+
+ /* engine activity stats are supported from GuC interface version 1.14.1 */
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER_STRUCT(required)) {
+ xe_gt_info(gt,
+ "engine activity stats unsupported in GuC interface v%u.%u.%u, need v%u.%u.%u or higher\n",
+ version->major, version->minor, version->patch, required.major,
+ required.minor, required.patch);
+ return false;
+ }
+
+ return true;
+}
+
+static struct engine_activity *hw_engine_to_engine_activity(struct xe_hw_engine *hwe)
+{
+ struct xe_guc *guc = &hwe->gt->uc.guc;
+ struct engine_activity_group *eag = &guc->engine_activity.eag[0];
+ u16 guc_class = xe_engine_class_to_guc_class(hwe->class);
+
+ return &eag->engine[guc_class][hwe->logical_instance];
+}
+
+static u64 cpu_ns_to_guc_tsc_tick(ktime_t ns, u32 freq)
+{
+ return mul_u64_u32_div(ns, freq, NSEC_PER_SEC);
+}
+
+#define read_engine_activity_record(xe_, map_, field_) \
+ xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity, field_)
+
+#define read_metadata_record(xe_, map_, field_) \
+ xe_map_rd_field(xe_, map_, 0, struct guc_engine_activity_metadata, field_)
+
+static u64 get_engine_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
+ struct guc_engine_activity *cached_activity = &ea->activity;
+ struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct iosys_map activity_map, metadata_map;
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 last_update_tick, global_change_num;
+ u64 active_ticks, gpm_ts;
+ u16 change_num;
+
+ activity_map = engine_activity_map(guc, hwe);
+ metadata_map = engine_metadata_map(guc);
+ global_change_num = read_metadata_record(xe, &metadata_map, global_change_num);
+
+ /* GuC has not initialized activity data yet, return 0 */
+ if (!global_change_num)
+ goto update;
+
+ if (global_change_num == cached_metadata->global_change_num)
+ goto update;
+
+ cached_metadata->global_change_num = global_change_num;
+ change_num = read_engine_activity_record(xe, &activity_map, change_num);
+
+ if (!change_num || change_num == cached_activity->change_num)
+ goto update;
+
+ /* read engine activity values */
+ last_update_tick = read_engine_activity_record(xe, &activity_map, last_update_tick);
+ active_ticks = read_engine_activity_record(xe, &activity_map, active_ticks);
+
+ /* activity calculations */
+ ea->running = !!last_update_tick;
+ ea->total += active_ticks - cached_activity->active_ticks;
+ ea->active = 0;
+
+ /* cache the counter */
+ cached_activity->change_num = change_num;
+ cached_activity->last_update_tick = last_update_tick;
+ cached_activity->active_ticks = active_ticks;
+
+update:
+ if (ea->running) {
+ gpm_ts = xe_mmio_read64_2x32(&gt->mmio, MISC_STATUS_0) >>
+ engine_activity->gpm_timestamp_shift;
+ ea->active = lower_32_bits(gpm_ts) - cached_activity->last_update_tick;
+ }
+
+ trace_xe_guc_engine_activity(xe, ea, hwe->name, hwe->instance);
+
+ return ea->total + ea->active;
+}
+
+static u64 get_engine_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ struct engine_activity *ea = hw_engine_to_engine_activity(hwe);
+ struct guc_engine_activity_metadata *cached_metadata = &ea->metadata;
+ struct guc_engine_activity *cached_activity = &ea->activity;
+ struct iosys_map activity_map, metadata_map;
+ struct xe_device *xe = guc_to_xe(guc);
+ ktime_t now, cpu_delta;
+ u64 numerator;
+ u16 quanta_ratio;
+
+ activity_map = engine_activity_map(guc, hwe);
+ metadata_map = engine_metadata_map(guc);
+
+ if (!cached_metadata->guc_tsc_frequency_hz)
+ cached_metadata->guc_tsc_frequency_hz = read_metadata_record(xe, &metadata_map,
+ guc_tsc_frequency_hz);
+
+ quanta_ratio = read_engine_activity_record(xe, &activity_map, quanta_ratio);
+ cached_activity->quanta_ratio = quanta_ratio;
+
+ /* Total ticks calculations */
+ now = ktime_get();
+ cpu_delta = now - ea->last_cpu_ts;
+ ea->last_cpu_ts = now;
+ numerator = (ea->quanta_remainder_ns + cpu_delta) * cached_activity->quanta_ratio;
+ ea->quanta_ns += numerator / TOTAL_QUANTA;
+ ea->quanta_remainder_ns = numerator % TOTAL_QUANTA;
+ ea->quanta = cpu_ns_to_guc_tsc_tick(ea->quanta_ns, cached_metadata->guc_tsc_frequency_hz);
+
+ trace_xe_guc_engine_activity(xe, ea, hwe->name, hwe->instance);
+
+ return ea->quanta;
+}
+
+static int enable_engine_activity_stats(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+ u32 action[] = {
+ XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER,
+ xe_bo_ggtt_addr(buffer->metadata_bo),
+ 0,
+ xe_bo_ggtt_addr(buffer->activity_bo),
+ 0,
+ };
+
+ /* Blocking here to ensure the buffers are ready before reading them */
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static void engine_activity_set_cpu_ts(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct engine_activity_group *eag = &engine_activity->eag[0];
+ int i, j;
+
+ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++)
+ for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; j++)
+ eag->engine[i][j].last_cpu_ts = ktime_get();
+}
+
+static u32 gpm_timestamp_shift(struct xe_gt *gt)
+{
+ u32 reg;
+
+ reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
+
+ return 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
+}
+
+/**
+ * xe_guc_engine_activity_active_ticks - Get engine active ticks
+ * @guc: The GuC object
+ * @hwe: The hw_engine object
+ *
+ * Return: accumulated ticks @hwe was active since engine activity stats were enabled.
+ */
+u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ if (!xe_guc_engine_activity_supported(guc))
+ return 0;
+
+ return get_engine_active_ticks(guc, hwe);
+}
+
+/**
+ * xe_guc_engine_activity_total_ticks - Get engine total ticks
+ * @guc: The GuC object
+ * @hwe: The hw_engine object
+ *
+ * Return: accumulated quanta of ticks allocated for the engine
+ */
+u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe)
+{
+ if (!xe_guc_engine_activity_supported(guc))
+ return 0;
+
+ return get_engine_total_ticks(guc, hwe);
+}
+
+/**
+ * xe_guc_engine_activity_supported - Check support for engine activity stats
+ * @guc: The GuC object
+ *
+ * Engine activity stats are supported from GuC interface version 1.14.1
+ *
+ * Return: true if engine activity stats supported, false otherwise
+ */
+bool xe_guc_engine_activity_supported(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+
+ return engine_activity->supported;
+}
+
+/**
+ * xe_guc_engine_activity_enable_stats - Enable engine activity stats
+ * @guc: The GuC object
+ *
+ * Enable engine activity stats and set initial timestamps
+ */
+void xe_guc_engine_activity_enable_stats(struct xe_guc *guc)
+{
+ int ret;
+
+ if (!xe_guc_engine_activity_supported(guc))
+ return;
+
+ ret = enable_engine_activity_stats(guc);
+ if (ret)
+ xe_gt_err(guc_to_gt(guc), "failed to enable activity stats %d\n", ret);
+ else
+ engine_activity_set_cpu_ts(guc);
+}
+
+static void engine_activity_fini(void *arg)
+{
+ struct xe_guc_engine_activity *engine_activity = arg;
+ struct engine_activity_buffer *buffer = &engine_activity->device_buffer;
+
+ free_engine_activity_buffers(buffer);
+}
+
+/**
+ * xe_guc_engine_activity_init - Initialize the engine activity data
+ * @guc: The GuC object
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_guc_engine_activity_init(struct xe_guc *guc)
+{
+ struct xe_guc_engine_activity *engine_activity = &guc->engine_activity;
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ engine_activity->supported = is_engine_activity_supported(guc);
+ if (!engine_activity->supported)
+ return 0;
+
+ ret = allocate_engine_activity_group(guc);
+ if (ret) {
+ xe_gt_err(gt, "failed to allocate engine activity group (%pe)\n", ERR_PTR(ret));
+ return ret;
+ }
+
+ ret = allocate_engine_activity_buffers(guc, &engine_activity->device_buffer);
+ if (ret) {
+ xe_gt_err(gt, "failed to allocate engine activity buffers (%pe)\n", ERR_PTR(ret));
+ return ret;
+ }
+
+ engine_activity->gpm_timestamp_shift = gpm_timestamp_shift(gt);
+
+ return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, engine_activity_fini,
+ engine_activity);
+}
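
get_engine_total_ticks() above scales elapsed CPU time by quanta_ratio/TOTAL_QUANTA and carries the division remainder into the next sample so rounding error does not accumulate. A standalone sketch of that fixed-point step, mirroring the calculation above and assuming the same 0x8000 denominator:

/* Sketch only: fixed-point accumulation of allotted time. */
#define TOTAL_QUANTA 0x8000	/* same value as in the file above */

struct quanta_acc {
	u64 quanta_ns;		/* scaled time so far, in ns */
	u64 remainder_ns;	/* carry between samples */
};

static void quanta_accumulate(struct quanta_acc *acc, u64 cpu_delta_ns, u16 quanta_ratio)
{
	u64 numerator = (acc->remainder_ns + cpu_delta_ns) * quanta_ratio;

	acc->quanta_ns += numerator / TOTAL_QUANTA;
	acc->remainder_ns = numerator % TOTAL_QUANTA;
}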
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.h b/drivers/gpu/drm/xe/xe_guc_engine_activity.h
new file mode 100644
index 000000000000..a042d4cb404c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_ACTIVITY_H_
+#define _XE_GUC_ENGINE_ACTIVITY_H_
+
+#include <linux/types.h>
+
+struct xe_hw_engine;
+struct xe_guc;
+
+int xe_guc_engine_activity_init(struct xe_guc *guc);
+bool xe_guc_engine_activity_supported(struct xe_guc *guc);
+void xe_guc_engine_activity_enable_stats(struct xe_guc *guc);
+u64 xe_guc_engine_activity_active_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
+u64 xe_guc_engine_activity_total_ticks(struct xe_guc *guc, struct xe_hw_engine *hwe);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h
new file mode 100644
index 000000000000..5cdd034b6b70
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity_types.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ENGINE_ACTIVITY_TYPES_H_
+#define _XE_GUC_ENGINE_ACTIVITY_TYPES_H_
+
+#include <linux/types.h>
+
+#include "xe_guc_fwif.h"
+/**
+ * struct engine_activity - Engine specific activity data
+ *
+ * Contains engine specific activity data and snapshot of the
+ * structures from GuC
+ */
+struct engine_activity {
+ /** @active: current activity */
+ u64 active;
+
+ /** @last_cpu_ts: cpu timestamp in nsec of previous sample */
+ u64 last_cpu_ts;
+
+ /** @quanta: total quanta used on HW */
+ u64 quanta;
+
+ /** @quanta_ns: total quanta used on HW, in nsec */
+ u64 quanta_ns;
+
+ /**
+ * @quanta_remainder_ns: remainder when the CPU time is scaled as
+ * per the quanta_ratio. This remainder is used in subsequent
+ * quanta calculations.
+ */
+ u64 quanta_remainder_ns;
+
+ /** @total: total engine activity */
+ u64 total;
+
+ /** @running: true if engine is running some work */
+ bool running;
+
+ /** @metadata: snapshot of engine activity metadata */
+ struct guc_engine_activity_metadata metadata;
+
+ /** @activity: snapshot of engine activity counter */
+ struct guc_engine_activity activity;
+};
+
+/**
+ * struct engine_activity_group - Activity data for all engines
+ */
+struct engine_activity_group {
+ /** @engine: engine specific activity data */
+ struct engine_activity engine[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+};
+
+/**
+ * struct engine_activity_buffer - engine activity buffers
+ *
+ * This contains the buffers allocated for metadata and activity data
+ */
+struct engine_activity_buffer {
+ /** @activity_bo: object allocated to hold activity data */
+ struct xe_bo *activity_bo;
+
+ /** @metadata_bo: object allocated to hold activity metadata */
+ struct xe_bo *metadata_bo;
+};
+
+/**
+ * struct xe_guc_engine_activity - Data used by engine activity implementation
+ */
+struct xe_guc_engine_activity {
+ /** @gpm_timestamp_shift: Right shift value for the gpm timestamp */
+ u32 gpm_timestamp_shift;
+
+ /** @num_activity_group: number of activity groups */
+ u32 num_activity_group;
+
+ /** @supported: indicates support for engine activity stats */
+ bool supported;
+
+ /** @eag: holds the device level engine activity data */
+ struct engine_activity_group *eag;
+
+ /** @device_buffer: buffer object for global engine activity */
+ struct engine_activity_buffer device_buffer;
+};
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 057153f89b30..6f57578b07cb 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -208,6 +208,25 @@ struct guc_engine_usage {
struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
} __packed;
+/* Engine Activity stats */
+struct guc_engine_activity {
+ u16 change_num;
+ u16 quanta_ratio;
+ u32 last_update_tick;
+ u64 active_ticks;
+} __packed;
+
+struct guc_engine_activity_data {
+ struct guc_engine_activity engine_activity[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
+struct guc_engine_activity_metadata {
+ u32 guc_tsc_frequency_hz;
+ u32 lag_latency_usec;
+ u32 global_change_num;
+ u32 reserved;
+} __packed;
+
/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
enum xe_guc_recv_message {
XE_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
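
The three structs added above are part of the GuC ABI and are read straight out of GGTT-mapped buffers, so their packed layout must stay stable. A sketch, as an assumption rather than existing driver code, of compile-time checks that would pin the sizes down (16 bytes per record and per metadata block follow from the field widths):

/* Sketch only: layout guards for the shared GuC activity structs. */
static_assert(sizeof(struct guc_engine_activity) == 16);
static_assert(sizeof(struct guc_engine_activity_metadata) == 16);
static_assert(sizeof(struct guc_engine_activity_data) ==
	      16 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS);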
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 913c74d6e2ae..b6a2dd742ebd 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1248,6 +1248,8 @@ static void __guc_exec_queue_fini_async(struct work_struct *w)
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
+ /* Confirm no work left behind accessing device structures */
+ cancel_delayed_work_sync(&ge->sched.base.work_tdr);
release_guc_id(guc, q);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 573aa6308380..63bac64429a5 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -13,6 +13,7 @@
#include "xe_guc_ads_types.h"
#include "xe_guc_buf_types.h"
#include "xe_guc_ct_types.h"
+#include "xe_guc_engine_activity_types.h"
#include "xe_guc_fwif.h"
#include "xe_guc_log_types.h"
#include "xe_guc_pc_types.h"
@@ -103,6 +104,9 @@ struct xe_guc {
/** @relay: GuC Relay Communication used in SR-IOV */
struct xe_guc_relay relay;
+ /** @engine_activity: Device specific engine activity */
+ struct xe_guc_engine_activity engine_activity;
+
/**
* @notify_reg: Register which is written to notify GuC of H2G messages
*/
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 06dc78d3a812..27d11e06a82b 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -89,12 +89,9 @@ static void heci_gsc_release_dev(struct device *dev)
kfree(adev);
}
-void xe_heci_gsc_fini(struct xe_device *xe)
+static void xe_heci_gsc_fini(void *arg)
{
- struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
-
- if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
- return;
+ struct xe_heci_gsc *heci_gsc = arg;
if (heci_gsc->adev) {
struct auxiliary_device *aux_dev = &heci_gsc->adev->aux_dev;
@@ -106,6 +103,7 @@ void xe_heci_gsc_fini(struct xe_device *xe)
if (heci_gsc->irq >= 0)
irq_free_desc(heci_gsc->irq);
+
heci_gsc->irq = -1;
}
@@ -172,14 +170,14 @@ static int heci_gsc_add_device(struct xe_device *xe, const struct heci_gsc_def *
return ret;
}
-void xe_heci_gsc_init(struct xe_device *xe)
+int xe_heci_gsc_init(struct xe_device *xe)
{
struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
- const struct heci_gsc_def *def;
+ const struct heci_gsc_def *def = NULL;
int ret;
if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi)
- return;
+ return 0;
heci_gsc->irq = -1;
@@ -191,29 +189,24 @@ void xe_heci_gsc_init(struct xe_device *xe)
def = &heci_gsc_def_dg2;
} else if (xe->info.platform == XE_DG1) {
def = &heci_gsc_def_dg1;
- } else {
- drm_warn_once(&xe->drm, "Unknown platform\n");
- return;
}
- if (!def->name) {
- drm_warn_once(&xe->drm, "HECI is not implemented!\n");
- return;
+ if (!def || !def->name) {
+ drm_warn(&xe->drm, "HECI is not implemented!\n");
+ return 0;
}
- if (!def->use_polling && !xe_survivability_mode_enabled(xe)) {
+ ret = devm_add_action_or_reset(xe->drm.dev, xe_heci_gsc_fini, heci_gsc);
+ if (ret)
+ return ret;
+
+ if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
ret = heci_gsc_irq_setup(xe);
if (ret)
- goto fail;
+ return ret;
}
- ret = heci_gsc_add_device(xe, def);
- if (ret)
- goto fail;
-
- return;
-fail:
- xe_heci_gsc_fini(xe);
+ return heci_gsc_add_device(xe, def);
}
void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.h b/drivers/gpu/drm/xe/xe_heci_gsc.h
index 48b3b1838045..745eb6783942 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.h
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.h
@@ -33,8 +33,7 @@ struct xe_heci_gsc {
int irq;
};
-void xe_heci_gsc_init(struct xe_device *xe);
-void xe_heci_gsc_fini(struct xe_device *xe);
+int xe_heci_gsc_init(struct xe_device *xe);
void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir);
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 089834467880..2e4ae61567d8 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -166,13 +166,20 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
{
unsigned long timeout =
jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+ unsigned long *pfns;
struct xe_userptr *userptr;
struct xe_vma *vma = &uvma->vma;
u64 userptr_start = xe_vma_userptr(vma);
u64 userptr_end = userptr_start + xe_vma_size(vma);
struct xe_vm *vm = xe_vma_vm(vma);
- struct hmm_range hmm_range;
+ struct hmm_range hmm_range = {
+ .pfn_flags_mask = 0, /* ignore pfns */
+ .default_flags = HMM_PFN_REQ_FAULT,
+ .start = userptr_start,
+ .end = userptr_end,
+ .notifier = &uvma->userptr.notifier,
+ .dev_private_owner = vm->xe,
+ };
bool write = !xe_vma_read_only(vma);
unsigned long notifier_seq;
u64 npages;
@@ -199,19 +206,14 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
return -ENOMEM;
if (write)
- flags |= HMM_PFN_REQ_WRITE;
+ hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
if (!mmget_not_zero(userptr->notifier.mm)) {
ret = -EFAULT;
goto free_pfns;
}
- hmm_range.default_flags = flags;
hmm_range.hmm_pfns = pfns;
- hmm_range.notifier = &userptr->notifier;
- hmm_range.start = userptr_start;
- hmm_range.end = userptr_end;
- hmm_range.dev_private_owner = vm->xe;
while (true) {
hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 2c5a24a13e87..6f185632da14 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -12,6 +12,8 @@
#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>
+#include <generated/xe_wa_oob.h>
+
#include "abi/guc_actions_slpc_abi.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
@@ -35,6 +37,7 @@
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_sync.h"
+#include "xe_wa.h"
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
@@ -812,11 +815,8 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
struct xe_mmio *mmio = &stream->gt->mmio;
u32 sqcnt1;
- /*
- * Wa_1508761755:xehpsdv, dg2
- * Enable thread stall DOP gating and EU DOP gating.
- */
- if (stream->oa->xe->info.platform == XE_DG2) {
+ /* Enable thread stall DOP gating and EU DOP gating. */
+ if (XE_WA(stream->gt, 1508761755)) {
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
_MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
@@ -1065,11 +1065,10 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
int ret;
/*
- * Wa_1508761755:xehpsdv, dg2
* EU NOA signals behave incorrectly if EU clock gating is enabled.
* Disable thread stall DOP gating and EU DOP gating.
*/
- if (stream->oa->xe->info.platform == XE_DG2) {
+ if (XE_WA(stream->gt, 1508761755)) {
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
_MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
@@ -1690,7 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];
stream->sample = param->sample;
- stream->periodic = param->period_exponent > 0;
+ stream->periodic = param->period_exponent >= 0;
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
stream->wait_num_reports = param->wait_num_reports;
@@ -1720,12 +1719,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
}
/*
- * Wa_1509372804:pvc
- *
* GuC reset of engines causes OA to lose configuration
* state. Prevent this by overriding GUCRC mode.
*/
- if (stream->oa->xe->info.platform == XE_PVC) {
+ if (XE_WA(stream->gt, 1509372804)) {
ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
SLPC_GUCRC_MODE_GUCRC_NO_RC6);
if (ret)
@@ -1857,23 +1854,14 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
u32 reg, shift;
- /*
- * Wa_18013179988:dg2
- * Wa_14015568240:pvc
- * Wa_14015846243:mtl
- */
- switch (gt_to_xe(gt)->info.platform) {
- case XE_DG2:
- case XE_PVC:
- case XE_METEORLAKE:
+ if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) {
xe_pm_runtime_get(gt_to_xe(gt));
reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
return gt->info.reference_clock << (3 - shift);
-
- default:
+ } else {
return gt->info.reference_clock;
}
}
@@ -1971,6 +1959,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
}
param.xef = xef;
+ param.period_exponent = -1;
ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
if (ret)
return ret;
@@ -2025,7 +2014,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
goto err_exec_q;
}
- if (param.period_exponent > 0) {
+ if (param.period_exponent >= 0) {
u64 oa_period, oa_freq_hz;
/* Requesting samples from OAG buffer is a privileged operation */
diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c
index 57cf01efc07f..e3f9b546207e 100644
--- a/drivers/gpu/drm/xe/xe_observation.c
+++ b/drivers/gpu/drm/xe/xe_observation.c
@@ -8,6 +8,7 @@
#include <uapi/drm/xe_drm.h>
+#include "xe_eu_stall.h"
#include "xe_oa.h"
#include "xe_observation.h"
@@ -29,6 +30,17 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_observation_param *
}
}
+static int xe_eu_stall_ioctl(struct drm_device *dev, struct drm_xe_observation_param *arg,
+ struct drm_file *file)
+{
+ switch (arg->observation_op) {
+ case DRM_XE_OBSERVATION_OP_STREAM_OPEN:
+ return xe_eu_stall_stream_open(dev, arg->param, file);
+ default:
+ return -EINVAL;
+ }
+}
+
/**
* xe_observation_ioctl - The top level observation layer ioctl
* @dev: @drm_device
@@ -51,6 +63,8 @@ int xe_observation_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
switch (arg->observation_type) {
case DRM_XE_OBSERVATION_TYPE_OA:
return xe_oa_ioctl(dev, arg, file);
+ case DRM_XE_OBSERVATION_TYPE_EU_STALL:
+ return xe_eu_stall_ioctl(dev, arg, file);
default:
return -EINVAL;
}
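
The dispatcher above routes DRM_XE_OBSERVATION_TYPE_EU_STALL stream opens to xe_eu_stall_stream_open(). A hedged userspace sketch of driving it, assuming the installed uapi header and that arg->param points at an extension chain describing the stream properties:

/* Sketch only: open an EU stall stream via the observation ioctl. */
#include <sys/ioctl.h>
#include <drm/xe_drm.h>		/* assumes installed uapi headers */

static int open_eu_stall_stream(int drm_fd, __u64 properties_ext)
{
	struct drm_xe_observation_param param = {
		.observation_type = DRM_XE_OBSERVATION_TYPE_EU_STALL,
		.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
		.param = properties_ext,
	};

	/* On success the ioctl returns a new stream fd. */
	return ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &param);
}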
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f8417f4d8ce6..8b6658b214be 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -765,21 +765,16 @@ static int xe_info_init(struct xe_device *xe,
static void xe_pci_remove(struct pci_dev *pdev)
{
- struct xe_device *xe;
-
- xe = pdev_to_xe_device(pdev);
- if (!xe) /* driver load aborted, nothing to cleanup */
- return;
+ struct xe_device *xe = pdev_to_xe_device(pdev);
if (IS_SRIOV_PF(xe))
xe_pci_sriov_configure(pdev, 0);
- if (xe_survivability_mode_enabled(xe))
- return xe_survivability_mode_remove(xe);
+ if (xe_survivability_mode_is_enabled(xe))
+ return;
xe_device_remove(xe);
xe_pm_runtime_fini(xe);
- pci_set_drvdata(pdev, NULL);
}
/*
@@ -851,13 +846,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = xe_device_probe_early(xe);
/*
- * In Boot Survivability mode, no drm card is exposed
- * and driver is loaded with bare minimum to allow
- * for firmware to be flashed through mei. Return
- * success if survivability mode is enabled.
+ * In Boot Survivability mode, no drm card is exposed and the driver is
+ * loaded with the bare minimum to allow firmware to be flashed through
+ * mei. If early probe fails, check if survivability mode is flagged by
+ * HW to be enabled. In that case enable it and return success.
*/
if (err) {
- if (xe_survivability_mode_enabled(xe))
+ if (xe_survivability_mode_required(xe) &&
+ xe_survivability_mode_enable(xe))
return 0;
return err;
@@ -900,10 +896,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return err;
err = xe_device_probe(xe);
- if (err) {
- xe_device_call_remove_actions(xe);
+ if (err)
return err;
- }
err = xe_pm_init(xe);
if (err)
@@ -953,7 +947,7 @@ static int xe_pci_suspend(struct device *dev)
struct xe_device *xe = pdev_to_xe_device(pdev);
int err;
- if (xe_survivability_mode_enabled(xe))
+ if (xe_survivability_mode_is_enabled(xe))
return -EBUSY;
err = xe_pm_suspend(xe);
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index aaceee748287..09ee8a06fe2e 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -62,6 +62,55 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
xe_gt_sriov_pf_control_trigger_flr(gt, n);
}
+static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ /* caller must use pci_dev_put() */
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ pci_iov_virtfn_devfn(pdev, vf_id));
+}
+
+static void pf_link_vfs(struct xe_device *xe, int num_vfs)
+{
+ struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev);
+ struct device_link *link;
+ struct pci_dev *pdev_vf;
+ unsigned int n;
+
+ /*
+ * When both PF and VF devices are enabled on the host, they resume
+ * in parallel during system resume.
+ *
+ * But the PF has to finish provisioning the VFs first to allow any
+ * VF to resume successfully.
+ *
+ * Create a parent-child device link between PF and VF devices that will
+ * enforce correct resume order.
+ */
+ for (n = 1; n <= num_vfs; n++) {
+ pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1);
+
+ /* unlikely, something weird is happening, abort */
+ if (!pdev_vf) {
+ xe_sriov_err(xe, "Cannot find VF%u device, aborting link%s creation!\n",
+ n, str_plural(num_vfs));
+ break;
+ }
+
+ link = device_link_add(&pdev_vf->dev, &pdev_pf->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
+ /* unlikely and harmless, continue with other VFs */
+ if (!link)
+ xe_sriov_notice(xe, "Failed linking VF%u\n", n);
+
+ pci_dev_put(pdev_vf);
+ }
+}
+
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -92,6 +141,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
if (err < 0)
goto failed;
+ pf_link_vfs(xe, num_vfs);
+
xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
num_vfs, total_vfs, str_plural(total_vfs));
return num_vfs;
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 3910a82328ee..4f62a6e515d6 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -7,16 +7,18 @@
#include <linux/device.h>
#include "xe_device.h"
+#include "xe_force_wake.h"
#include "xe_gt_idle.h"
+#include "xe_guc_engine_activity.h"
+#include "xe_hw_engine.h"
#include "xe_pm.h"
#include "xe_pmu.h"
/**
* DOC: Xe PMU (Performance Monitoring Unit)
*
- * Expose events/counters like GT-C6 residency and GT frequency to user land via
- * the perf interface. Events are per device. The GT can be selected with an
- * extra config sub-field (bits 60-63).
+ * Expose events/counters like GT-C6 residency, GT frequency and per-class-engine
+ * activity to user land via the perf interface. Events are per device.
*
* All events are listed in sysfs:
*
@@ -24,7 +26,18 @@
* $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/events/
* $ ls /sys/bus/event_source/devices/xe_0000_00_02.0/format/
*
- * The format directory has info regarding the configs that can be used.
+ * The following format parameters are available to read events,
+ * but only a few are valid with each event:
+ *
+ * gt[60:63] Selects gt for the event
+ * engine_class[20:27] Selects engine-class for event
+ * engine_instance[12:19] Selects the engine-instance for the event
+ *
+ * For engine-specific events (engine-*), the gt, engine_class and engine_instance parameters
+ * must be set as populated by DRM_XE_DEVICE_QUERY_ENGINES.
+ *
+ * For gt-specific events (gt-*), only the gt parameter must be passed. All other parameters will be 0.
+ *
* The standard perf tool can be used to grep for a certain event as well.
* Example:
*
@@ -35,20 +48,34 @@
* $ perf stat -e <event_name,gt=> -I <interval>
*/
-#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60)
-#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0)
+#define XE_PMU_EVENT_GT_MASK GENMASK_ULL(63, 60)
+#define XE_PMU_EVENT_ENGINE_CLASS_MASK GENMASK_ULL(27, 20)
+#define XE_PMU_EVENT_ENGINE_INSTANCE_MASK GENMASK_ULL(19, 12)
+#define XE_PMU_EVENT_ID_MASK GENMASK_ULL(11, 0)
static unsigned int config_to_event_id(u64 config)
{
return FIELD_GET(XE_PMU_EVENT_ID_MASK, config);
}
+static unsigned int config_to_engine_class(u64 config)
+{
+ return FIELD_GET(XE_PMU_EVENT_ENGINE_CLASS_MASK, config);
+}
+
+static unsigned int config_to_engine_instance(u64 config)
+{
+ return FIELD_GET(XE_PMU_EVENT_ENGINE_INSTANCE_MASK, config);
+}
+
static unsigned int config_to_gt_id(u64 config)
{
return FIELD_GET(XE_PMU_EVENT_GT_MASK, config);
}
-#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01
+#define XE_PMU_EVENT_GT_C6_RESIDENCY 0x01
+#define XE_PMU_EVENT_ENGINE_ACTIVE_TICKS 0x02
+#define XE_PMU_EVENT_ENGINE_TOTAL_TICKS 0x03
static struct xe_gt *event_to_gt(struct perf_event *event)
{
@@ -58,6 +85,59 @@ static struct xe_gt *event_to_gt(struct perf_event *event)
return xe_device_get_gt(xe, gt);
}
+static struct xe_hw_engine *event_to_hwe(struct perf_event *event)
+{
+ struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ struct drm_xe_engine_class_instance eci;
+ u64 config = event->attr.config;
+ struct xe_hw_engine *hwe;
+
+ eci.engine_class = config_to_engine_class(config);
+ eci.engine_instance = config_to_engine_instance(config);
+ eci.gt_id = config_to_gt_id(config);
+
+ hwe = xe_hw_engine_lookup(xe, eci);
+ if (!hwe || xe_hw_engine_is_reserved(hwe))
+ return NULL;
+
+ return hwe;
+}
+
+static bool is_engine_event(u64 config)
+{
+ unsigned int event_id = config_to_event_id(config);
+
+ return (event_id == XE_PMU_EVENT_ENGINE_TOTAL_TICKS ||
+ event_id == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
+}
+
+static bool event_gt_forcewake(struct perf_event *event)
+{
+ struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ u64 config = event->attr.config;
+ struct xe_gt *gt;
+ unsigned int *fw_ref;
+
+ if (!is_engine_event(config))
+ return true;
+
+ gt = xe_device_get_gt(xe, config_to_gt_id(config));
+
+ fw_ref = kzalloc(sizeof(*fw_ref), GFP_KERNEL);
+ if (!fw_ref)
+ return false;
+
+ *fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!*fw_ref) {
+ kfree(fw_ref);
+ return false;
+ }
+
+ event->pmu_private = fw_ref;
+
+ return true;
+}
+
static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
unsigned int id)
{
@@ -68,9 +148,47 @@ static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
pmu->supported_events & BIT_ULL(id);
}
+static bool event_param_valid(struct perf_event *event)
+{
+ struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ unsigned int engine_class, engine_instance;
+ u64 config = event->attr.config;
+ struct xe_gt *gt;
+
+ gt = xe_device_get_gt(xe, config_to_gt_id(config));
+ if (!gt)
+ return false;
+
+ engine_class = config_to_engine_class(config);
+ engine_instance = config_to_engine_instance(config);
+
+ switch (config_to_event_id(config)) {
+ case XE_PMU_EVENT_GT_C6_RESIDENCY:
+ if (engine_class || engine_instance)
+ return false;
+ break;
+ case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
+ case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
+ if (!event_to_hwe(event))
+ return false;
+ break;
+ }
+
+ return true;
+}
+
static void xe_pmu_event_destroy(struct perf_event *event)
{
struct xe_device *xe = container_of(event->pmu, typeof(*xe), pmu.base);
+ struct xe_gt *gt;
+ unsigned int *fw_ref = event->pmu_private;
+
+ if (fw_ref) {
+ gt = xe_device_get_gt(xe, config_to_gt_id(event->attr.config));
+ xe_force_wake_put(gt_to_fw(gt), *fw_ref);
+ kfree(fw_ref);
+ event->pmu_private = NULL;
+ }
drm_WARN_ON(&xe->drm, event->parent);
xe_pm_runtime_put(xe);
@@ -104,15 +222,37 @@ static int xe_pmu_event_init(struct perf_event *event)
if (has_branch_stack(event))
return -EOPNOTSUPP;
+ if (!event_param_valid(event))
+ return -ENOENT;
+
if (!event->parent) {
drm_dev_get(&xe->drm);
xe_pm_runtime_get(xe);
+ if (!event_gt_forcewake(event)) {
+ xe_pm_runtime_put(xe);
+ drm_dev_put(&xe->drm);
+ return -EINVAL;
+ }
event->destroy = xe_pmu_event_destroy;
}
return 0;
}
+static u64 read_engine_events(struct xe_gt *gt, struct perf_event *event)
+{
+ struct xe_hw_engine *hwe;
+ u64 val = 0;
+
+ hwe = event_to_hwe(event);
+ if (config_to_event_id(event->attr.config) == XE_PMU_EVENT_ENGINE_ACTIVE_TICKS)
+ val = xe_guc_engine_activity_active_ticks(&gt->uc.guc, hwe);
+ else
+ val = xe_guc_engine_activity_total_ticks(&gt->uc.guc, hwe);
+
+ return val;
+}
+
static u64 __xe_pmu_event_read(struct perf_event *event)
{
struct xe_gt *gt = event_to_gt(event);
@@ -123,6 +263,9 @@ static u64 __xe_pmu_event_read(struct perf_event *event)
switch (config_to_event_id(event->attr.config)) {
case XE_PMU_EVENT_GT_C6_RESIDENCY:
return xe_gt_idle_residency_msec(&gt->gtidle);
+ case XE_PMU_EVENT_ENGINE_ACTIVE_TICKS:
+ case XE_PMU_EVENT_ENGINE_TOTAL_TICKS:
+ return read_engine_events(gt, event);
}
return 0;
@@ -207,11 +350,15 @@ static void xe_pmu_event_del(struct perf_event *event, int flags)
xe_pmu_event_stop(event, PERF_EF_UPDATE);
}
-PMU_FORMAT_ATTR(gt, "config:60-63");
-PMU_FORMAT_ATTR(event, "config:0-11");
+PMU_FORMAT_ATTR(gt, "config:60-63");
+PMU_FORMAT_ATTR(engine_class, "config:20-27");
+PMU_FORMAT_ATTR(engine_instance, "config:12-19");
+PMU_FORMAT_ATTR(event, "config:0-11");
static struct attribute *pmu_format_attrs[] = {
&format_attr_event.attr,
+ &format_attr_engine_class.attr,
+ &format_attr_engine_instance.attr,
&format_attr_gt.attr,
NULL,
};
@@ -270,6 +417,8 @@ static ssize_t event_attr_show(struct device *dev,
XE_EVENT_ATTR_GROUP(v_, id_, &pmu_event_ ##v_.attr.attr)
XE_EVENT_ATTR_SIMPLE(gt-c6-residency, gt_c6_residency, XE_PMU_EVENT_GT_C6_RESIDENCY, "ms");
+XE_EVENT_ATTR_NOUNIT(engine-active-ticks, engine_active_ticks, XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
+XE_EVENT_ATTR_NOUNIT(engine-total-ticks, engine_total_ticks, XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
static struct attribute *pmu_empty_event_attrs[] = {
/* Empty - all events are added as groups with .attr_update() */
@@ -283,15 +432,23 @@ static const struct attribute_group pmu_events_attr_group = {
static const struct attribute_group *pmu_events_attr_update[] = {
&pmu_group_gt_c6_residency,
+ &pmu_group_engine_active_ticks,
+ &pmu_group_engine_total_ticks,
NULL,
};
static void set_supported_events(struct xe_pmu *pmu)
{
struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+ struct xe_gt *gt = xe_device_get_gt(xe, 0);
if (!xe->info.skip_guc_pc)
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
+
+ if (xe_guc_engine_activity_supported(&gt->uc.guc)) {
+ pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
+ pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
+ }
}
/**
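
The format attributes above define where each field lives in the 64-bit perf config: event id in bits 0-11, engine_instance in 12-19, engine_class in 20-27 and gt in 60-63. A small sketch of packing a config the same way event_to_hwe() unpacks it; the helper name is hypothetical:

/* Sketch only: compose a perf config from its sub-fields. */
#include <linux/bitfield.h>

static u64 xe_pmu_pack_config(unsigned int gt_id, unsigned int engine_class,
			      unsigned int engine_instance, unsigned int event_id)
{
	return FIELD_PREP(XE_PMU_EVENT_GT_MASK, gt_id) |
	       FIELD_PREP(XE_PMU_EVENT_ENGINE_CLASS_MASK, engine_class) |
	       FIELD_PREP(XE_PMU_EVENT_ENGINE_INSTANCE_MASK, engine_instance) |
	       FIELD_PREP(XE_PMU_EVENT_ID_MASK, event_id);
}

From the perf tool the same fields are supplied by name, e.g. perf stat -e xe_0000_00_02.0/engine-active-ticks,gt=0,engine_class=0,engine_instance=0/ -I 1000.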
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 3cd3f83e86b0..47499ca02693 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -132,14 +132,6 @@ static int pxp_wait_for_session_state(struct xe_pxp *pxp, u32 id, bool in_play)
static void pxp_invalidate_queues(struct xe_pxp *pxp);
-static void pxp_invalidate_state(struct xe_pxp *pxp)
-{
- pxp_invalidate_queues(pxp);
-
- if (pxp->status == XE_PXP_ACTIVE)
- pxp->key_instance++;
-}
-
static int pxp_terminate_hw(struct xe_pxp *pxp)
{
struct xe_gt *gt = pxp->gt;
@@ -193,7 +185,8 @@ static void pxp_terminate(struct xe_pxp *pxp)
mutex_lock(&pxp->mutex);
- pxp_invalidate_state(pxp);
+ if (pxp->status == XE_PXP_ACTIVE)
+ pxp->key_instance++;
/*
* we'll mark the status as needing termination on resume, so no need to
@@ -220,6 +213,8 @@ static void pxp_terminate(struct xe_pxp *pxp)
mutex_unlock(&pxp->mutex);
+ pxp_invalidate_queues(pxp);
+
ret = pxp_terminate_hw(pxp);
if (ret) {
drm_err(&xe->drm, "PXP termination failed: %pe\n", ERR_PTR(ret));
@@ -665,23 +660,15 @@ out:
return ret;
}
-/**
- * xe_pxp_exec_queue_remove - remove a queue from the PXP list
- * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
- * @q: the queue to remove from the list
- *
- * If PXP is enabled and the exec_queue is in the list, the queue will be
- * removed from the list and its PM reference will be released. It is safe to
- * call this function multiple times for the same queue.
- */
-void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
+static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock)
{
bool need_pm_put = false;
if (!xe_pxp_is_enabled(pxp))
return;
- spin_lock_irq(&pxp->queues.lock);
+ if (lock)
+ spin_lock_irq(&pxp->queues.lock);
if (!list_empty(&q->pxp.link)) {
list_del_init(&q->pxp.link);
@@ -690,36 +677,54 @@ void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
q->pxp.type = DRM_XE_PXP_TYPE_NONE;
- spin_unlock_irq(&pxp->queues.lock);
+ if (lock)
+ spin_unlock_irq(&pxp->queues.lock);
if (need_pm_put)
xe_pm_runtime_put(pxp->xe);
}
+/**
+ * xe_pxp_exec_queue_remove - remove a queue from the PXP list
+ * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
+ * @q: the queue to remove from the list
+ *
+ * If PXP is enabled and the exec_queue is in the list, the queue will be
+ * removed from the list and its PM reference will be released. It is safe to
+ * call this function multiple times for the same queue.
+ */
+void xe_pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q)
+{
+ __pxp_exec_queue_remove(pxp, q, true);
+}
+
static void pxp_invalidate_queues(struct xe_pxp *pxp)
{
struct xe_exec_queue *tmp, *q;
+ LIST_HEAD(to_clean);
spin_lock_irq(&pxp->queues.lock);
- /*
- * Removing a queue from the PXP list requires a put of the RPM ref that
- * the queue holds to keep the PXP session alive, which can't be done
- * under spinlock. Since it is safe to kill a queue multiple times, we
- * can leave the invalid queue in the list for now and postpone the
- * removal and associated RPM put to when the queue is destroyed.
- */
- list_for_each_entry(tmp, &pxp->queues.list, pxp.link) {
- q = xe_exec_queue_get_unless_zero(tmp);
-
+ list_for_each_entry_safe(q, tmp, &pxp->queues.list, pxp.link) {
+ q = xe_exec_queue_get_unless_zero(q);
if (!q)
continue;
+ list_move_tail(&q->pxp.link, &to_clean);
+ }
+ spin_unlock_irq(&pxp->queues.lock);
+
+ list_for_each_entry_safe(q, tmp, &to_clean, pxp.link) {
xe_exec_queue_kill(q);
+
+ /*
+ * We hold a ref to the queue so there is no risk of racing with
+ * the calls to exec_queue_remove coming from exec_queue_destroy.
+ */
+ __pxp_exec_queue_remove(pxp, q, false);
+
xe_exec_queue_put(q);
}
-
- spin_unlock_irq(&pxp->queues.lock);
}
/**
@@ -816,6 +821,7 @@ int xe_pxp_obj_key_check(struct xe_pxp *pxp, struct drm_gem_object *obj)
*/
int xe_pxp_pm_suspend(struct xe_pxp *pxp)
{
+ bool needs_queue_inval = false;
int ret = 0;
if (!xe_pxp_is_enabled(pxp))
@@ -848,7 +854,8 @@ wait_for_activation:
break;
fallthrough;
case XE_PXP_ACTIVE:
- pxp_invalidate_state(pxp);
+ pxp->key_instance++;
+ needs_queue_inval = true;
break;
default:
drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u",
@@ -865,6 +872,9 @@ wait_for_activation:
mutex_unlock(&pxp->mutex);
+ if (needs_queue_inval)
+ pxp_invalidate_queues(pxp);
+
/*
* if there is a termination in progress, wait for it.
* We need to wait outside the lock because the completion is done from
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index ebfae746f861..781dd21682e5 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -16,6 +16,7 @@
#include "regs/xe_gt_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
+#include "xe_eu_stall.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
@@ -729,6 +730,47 @@ static int query_pxp_status(struct xe_device *xe, struct drm_xe_device_query *qu
return 0;
}
+static int query_eu_stall(struct xe_device *xe,
+ struct drm_xe_device_query *query)
+{
+ void __user *query_ptr = u64_to_user_ptr(query->data);
+ struct drm_xe_query_eu_stall *info;
+ size_t size, array_size;
+ const u64 *rates;
+ u32 num_rates;
+ int ret;
+
+ if (!xe_eu_stall_supported_on_platform(xe)) {
+ drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
+ return -ENODEV;
+ }
+
+ array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates);
+ size = sizeof(struct drm_xe_query_eu_stall) + array_size;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ info = kzalloc(size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->num_sampling_rates = num_rates;
+ info->capabilities = DRM_XE_EU_STALL_CAPS_BASE;
+ info->record_size = xe_eu_stall_data_record_size(xe);
+ info->per_xecore_buf_size = xe_eu_stall_get_per_xecore_buf_size();
+ memcpy(info->sampling_rates, rates, array_size);
+
+ ret = copy_to_user(query_ptr, info, size);
+ kfree(info);
+
+ return ret ? -EFAULT : 0;
+}
+
static int (* const xe_query_funcs[])(struct xe_device *xe,
struct drm_xe_device_query *query) = {
query_engines,
@@ -741,6 +783,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
query_uc_fw_version,
query_oa_units,
query_pxp_status,
+ query_eu_stall,
};
int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
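
query_eu_stall() follows the usual two-step Xe query contract: a first call with query->size == 0 reports the required size, a second call with exactly that size fills the buffer, and any other size is rejected with -EINVAL. A hedged userspace sketch of that handshake, assuming the drm_xe_device_query/drm_xe_query_eu_stall uapi structs and the DRM_XE_DEVICE_QUERY_EU_STALL id:

/* Sketch only: size-probe then read of the EU stall query. */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>		/* assumes installed uapi headers */

static struct drm_xe_query_eu_stall *query_eu_stall_info(int drm_fd)
{
	struct drm_xe_device_query q = { .query = DRM_XE_DEVICE_QUERY_EU_STALL };
	struct drm_xe_query_eu_stall *info;

	if (ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &q))	/* size probe */
		return NULL;

	info = calloc(1, q.size);
	if (!info)
		return NULL;

	q.data = (uintptr_t)info;
	if (ioctl(drm_fd, DRM_IOCTL_XE_DEVICE_QUERY, &q)) {	/* fill buffer */
		free(info);
		return NULL;
	}

	return info;	/* caller frees; sampling_rates[] follows the struct */
}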
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 0c230ee53bba..d2f604aa96fa 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -177,6 +177,10 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
u32 flags;
+ if (XE_WA(gt, 14016712196))
+ i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH,
+ LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
+
flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 02b4eadf8407..d939ce70e6fa 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -127,40 +127,55 @@ static ssize_t survivability_mode_show(struct device *dev,
static DEVICE_ATTR_ADMIN_RO(survivability_mode);
-static void enable_survivability_mode(struct pci_dev *pdev)
+static void xe_survivability_mode_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct device *dev = &pdev->dev;
+
+ sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+}
+
+static int enable_survivability_mode(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
int ret = 0;
- /* set survivability mode */
- survivability->mode = true;
- dev_info(dev, "In Survivability Mode\n");
-
/* create survivability mode sysfs */
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
if (ret) {
dev_warn(dev, "Failed to create survivability sysfs files\n");
- return;
+ return ret;
}
- xe_heci_gsc_init(xe);
+ ret = devm_add_action_or_reset(xe->drm.dev,
+ xe_survivability_mode_fini, xe);
+ if (ret)
+ return ret;
+
+ ret = xe_heci_gsc_init(xe);
+ if (ret)
+ return ret;
xe_vsec_init(xe);
+
+ survivability->mode = true;
+ dev_err(dev, "In Survivability Mode\n");
+
+ return 0;
}
/**
- * xe_survivability_mode_enabled - check if survivability mode is enabled
+ * xe_survivability_mode_is_enabled - check if survivability mode is enabled
* @xe: xe device instance
*
* Returns true if in survivability mode, false otherwise
*/
-bool xe_survivability_mode_enabled(struct xe_device *xe)
+bool xe_survivability_mode_is_enabled(struct xe_device *xe)
{
- struct xe_survivability *survivability = &xe->survivability;
-
- return survivability->mode;
+ return xe->survivability.mode;
}
/**
@@ -183,35 +198,19 @@ bool xe_survivability_mode_required(struct xe_device *xe)
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
- return (survivability->boot_status == NON_CRITICAL_FAILURE ||
- survivability->boot_status == CRITICAL_FAILURE);
+ return survivability->boot_status == NON_CRITICAL_FAILURE ||
+ survivability->boot_status == CRITICAL_FAILURE;
}
/**
- * xe_survivability_mode_remove - remove survivability mode
+ * xe_survivability_mode_enable - Initialize and enable the survivability mode
* @xe: xe device instance
*
- * clean up sysfs entries of survivability mode
- */
-void xe_survivability_mode_remove(struct xe_device *xe)
-{
- struct xe_survivability *survivability = &xe->survivability;
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- struct device *dev = &pdev->dev;
-
- sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
- xe_heci_gsc_fini(xe);
- kfree(survivability->info);
- pci_set_drvdata(pdev, NULL);
-}
-
-/**
- * xe_survivability_mode_init - Initialize the survivability mode
- * @xe: xe device instance
+ * Initialize survivability information and enable survivability mode
*
- * Initializes survivability information and enables survivability mode
+ * Return: 0 for success, negative error code otherwise.
*/
-void xe_survivability_mode_init(struct xe_device *xe)
+int xe_survivability_mode_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_survivability_info *info;
@@ -219,9 +218,10 @@ void xe_survivability_mode_init(struct xe_device *xe)
survivability->size = MAX_SCRATCH_MMIO;
- info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL);
+ info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
+ GFP_KERNEL);
if (!info)
- return;
+ return -ENOMEM;
survivability->info = info;
@@ -230,9 +230,8 @@ void xe_survivability_mode_init(struct xe_device *xe)
/* Only log debug information and exit if it is a critical failure */
if (survivability->boot_status == CRITICAL_FAILURE) {
log_survivability_info(pdev);
- kfree(survivability->info);
- return;
+ return -ENXIO;
}
- enable_survivability_mode(pdev);
+ return enable_survivability_mode(pdev);
}
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index f530507a22c6..f4df5f9025ce 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -10,9 +10,8 @@
struct xe_device;
-void xe_survivability_mode_init(struct xe_device *xe);
-void xe_survivability_mode_remove(struct xe_device *xe);
-bool xe_survivability_mode_enabled(struct xe_device *xe);
+int xe_survivability_mode_enable(struct xe_device *xe);
+bool xe_survivability_mode_is_enabled(struct xe_device *xe);
bool xe_survivability_mode_required(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index d5281de04d54..b4a3577df70c 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -427,6 +427,36 @@ DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl,
TP_ARGS(xe, caller)
);
+TRACE_EVENT(xe_eu_stall_data_read,
+ TP_PROTO(u8 slice, u8 subslice,
+ u32 read_ptr, u32 write_ptr,
+ size_t read_size, size_t total_size),
+ TP_ARGS(slice, subslice,
+ read_ptr, write_ptr,
+ read_size, total_size),
+
+ TP_STRUCT__entry(__field(u8, slice)
+ __field(u8, subslice)
+ __field(u32, read_ptr)
+ __field(u32, write_ptr)
+ __field(size_t, read_size)
+ __field(size_t, total_size)
+ ),
+
+ TP_fast_assign(__entry->slice = slice;
+ __entry->subslice = subslice;
+ __entry->read_ptr = read_ptr;
+ __entry->write_ptr = write_ptr;
+ __entry->read_size = read_size;
+ __entry->total_size = total_size;
+ ),
+
+ TP_printk("slice: %u subslice: %u read ptr: 0x%x write ptr: 0x%x read size: %zu total read size: %zu",
+ __entry->slice, __entry->subslice,
+ __entry->read_ptr, __entry->write_ptr,
+ __entry->read_size, __entry->total_size)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_trace_guc.h b/drivers/gpu/drm/xe/xe_trace_guc.h
index 23abdd55dc62..78949db9cfce 100644
--- a/drivers/gpu/drm/xe/xe_trace_guc.h
+++ b/drivers/gpu/drm/xe/xe_trace_guc.h
@@ -14,6 +14,7 @@
#include "xe_device_types.h"
#include "xe_guc_exec_queue_types.h"
+#include "xe_guc_engine_activity_types.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
@@ -100,6 +101,54 @@ DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h,
);
+TRACE_EVENT(xe_guc_engine_activity,
+ TP_PROTO(struct xe_device *xe, struct engine_activity *ea, const char *name,
+ u16 instance),
+ TP_ARGS(xe, ea, name, instance),
+
+ TP_STRUCT__entry(
+ __string(dev, __dev_name_xe(xe))
+ __string(name, name)
+ __field(u32, global_change_num)
+ __field(u32, guc_tsc_frequency_hz)
+ __field(u32, lag_latency_usec)
+ __field(u16, instance)
+ __field(u16, change_num)
+ __field(u16, quanta_ratio)
+ __field(u32, last_update_tick)
+ __field(u64, active_ticks)
+ __field(u64, active)
+ __field(u64, total)
+ __field(u64, quanta)
+ __field(u64, last_cpu_ts)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev);
+ __assign_str(name);
+ __entry->global_change_num = ea->metadata.global_change_num;
+ __entry->guc_tsc_frequency_hz = ea->metadata.guc_tsc_frequency_hz;
+ __entry->lag_latency_usec = ea->metadata.lag_latency_usec;
+ __entry->instance = instance;
+ __entry->change_num = ea->activity.change_num;
+ __entry->quanta_ratio = ea->activity.quanta_ratio;
+ __entry->last_update_tick = ea->activity.last_update_tick;
+ __entry->active_ticks = ea->activity.active_ticks;
+ __entry->active = ea->active;
+ __entry->total = ea->total;
+ __entry->quanta = ea->quanta;
+ __entry->last_cpu_ts = ea->last_cpu_ts;
+ ),
+
+ TP_printk("dev=%s engine %s:%d Active=%llu, quanta=%llu, last_cpu_ts=%llu\n"
+ "Activity metadata: global_change_num=%u, guc_tsc_frequency_hz=%u lag_latency_usec=%u\n"
+ "Activity data: change_num=%u, quanta_ratio=0x%x, last_update_tick=%u, active_ticks=%llu\n",
+ __get_str(dev), __get_str(name), __entry->instance,
+ (__entry->active + __entry->total), __entry->quanta, __entry->last_cpu_ts,
+ __entry->global_change_num, __entry->guc_tsc_frequency_hz,
+ __entry->lag_latency_usec, __entry->change_num, __entry->quanta_ratio,
+ __entry->last_update_tick, __entry->active_ticks)
+);
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index d8167e818280..c14bd2282044 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -14,6 +14,7 @@
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_guc_pc.h"
+#include "xe_guc_engine_activity.h"
#include "xe_huc.h"
#include "xe_sriov.h"
#include "xe_uc_fw.h"
@@ -210,6 +211,8 @@ int xe_uc_init_hw(struct xe_uc *uc)
if (ret)
return ret;
+ xe_guc_engine_activity_enable_stats(&uc->guc);
+
/* We don't fail the driver load if HuC fails to auth, but let's warn */
ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
xe_gt_assert(uc_to_gt(uc), !ret);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d664f2e418b2..996000f2424e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -660,27 +660,39 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
{
struct xe_userptr_vma *uvma, *next;
int err = 0;
- LIST_HEAD(tmp_evict);
xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
lockdep_assert_held_write(&vm->lock);
/* Collect invalidated userptrs */
spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
userptr.invalidate_link) {
list_del_init(&uvma->userptr.invalidate_link);
- list_move_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
}
spin_unlock(&vm->userptr.invalidated_lock);
- /* Pin and move to temporary list */
+ /* Pin and move to bind list */
list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
userptr.repin_link) {
err = xe_vma_userptr_pin_pages(uvma);
if (err == -EFAULT) {
list_del_init(&uvma->userptr.repin_link);
+ /*
+ * We might have already done the pin once, but then
+ * had to retry before the re-bind happened, due to
+ * some other condition in the caller, but in the
+ * meantime the userptr got dinged by the notifier such
+ * that we need to revalidate here, but this time we hit
+ * the EFAULT. In such a case make sure we remove
+ * ourselves from the rebind list to avoid going down in
+ * flames.
+ */
+ if (!list_empty(&uvma->vma.combined_links.rebind))
+ list_del_init(&uvma->vma.combined_links.rebind);
/* Wait for pending binds */
xe_vm_lock(vm, false);
@@ -691,10 +703,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
err = xe_vm_invalidate_vma(&uvma->vma);
xe_vm_unlock(vm);
if (err)
- return err;
+ break;
} else {
- if (err < 0)
- return err;
+ if (err)
+ break;
list_del_init(&uvma->userptr.repin_link);
list_move_tail(&uvma->vma.combined_links.rebind,
@@ -702,7 +714,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
}
}
- return 0;
+ if (err) {
+ down_write(&vm->userptr.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->userptr.notifier_lock);
+ }
+ return err;
}
/**
@@ -1067,6 +1091,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
spin_unlock(&vm->userptr.invalidated_lock);
} else if (!xe_vma_is_null(vma)) {
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d4982799383c..b797c863bbe5 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -619,6 +619,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE))
},
+ { XE_RTP_NAME("13012615864"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 228436532282..e0c5fa460487 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -5,6 +5,7 @@
22011391025 PLATFORM(DG2)
22012727170 SUBPLATFORM(DG2, G11)
22012727685 SUBPLATFORM(DG2, G11)
+22016596838 PLATFORM(PVC)
18020744125 PLATFORM(PVC)
1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
1409600907 GRAPHICS_VERSION_RANGE(1200, 1250)
@@ -43,3 +44,12 @@
no_media_l3 MEDIA_VERSION(3000)
14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
+16021333562 GRAPHICS_VERSION_RANGE(1200, 1274)
+ MEDIA_VERSION(1300)
+14016712196 GRAPHICS_VERSION(1255)
+ GRAPHICS_VERSION_RANGE(1270, 1274)
+14015568240 GRAPHICS_VERSION_RANGE(1255, 1260)
+18013179988 GRAPHICS_VERSION(1255)
+ GRAPHICS_VERSION_RANGE(1270, 1274)
+1508761755 GRAPHICS_VERSION(1255)
+ GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)