-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-xe-sriov159
-rw-r--r--Documentation/gpu/xe/xe_gt_freq.rst3
-rw-r--r--drivers/gpu/drm/xe/Makefile4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h13
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h1
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c1
-rw-r--r--drivers/gpu/drm/xe/xe_device.c23
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h11
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c14
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c122
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h23
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h17
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c44
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c30
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c679
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c200
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h10
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c321
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h65
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c9
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.c95
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.h15
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c14
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h8
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c45
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.c445
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault_types.h136
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c15
-rw-r--r--drivers/gpu/drm/xe/xe_pat.h5
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c41
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c80
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c7
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c19
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.h1
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.c5
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.c284
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.h14
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c647
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_types.h11
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c3
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c91
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.c31
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.h5
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h23
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h8
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c101
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c2
55 files changed, 2719 insertions, 1256 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
new file mode 100644
index 000000000000..2fd7e9b7bacc
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
@@ -0,0 +1,159 @@
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+	This directory appears for a particular Intel Xe device when:
+
+	- the device supports SR-IOV, and
+	- the device is a Physical Function (PF), and
+	- driver support for the SR-IOV PF is enabled on the given device.
+
+ This directory is used as a root for all attributes required to
+ manage both Physical Function (PF) and Virtual Functions (VFs).
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ This directory holds attributes related to the SR-IOV Physical
+ Function (PF).
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf1/
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf2/
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These directories hold attributes related to the SR-IOV Virtual
+ Functions (VFs).
+
+	Note that the VF number <N> is 1-based, as described in the PCI
+	SR-IOV specification; the Xe driver follows that naming scheme.
+
+	There can be "vf1", "vf2" and so on, up to "vf<N>", where <N>
+	matches the value of the "sriov_totalvfs" attribute.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/exec_quantum_ms
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/preempt_timeout_us
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/sched_priority
+What:		/sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/profile/exec_quantum_ms
+What:		/sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/profile/preempt_timeout_us
+What:		/sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/profile/sched_priority
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These files expose scheduling parameters for the PF and its VFs, and
+ are visible only on Intel Xe platforms that use time-sliced GPU sharing.
+	They can be changed even while VFs are enabled and running, and they
+	reflect the settings of all tiles/GTs assigned to the given function.
+
+ exec_quantum_ms: (RW) unsigned integer
+ The GT execution quantum (EQ) in [ms] for the given function.
+	The actual quantum value may be aligned to HW/FW requirements.
+
+ Default is 0 (unlimited).
+
+ preempt_timeout_us: (RW) unsigned integer
+	The GT preemption timeout (PT) in [us] for the given function.
+	The actual timeout value may be aligned to HW/FW requirements.
+
+ Default is 0 (unlimited).
+
+ sched_priority: (RW/RO) string
+ The GT scheduling priority of the given function.
+
+ "low" - function will be scheduled on the GPU for its EQ/PT
+ only if function has any work already submitted.
+
+ "normal" - functions will be scheduled on the GPU for its EQ/PT
+ irrespective of whether it has submitted a work or not.
+
+ "high" - function will be scheduled on the GPU for its EQ/PT
+ in the next time-slice after the current one completes
+ and function has a work submitted.
+
+ Default is "low".
+
+ When read, this file will display the current and available
+ scheduling priorities. The currently active priority level will
+ be enclosed in square brackets, like:
+
+ [low] normal high
+
+ This file can be read-only if changing the priority is not
+ supported.
+
+ Writes to these attributes may fail with errors like:
+	-EINVAL if the provided input is malformed or not recognized,
+	-EPERM if the change is not applicable on the given HW/FW,
+	-EIO if the FW refuses to change the provisioning.
+
+	Reads from these attributes may fail with:
+	-EUCLEAN if the value is not consistent across all tiles/GTs.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/exec_quantum_ms
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/preempt_timeout_us
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/sched_priority
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+	These files allow bulk reconfiguration of the scheduling parameters
+	of the PF and VFs and are available only on Intel Xe platforms that
+	use time-sliced GPU sharing. These scheduling parameters can be
+	changed even while VFs are enabled and running.
+
+ exec_quantum_ms: (WO) unsigned integer
+ The GT execution quantum (EQ) in [ms] to be applied to all functions.
+ See sriov_admin/{pf,vf<N>}/profile/exec_quantum_ms for more details.
+
+ preempt_timeout_us: (WO) unsigned integer
+ The GT preemption timeout (PT) in [us] to be applied to all functions.
+ See sriov_admin/{pf,vf<N>}/profile/preempt_timeout_us for more details.
+
+ sched_priority: (RW/RO) string
+	The GT scheduling priority to be applied to all functions.
+ See sriov_admin/{pf,vf<N>}/profile/sched_priority for more details.
+
+ Writes to these attributes may fail with errors like:
+	-EINVAL if the provided input is malformed or not recognized,
+	-EPERM if the change is not applicable on the given HW/FW,
+	-EIO if the FW refuses to change the provisioning.
+
+
+What:		/sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/stop
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+	This file allows controlling the scheduling of the VF on Intel Xe GPU
+	platforms. It can be used to implement a custom policy mechanism when
+	VFs misbehave or trigger adverse events above defined thresholds.
+
+ stop: (WO) bool
+	All GT execution of the given function shall be stopped immediately.
+	To allow scheduling this VF again, a VF FLR must be triggered.
+
+ Writes to this attribute may fail with errors like:
+	-EINVAL if the provided input is malformed or not recognized,
+	-EPERM if the change is not applicable on the given HW/FW,
+	-EIO if the FW refuses to change the scheduling.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/device
+What:		/sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/device
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+	These are symlinks to the underlying PCI device entry representing
+	the given Xe SR-IOV function. For the PF, this link is always present.
+ For VFs, this link is present only for currently enabled VFs.
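
For illustration only, a minimal userspace sketch of driving these attributes (the PCI address and the helper itself are placeholders; per the entries above, writes may fail with -EINVAL/-EPERM/-EIO, so a real tool would check errno):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Sketch: set the execution quantum of VF1 to 20 ms. */
    static int set_vf1_exec_quantum(const char *bdf)
    {
            char path[256];
            int fd, ret;

            snprintf(path, sizeof(path),
                     "/sys/bus/pci/drivers/xe/%s/sriov_admin/vf1/profile/exec_quantum_ms",
                     bdf);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            /* Writes may fail with EINVAL/EPERM/EIO as documented above. */
            ret = write(fd, "20", strlen("20")) < 0 ? -1 : 0;
            close(fd);
            return ret;
    }
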
diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst
index c0811200e327..182d6aabeee1 100644
--- a/Documentation/gpu/xe/xe_gt_freq.rst
+++ b/Documentation/gpu/xe/xe_gt_freq.rst
@@ -7,6 +7,9 @@ Xe GT Frequency Management
.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c
:doc: Xe GT Frequency Management
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_throttle.c
+ :doc: Xe GT Throttle
+
Internal API
============
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f8a3a1bfe42e..7b4ca591a4ae 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -58,7 +58,6 @@ xe-y += xe_bb.o \
xe_gt_freq.o \
xe_gt_idle.o \
xe_gt_mcr.o \
- xe_gt_pagefault.o \
xe_gt_sysfs.o \
xe_gt_throttle.o \
xe_gt_topology.o \
@@ -73,6 +72,7 @@ xe-y += xe_bb.o \
xe_guc_id_mgr.o \
xe_guc_klv_helpers.o \
xe_guc_log.o \
+ xe_guc_pagefault.o \
xe_guc_pc.o \
xe_guc_submit.o \
xe_guc_tlb_inval.o \
@@ -94,6 +94,7 @@ xe-y += xe_bb.o \
xe_nvm.o \
xe_oa.o \
xe_observation.o \
+ xe_pagefault.o \
xe_pat.o \
xe_pci.o \
xe_pcode.o \
@@ -178,6 +179,7 @@ xe-$(CONFIG_PCI_IOV) += \
xe_sriov_pf_debugfs.o \
xe_sriov_pf_provision.o \
xe_sriov_pf_service.o \
+ xe_sriov_pf_sysfs.o \
xe_tile_sriov_pf_debugfs.o
# include helpers for tests even when XE is built-in
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index a895a8e801a9..2088256ad381 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -590,6 +590,7 @@
#define GT_GFX_RC6 XE_REG(0x138108)
#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8)
+/* Common performance limit reason bits - available on all platforms */
#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
#define PROCHOT_MASK REG_BIT(0)
#define THERMAL_LIMIT_MASK REG_BIT(1)
@@ -599,6 +600,18 @@
#define POWER_LIMIT_4_MASK REG_BIT(8)
#define POWER_LIMIT_1_MASK REG_BIT(10)
#define POWER_LIMIT_2_MASK REG_BIT(11)
+/* Platform-specific performance limit reason bits - for Crescent Island */
+#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff
+#define SOC_THERMAL_LIMIT_MASK REG_BIT(1)
+#define MEM_THERMAL_MASK REG_BIT(2)
+#define VR_THERMAL_MASK REG_BIT(3)
+#define ICCMAX_MASK REG_BIT(4)
+#define SOC_AVG_THERMAL_MASK REG_BIT(6)
+#define FASTVMODE_MASK REG_BIT(7)
+#define PSYS_PL1_MASK REG_BIT(12)
+#define PSYS_PL2_MASK REG_BIT(13)
+#define P0_FREQ_MASK REG_BIT(14)
+#define PSYS_CRIT_MASK REG_BIT(15)
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
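
The split between the common and Crescent Island masks means a consumer must pick the right mask for the platform before testing individual bits. A hypothetical decode helper (illustrative only, not the driver's actual xe_gt_throttle.c code):

    /* Illustrative only: report why the GT is throttled, using the
     * common reason bits; a Crescent Island platform would use
     * CRI_PERF_LIMIT_REASONS_MASK and its bits instead.
     */
    static void report_throttle_reasons(struct xe_gt *gt, u32 reasons)
    {
            reasons &= GT0_PERF_LIMIT_REASONS_MASK;
            if (reasons & PROCHOT_MASK)
                    xe_gt_dbg(gt, "throttled: PROCHOT\n");
            if (reasons & THERMAL_LIMIT_MASK)
                    xe_gt_dbg(gt, "throttled: thermal limit\n");
            if (reasons & (POWER_LIMIT_1_MASK | POWER_LIMIT_2_MASK |
                           POWER_LIMIT_4_MASK))
                    xe_gt_dbg(gt, "throttled: power limit\n");
    }
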
diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
index 264e9baf949c..0f79c0714454 100644
--- a/drivers/gpu/drm/xe/regs/xe_pmt.h
+++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
@@ -24,6 +24,7 @@
#define BMG_MODS_RESIDENCY_OFFSET (0x4D0)
#define BMG_G2_RESIDENCY_OFFSET (0x530)
#define BMG_G6_RESIDENCY_OFFSET (0x538)
+#define BMG_G7_RESIDENCY_OFFSET (0x4B0)
#define BMG_G8_RESIDENCY_OFFSET (0x540)
#define BMG_G10_RESIDENCY_OFFSET (0x548)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 1c3c9557a9bd..e91da9589c5f 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -142,6 +142,7 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
} residencies[] = {
{BMG_G2_RESIDENCY_OFFSET, "Package G2"},
{BMG_G6_RESIDENCY_OFFSET, "Package G6"},
+ {BMG_G7_RESIDENCY_OFFSET, "Package G7"},
{BMG_G8_RESIDENCY_OFFSET, "Package G8"},
{BMG_G10_RESIDENCY_OFFSET, "Package G10"},
{BMG_MODS_RESIDENCY_OFFSET, "Package ModS"}
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 86d5960476af..c7d373c70f0f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -52,6 +52,7 @@
#include "xe_nvm.h"
#include "xe_oa.h"
#include "xe_observation.h"
+#include "xe_pagefault.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
@@ -896,6 +897,10 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
+ err = xe_pagefault_init(xe);
+ if (err)
+ return err;
+
if (xe->tiles->media_gt &&
XE_GT_WA(xe->tiles->media_gt, 15015404425_disable))
XE_DEVICE_WA_DISABLE(xe, 15015404425);
@@ -988,21 +993,21 @@ void xe_device_remove(struct xe_device *xe)
void xe_device_shutdown(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ u8 id;
+
drm_dbg(&xe->drm, "Shutting down device\n");
- if (xe_driver_flr_disabled(xe)) {
- struct xe_gt *gt;
- u8 id;
+ xe_display_pm_shutdown(xe);
- xe_display_pm_shutdown(xe);
+ xe_irq_suspend(xe);
- xe_irq_suspend(xe);
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
- for_each_gt(gt, xe, id)
- xe_gt_shutdown(gt);
+ xe_display_pm_shutdown_late(xe);
- xe_display_pm_shutdown_late(xe);
- } else {
+ if (!xe_driver_flr_disabled(xe)) {
/* BOOM! */
__xe_driver_flr(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index af0ce275b032..7baf15f51575 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -18,6 +18,7 @@
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa_types.h"
+#include "xe_pagefault_types.h"
#include "xe_platform_types.h"
#include "xe_pmu_types.h"
#include "xe_pt_types.h"
@@ -418,6 +419,16 @@ struct xe_device {
u32 next_asid;
/** @usm.lock: protects UM state */
struct rw_semaphore lock;
+ /** @usm.pf_wq: page fault work queue, unbound, high priority */
+ struct workqueue_struct *pf_wq;
+ /*
+ * We pick 4 here because, in the current implementation, it
+ * yields the best bandwidth utilization of the kernel paging
+ * engine.
+ */
+#define XE_PAGEFAULT_QUEUE_COUNT 4
+ /** @usm.pf_queue: Page fault queues */
+ struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
} usm;
/** @pinned: pinned BO state */
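
The removed per-GT code spread faults across its queues with asid % NUM_PF_QUEUE; assuming the device-level replacement keeps that scheme, queue selection reduces to a sketch like:

    /* Sketch, not the actual xe_pagefault.c code: pick one of the four
     * device-level page-fault queues by ASID.
     */
    static struct xe_pagefault_queue *
    pick_pf_queue(struct xe_device *xe, u32 asid)
    {
            return &xe->usm.pf_queue[asid % XE_PAGEFAULT_QUEUE_COUNT];
    }
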
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 521467d976f7..4d81210e41f5 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -21,6 +21,7 @@
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_svm.h"
+#include "xe_trace.h"
#include "xe_vm.h"
/**
@@ -154,6 +155,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_exec_queue;
}
+ if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE) {
+ trace_xe_exec_queue_reach_max_job_count(q, XE_MAX_JOB_COUNT_PER_EXEC_QUEUE);
+ err = -EAGAIN;
+ goto err_exec_queue;
+ }
+
if (args->num_syncs) {
syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
if (!syncs) {
@@ -166,7 +173,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
- &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC |
+ &syncs_user[num_syncs], NULL, 0,
+ SYNC_PARSE_FLAG_EXEC |
(xe_vm_in_lr_mode(vm) ?
SYNC_PARSE_FLAG_LR_MODE : 0));
if (err)
@@ -294,10 +302,6 @@ retry:
goto err_put_job;
if (!xe_vm_in_lr_mode(vm)) {
- err = xe_sched_job_last_fence_add_dep(job, vm);
- if (err)
- goto err_put_job;
-
err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_put_job;
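
With the new cap, DRM_IOCTL_XE_EXEC returns -EAGAIN once an exec queue already holds XE_MAX_JOB_COUNT_PER_EXEC_QUEUE unfinished jobs, so submitters are expected to back off and retry. A hypothetical userspace loop (a real submitter would likely sleep or wait on a fence between attempts rather than spin):

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <drm/xe_drm.h>  /* struct drm_xe_exec, DRM_IOCTL_XE_EXEC */

    /* Retry submission while the queue is at its job-count cap. */
    static int submit_exec(int fd, struct drm_xe_exec *exec)
    {
            int ret;

            do {
                    ret = ioctl(fd, DRM_IOCTL_XE_EXEC, exec);
            } while (ret < 0 && (errno == EAGAIN || errno == EINTR));

            return ret;
    }
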
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 90cbc95f8e2e..8724f8de67e2 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -10,6 +10,7 @@
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
#include "xe_dep_scheduler.h"
@@ -368,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
}
xe_vm_put(migrate_vm);
+ if (!IS_ERR(q)) {
+ int err = drm_syncobj_create(&q->ufence_syncobj,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (err) {
+ xe_exec_queue_put(q);
+ return ERR_PTR(err);
+ }
+ }
+
return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
@@ -376,11 +387,20 @@ void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
struct xe_exec_queue *eq, *next;
+ int i;
+
+ xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
+
+ if (q->ufence_syncobj)
+ drm_syncobj_put(q->ufence_syncobj);
if (xe_exec_queue_uses_pxp(q))
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
xe_exec_queue_last_fence_put_unlocked(q);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
+
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
multi_gt_link)
@@ -998,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
- if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
+ xe_migrate_job_lock_assert(q);
+ } else if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
} else {
xe_vm_assert_held(vm);
@@ -1097,32 +1119,104 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
struct dma_fence *fence)
{
xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
}
/**
- * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
* @q: The exec queue
- * @vm: The VM the engine does a bind or exec for
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ */
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
+ * invalidation fence unlocked
+ * @q: The exec queue
+ * @type: Either primary or media GT
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type)
+{
+ xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ dma_fence_put(q->tlb_inval[type].last_fence);
+ q->tlb_inval[type].last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ *
+ * Get the last fence, taking a reference on it.
*
- * Returns:
- * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ * Returns: last fence if not signaled, dma fence stub if signaled
*/
-int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
{
struct dma_fence *fence;
- int err = 0;
- fence = xe_exec_queue_last_fence_get(q, vm);
- if (fence) {
- err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
- 0 : -ETIME;
- dma_fence_put(fence);
- }
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
- return err;
+ if (q->tlb_inval[type].last_fence &&
+ test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &q->tlb_inval[type].last_fence->flags))
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+
+ fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @fence: The fence
+ * @type: Either primary or media GT
+ *
+ * Set the last fence for the TLB invalidation type on the queue. Takes a
+ * reference on the fence; xe_exec_queue_tlb_inval_last_fence_put() should
+ * be called when closing the queue.
+ */
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
+
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+ q->tlb_inval[type].last_fence = dma_fence_get(fence);
}
/**
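
Taken together, these helpers track one last fence per TLB invalidation type on bind/migrate queues. A sketch of the expected caller pattern (the helper below is illustrative, not actual driver code):

    /* Sketch: order a new invalidation after the previous one on the
     * same queue/type, then record it as the new last fence.
     */
    static void track_inval_fence(struct xe_exec_queue *q, struct xe_vm *vm,
                                  struct dma_fence *new_fence,
                                  unsigned int type)
    {
            struct dma_fence *prev;

            prev = xe_exec_queue_tlb_inval_last_fence_get(q, vm, type);
            dma_fence_wait(prev, false); /* or add it as a job dependency */
            dma_fence_put(prev);

            xe_exec_queue_tlb_inval_last_fence_set(q, vm, new_fence, type);
    }
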
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index a4dfbe858bda..fda4d4f9bda8 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -14,6 +14,10 @@ struct drm_file;
struct xe_device;
struct xe_file;
+#define for_each_tlb_inval(__i) \
+ for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
+ __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
+
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hw_engine, u32 flags,
@@ -84,8 +88,23 @@ struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *
struct xe_vm *vm);
void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
-int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
- struct xe_vm *vm);
+
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type);
+
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type);
+
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 282505fa1377..771ffe35cd0c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -15,6 +15,7 @@
#include "xe_hw_fence_types.h"
#include "xe_lrc_types.h"
+struct drm_syncobj;
struct xe_execlist_exec_queue;
struct xe_gt;
struct xe_guc_exec_queue;
@@ -145,6 +146,11 @@ struct xe_exec_queue {
* dependency scheduler
*/
struct xe_dep_scheduler *dep_scheduler;
+ /**
+ * @last_fence: last fence for tlb invalidation, protected by
+ * vm->lock in write mode
+ */
+ struct dma_fence *last_fence;
} tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
/** @pxp: PXP info tracking */
@@ -155,6 +161,12 @@ struct xe_exec_queue {
struct list_head link;
} pxp;
+ /** @ufence_syncobj: User fence syncobj */
+ struct drm_syncobj *ufence_syncobj;
+
+ /** @ufence_timeline_value: User fence timeline value */
+ u64 ufence_timeline_value;
+
/** @ops: submission backend exec queue operations */
const struct xe_exec_queue_ops *ops;
@@ -162,6 +174,11 @@ struct xe_exec_queue {
const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity;
+
+#define XE_MAX_JOB_COUNT_PER_EXEC_QUEUE 1000
+ /** @job_cnt: number of drm jobs in this exec queue */
+ atomic_t job_cnt;
+
/**
* @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
* Protected by @vm's resv. Unused if @vm == NULL.
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 89808b33d0a8..6d479948bf21 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -32,7 +32,6 @@
#include "xe_gt_freq.h"
#include "xe_gt_idle.h"
#include "xe_gt_mcr.h"
-#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
@@ -49,6 +48,7 @@
#include "xe_map.h"
#include "xe_migrate.h"
#include "xe_mmio.h"
+#include "xe_pagefault.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_mocs.h"
@@ -607,6 +607,13 @@ static void xe_gt_fini(void *arg)
struct xe_gt *gt = arg;
int i;
+ if (disable_work_sync(&gt->reset.worker))
+ /*
+		 * If a queued gt_reset_worker was prevented from executing,
+		 * release its rpm reference here.
+ */
+ xe_pm_runtime_put(gt_to_xe(gt));
+
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -637,10 +644,6 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
- err = xe_gt_pagefault_init(gt);
- if (err)
- return err;
-
err = xe_gt_idle_init(&gt->gtidle);
if (err)
return err;
@@ -813,21 +816,18 @@ static int do_gt_restart(struct xe_gt *gt)
return 0;
}
-static int gt_reset(struct xe_gt *gt)
+static void gt_reset_worker(struct work_struct *w)
{
+ struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
unsigned int fw_ref;
int err;
- if (xe_device_wedged(gt_to_xe(gt))) {
- err = -ECANCELED;
+ if (xe_device_wedged(gt_to_xe(gt)))
goto err_pm_put;
- }
/* We only support GT resets with GuC submission */
- if (!xe_device_uc_enabled(gt_to_xe(gt))) {
- err = -ENODEV;
+ if (!xe_device_uc_enabled(gt_to_xe(gt)))
goto err_pm_put;
- }
xe_gt_info(gt, "reset started\n");
@@ -849,7 +849,7 @@ static int gt_reset(struct xe_gt *gt)
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
- xe_gt_pagefault_reset(gt);
+ xe_pagefault_reset(gt_to_xe(gt), gt);
xe_uc_stop(&gt->uc);
@@ -864,30 +864,24 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ /* Pair with get while enqueueing the work in xe_gt_reset_async() */
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
- return 0;
+ return;
err_out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
XE_WARN_ON(xe_uc_start(&gt->uc));
+
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
-
xe_device_declare_wedged(gt_to_xe(gt));
+
err_pm_put:
xe_pm_runtime_put(gt_to_xe(gt));
-
- return err;
-}
-
-static void gt_reset_worker(struct work_struct *w)
-{
- struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
-
- gt_reset(gt);
}
void xe_gt_reset_async(struct xe_gt *gt)
@@ -899,6 +893,8 @@ void xe_gt_reset_async(struct xe_gt *gt)
return;
xe_gt_info(gt, "reset queued\n");
+
+ /* Pair with put in gt_reset_worker() if work is enqueued */
xe_pm_runtime_get_noresume(gt_to_xe(gt));
if (!queue_work(gt->ordered_wq, &gt->reset.worker))
xe_pm_runtime_put(gt_to_xe(gt));
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index e88f113226bc..849ea6c86e8e 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -29,24 +29,26 @@
* PCODE is the ultimate decision maker of the actual running frequency, based
* on thermal and other running conditions.
*
- * Xe's Freq provides a sysfs API for frequency management:
+ * Xe's Freq provides a sysfs API for frequency management under
+ * ``<device>/tile#/gt#/freq0/`` directory.
*
- * device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ * **Read-only** attributes:
*
- * - act_freq: The actual resolved frequency decided by PCODE.
- * - cur_freq: The current one requested by GuC PC to the PCODE.
- * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
- * - rpa_freq: The Render Performance (RP) A level, which is the achievable one.
- * Calculated by PCODE at runtime based on multiple running conditions
- * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
- * Calculated by PCODE at runtime based on multiple running conditions
- * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ * - ``act_freq``: The actual resolved frequency decided by PCODE.
+ * - ``cur_freq``: The current one requested by GuC PC to the PCODE.
+ * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one.
+ * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one.
*
- * device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ * **Read-write** attributes:
*
- * - min_freq: Min frequency request.
- * - max_freq: Max frequency request.
- * If max <= min, then freq_min becomes a fixed frequency request.
+ * - ``min_freq``: Min frequency request.
+ * - ``max_freq``: Max frequency request.
+ * If max <= min, then freq_min becomes a fixed frequency
+ * request.
*/
static struct xe_guc_pc *
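
As a usage illustration of the documented attributes (the path is a placeholder for a specific card/tile/GT; this is not part of the patch):

    #include <stdio.h>

    /* Sketch: read the PCODE-resolved frequency for gt0 on card0. */
    static long read_act_freq(void)
    {
            FILE *f = fopen("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", "r");
            long freq = -1;

            if (!f)
                    return -1;
            if (fscanf(f, "%ld", &freq) != 1)
                    freq = -1;
            fclose(f);
            return freq;
    }
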
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
deleted file mode 100644
index a054d6010ae0..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ /dev/null
@@ -1,679 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_gt_pagefault.h"
-
-#include <linux/bitfield.h>
-#include <linux/circ_buf.h>
-
-#include <drm/drm_exec.h>
-#include <drm/drm_managed.h>
-
-#include "abi/guc_actions_abi.h"
-#include "xe_bo.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_gt_stats.h"
-#include "xe_guc.h"
-#include "xe_guc_ct.h"
-#include "xe_migrate.h"
-#include "xe_svm.h"
-#include "xe_trace_bo.h"
-#include "xe_vm.h"
-#include "xe_vram_types.h"
-
-struct pagefault {
- u64 page_addr;
- u32 asid;
- u16 pdata;
- u8 vfid;
- u8 access_type;
- u8 fault_type;
- u8 fault_level;
- u8 engine_class;
- u8 engine_instance;
- u8 fault_unsuccessful;
- bool trva_fault;
-};
-
-enum access_type {
- ACCESS_TYPE_READ = 0,
- ACCESS_TYPE_WRITE = 1,
- ACCESS_TYPE_ATOMIC = 2,
- ACCESS_TYPE_RESERVED = 3,
-};
-
-enum fault_type {
- NOT_PRESENT = 0,
- WRITE_ACCESS_VIOLATION = 1,
- ATOMIC_ACCESS_VIOLATION = 2,
-};
-
-struct acc {
- u64 va_range_base;
- u32 asid;
- u32 sub_granularity;
- u8 granularity;
- u8 vfid;
- u8 access_type;
- u8 engine_class;
- u8 engine_instance;
-};
-
-static bool access_is_atomic(enum access_type access_type)
-{
- return access_type == ACCESS_TYPE_ATOMIC;
-}
-
-static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
-{
- return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present,
- vma->tile_invalidated);
-}
-
-static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
- bool need_vram_move, struct xe_vram_region *vram)
-{
- struct xe_bo *bo = xe_vma_bo(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- int err;
-
- err = xe_vm_lock_vma(exec, vma);
- if (err)
- return err;
-
- if (!bo)
- return 0;
-
- return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
- xe_bo_validate(bo, vm, true, exec);
-}
-
-static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
- bool atomic)
-{
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_validation_ctx ctx;
- struct drm_exec exec;
- struct dma_fence *fence;
- int err, needs_vram;
-
- lockdep_assert_held_write(&vm->lock);
-
- needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
- if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma)))
- return needs_vram < 0 ? needs_vram : -EACCES;
-
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024);
-
- trace_xe_vma_pagefault(vma);
-
- /* Check if VMA is valid, opportunistic check only */
- if (vma_is_valid(tile, vma) && !atomic)
- return 0;
-
-retry_userptr:
- if (xe_vma_is_userptr(vma) &&
- xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
- struct xe_userptr_vma *uvma = to_userptr_vma(vma);
-
- err = xe_vma_userptr_pin_pages(uvma);
- if (err)
- return err;
- }
-
- /* Lock VM and BOs dma-resv */
- xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
- drm_exec_until_all_locked(&exec) {
- err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram);
- drm_exec_retry_on_contention(&exec);
- xe_validation_retry_on_oom(&ctx, &err);
- if (err)
- goto unlock_dma_resv;
-
- /* Bind VMA only to the GT that has faulted */
- trace_xe_vma_pf_bind(vma);
- xe_vm_set_validation_exec(vm, &exec);
- fence = xe_vma_rebind(vm, vma, BIT(tile->id));
- xe_vm_set_validation_exec(vm, NULL);
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- xe_validation_retry_on_oom(&ctx, &err);
- goto unlock_dma_resv;
- }
- }
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
-
-unlock_dma_resv:
- xe_validation_ctx_fini(&ctx);
- if (err == -EAGAIN)
- goto retry_userptr;
-
- return err;
-}
-
-static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
-{
- struct xe_vm *vm;
-
- down_read(&xe->usm.lock);
- vm = xa_load(&xe->usm.asid_to_vm, asid);
- if (vm && xe_vm_in_fault_mode(vm))
- xe_vm_get(vm);
- else
- vm = ERR_PTR(-EINVAL);
- up_read(&xe->usm.lock);
-
- return vm;
-}
-
-static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
-{
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_vm *vm;
- struct xe_vma *vma = NULL;
- int err;
- bool atomic;
-
- /* SW isn't expected to handle TRTT faults */
- if (pf->trva_fault)
- return -EFAULT;
-
- vm = asid_to_vm(xe, pf->asid);
- if (IS_ERR(vm))
- return PTR_ERR(vm);
-
- /*
- * TODO: Change to read lock? Using write lock for simplicity.
- */
- down_write(&vm->lock);
-
- if (xe_vm_is_closed(vm)) {
- err = -ENOENT;
- goto unlock_vm;
- }
-
- vma = xe_vm_find_vma_by_addr(vm, pf->page_addr);
- if (!vma) {
- err = -EINVAL;
- goto unlock_vm;
- }
-
- atomic = access_is_atomic(pf->access_type);
-
- if (xe_vma_is_cpu_addr_mirror(vma))
- err = xe_svm_handle_pagefault(vm, vma, gt,
- pf->page_addr, atomic);
- else
- err = handle_vma_pagefault(gt, vma, atomic);
-
-unlock_vm:
- if (!err)
- vm->usm.last_fault_vma = vma;
- up_write(&vm->lock);
- xe_vm_put(vm);
-
- return err;
-}
-
-static int send_pagefault_reply(struct xe_guc *guc,
- struct xe_guc_pagefault_reply *reply)
-{
- u32 action[] = {
- XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
- reply->dw0,
- reply->dw1,
- };
-
- return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
-}
-
-static void print_pagefault(struct xe_gt *gt, struct pagefault *pf)
-{
- xe_gt_dbg(gt, "\n\tASID: %d\n"
- "\tVFID: %d\n"
- "\tPDATA: 0x%04x\n"
- "\tFaulted Address: 0x%08x%08x\n"
- "\tFaultType: %d\n"
- "\tAccessType: %d\n"
- "\tFaultLevel: %d\n"
- "\tEngineClass: %d %s\n"
- "\tEngineInstance: %d\n",
- pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
- lower_32_bits(pf->page_addr),
- pf->fault_type, pf->access_type, pf->fault_level,
- pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class),
- pf->engine_instance);
-}
-
-#define PF_MSG_LEN_DW 4
-
-static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
-{
- const struct xe_guc_pagefault_desc *desc;
- bool ret = false;
-
- spin_lock_irq(&pf_queue->lock);
- if (pf_queue->tail != pf_queue->head) {
- desc = (const struct xe_guc_pagefault_desc *)
- (pf_queue->data + pf_queue->tail);
-
- pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
- pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
- pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
- pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
- pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
- PFD_PDATA_HI_SHIFT;
- pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
- pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
- pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
- pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
- pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
- pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
- PFD_VIRTUAL_ADDR_HI_SHIFT;
- pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
- PFD_VIRTUAL_ADDR_LO_SHIFT;
-
- pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
- pf_queue->num_dw;
- ret = true;
- }
- spin_unlock_irq(&pf_queue->lock);
-
- return ret;
-}
-
-static bool pf_queue_full(struct pf_queue *pf_queue)
-{
- lockdep_assert_held(&pf_queue->lock);
-
- return CIRC_SPACE(pf_queue->head, pf_queue->tail,
- pf_queue->num_dw) <=
- PF_MSG_LEN_DW;
-}
-
-int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct pf_queue *pf_queue;
- unsigned long flags;
- u32 asid;
- bool full;
-
- if (unlikely(len != PF_MSG_LEN_DW))
- return -EPROTO;
-
- asid = FIELD_GET(PFD_ASID, msg[1]);
- pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
-
- /*
- * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
- */
- xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));
-
- spin_lock_irqsave(&pf_queue->lock, flags);
- full = pf_queue_full(pf_queue);
- if (!full) {
- memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
- pf_queue->head = (pf_queue->head + len) %
- pf_queue->num_dw;
- queue_work(gt->usm.pf_wq, &pf_queue->worker);
- } else {
- xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n");
- }
- spin_unlock_irqrestore(&pf_queue->lock, flags);
-
- return full ? -ENOSPC : 0;
-}
-
-#define USM_QUEUE_MAX_RUNTIME_MS 20
-
-static void pf_queue_work_func(struct work_struct *w)
-{
- struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
- struct xe_gt *gt = pf_queue->gt;
- struct xe_guc_pagefault_reply reply = {};
- struct pagefault pf = {};
- unsigned long threshold;
- int ret;
-
- threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
-
- while (get_pagefault(pf_queue, &pf)) {
- ret = handle_pagefault(gt, &pf);
- if (unlikely(ret)) {
- print_pagefault(gt, &pf);
- pf.fault_unsuccessful = 1;
- xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret));
- }
-
- reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
- FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
- FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
- FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
- FIELD_PREP(PFR_ASID, pf.asid);
-
- reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
- FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
- FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
- FIELD_PREP(PFR_PDATA, pf.pdata);
-
- send_pagefault_reply(&gt->uc.guc, &reply);
-
- if (time_after(jiffies, threshold) &&
- pf_queue->tail != pf_queue->head) {
- queue_work(gt->usm.pf_wq, w);
- break;
- }
- }
-}
-
-static void acc_queue_work_func(struct work_struct *w);
-
-static void pagefault_fini(void *arg)
-{
- struct xe_gt *gt = arg;
- struct xe_device *xe = gt_to_xe(gt);
-
- if (!xe->info.has_usm)
- return;
-
- destroy_workqueue(gt->usm.acc_wq);
- destroy_workqueue(gt->usm.pf_wq);
-}
-
-static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
-{
- struct xe_device *xe = gt_to_xe(gt);
- xe_dss_mask_t all_dss;
- int num_dss, num_eus;
-
- bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
- XE_MAX_DSS_FUSE_BITS);
-
- num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
- num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
- XE_MAX_EU_FUSE_BITS) * num_dss;
-
- /*
- * user can issue separate page faults per EU and per CS
- *
- * XXX: Multiplier required as compute UMD are getting PF queue errors
- * without it. Follow on why this multiplier is required.
- */
-#define PF_MULTIPLIER 8
- pf_queue->num_dw =
- (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
- pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
-#undef PF_MULTIPLIER
-
- pf_queue->gt = gt;
- pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
- sizeof(u32), GFP_KERNEL);
- if (!pf_queue->data)
- return -ENOMEM;
-
- spin_lock_init(&pf_queue->lock);
- INIT_WORK(&pf_queue->worker, pf_queue_work_func);
-
- return 0;
-}
-
-int xe_gt_pagefault_init(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- int i, ret = 0;
-
- if (!xe->info.has_usm)
- return 0;
-
- for (i = 0; i < NUM_PF_QUEUE; ++i) {
- ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
- if (ret)
- return ret;
- }
- for (i = 0; i < NUM_ACC_QUEUE; ++i) {
- gt->usm.acc_queue[i].gt = gt;
- spin_lock_init(&gt->usm.acc_queue[i].lock);
- INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
- }
-
- gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
- WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
- if (!gt->usm.pf_wq)
- return -ENOMEM;
-
- gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
- WQ_UNBOUND | WQ_HIGHPRI,
- NUM_ACC_QUEUE);
- if (!gt->usm.acc_wq) {
- destroy_workqueue(gt->usm.pf_wq);
- return -ENOMEM;
- }
-
- return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt);
-}
-
-void xe_gt_pagefault_reset(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- int i;
-
- if (!xe->info.has_usm)
- return;
-
- for (i = 0; i < NUM_PF_QUEUE; ++i) {
- spin_lock_irq(&gt->usm.pf_queue[i].lock);
- gt->usm.pf_queue[i].head = 0;
- gt->usm.pf_queue[i].tail = 0;
- spin_unlock_irq(&gt->usm.pf_queue[i].lock);
- }
-
- for (i = 0; i < NUM_ACC_QUEUE; ++i) {
- spin_lock(&gt->usm.acc_queue[i].lock);
- gt->usm.acc_queue[i].head = 0;
- gt->usm.acc_queue[i].tail = 0;
- spin_unlock(&gt->usm.acc_queue[i].lock);
- }
-}
-
-static int granularity_in_byte(int val)
-{
- switch (val) {
- case 0:
- return SZ_128K;
- case 1:
- return SZ_2M;
- case 2:
- return SZ_16M;
- case 3:
- return SZ_64M;
- default:
- return 0;
- }
-}
-
-static int sub_granularity_in_byte(int val)
-{
- return (granularity_in_byte(val) / 32);
-}
-
-static void print_acc(struct xe_gt *gt, struct acc *acc)
-{
- xe_gt_warn(gt, "Access counter request:\n"
- "\tType: %s\n"
- "\tASID: %d\n"
- "\tVFID: %d\n"
- "\tEngine: %d:%d\n"
- "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
- "\tSub_Granularity Vector: 0x%08x\n"
- "\tVA Range base: 0x%016llx\n",
- acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
- acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
- granularity_in_byte(acc->granularity) / SZ_1K,
- sub_granularity_in_byte(acc->granularity) / SZ_1K,
- acc->sub_granularity, acc->va_range_base);
-}
-
-static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
-{
- u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
- sub_granularity_in_byte(acc->granularity);
-
- return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
-}
-
-static int handle_acc(struct xe_gt *gt, struct acc *acc)
-{
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_validation_ctx ctx;
- struct drm_exec exec;
- struct xe_vm *vm;
- struct xe_vma *vma;
- int ret = 0;
-
- /* We only support ACC_TRIGGER at the moment */
- if (acc->access_type != ACC_TRIGGER)
- return -EINVAL;
-
- vm = asid_to_vm(xe, acc->asid);
- if (IS_ERR(vm))
- return PTR_ERR(vm);
-
- down_read(&vm->lock);
-
- /* Lookup VMA */
- vma = get_acc_vma(vm, acc);
- if (!vma) {
- ret = -EINVAL;
- goto unlock_vm;
- }
-
- trace_xe_vma_acc(vma);
-
- /* Userptr or null can't be migrated, nothing to do */
- if (xe_vma_has_no_bo(vma))
- goto unlock_vm;
-
- /* Lock VM and BOs dma-resv */
- xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
- drm_exec_until_all_locked(&exec) {
- ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram);
- drm_exec_retry_on_contention(&exec);
- xe_validation_retry_on_oom(&ctx, &ret);
- }
-
- xe_validation_ctx_fini(&ctx);
-unlock_vm:
- up_read(&vm->lock);
- xe_vm_put(vm);
-
- return ret;
-}
-
-#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__))
-
-#define ACC_MSG_LEN_DW 4
-
-static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
-{
- const struct xe_guc_acc_desc *desc;
- bool ret = false;
-
- spin_lock(&acc_queue->lock);
- if (acc_queue->tail != acc_queue->head) {
- desc = (const struct xe_guc_acc_desc *)
- (acc_queue->data + acc_queue->tail);
-
- acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
- acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
- FIELD_GET(ACC_SUBG_LO, desc->dw0);
- acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
- acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
- acc->asid = FIELD_GET(ACC_ASID, desc->dw1);
- acc->vfid = FIELD_GET(ACC_VFID, desc->dw2);
- acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
- acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
- desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
-
- acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
- ACC_QUEUE_NUM_DW;
- ret = true;
- }
- spin_unlock(&acc_queue->lock);
-
- return ret;
-}
-
-static void acc_queue_work_func(struct work_struct *w)
-{
- struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
- struct xe_gt *gt = acc_queue->gt;
- struct acc acc = {};
- unsigned long threshold;
- int ret;
-
- threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
-
- while (get_acc(acc_queue, &acc)) {
- ret = handle_acc(gt, &acc);
- if (unlikely(ret)) {
- print_acc(gt, &acc);
- xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret));
- }
-
- if (time_after(jiffies, threshold) &&
- acc_queue->tail != acc_queue->head) {
- queue_work(gt->usm.acc_wq, w);
- break;
- }
- }
-}
-
-static bool acc_queue_full(struct acc_queue *acc_queue)
-{
- lockdep_assert_held(&acc_queue->lock);
-
- return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
- ACC_MSG_LEN_DW;
-}
-
-int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct acc_queue *acc_queue;
- u32 asid;
- bool full;
-
- /*
- * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
- */
- BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
-
- if (unlikely(len != ACC_MSG_LEN_DW))
- return -EPROTO;
-
- asid = FIELD_GET(ACC_ASID, msg[1]);
- acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
-
- spin_lock(&acc_queue->lock);
- full = acc_queue_full(acc_queue);
- if (!full) {
- memcpy(acc_queue->data + acc_queue->head, msg,
- len * sizeof(u32));
- acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
- queue_work(gt->usm.acc_wq, &acc_queue->worker);
- } else {
- xe_gt_warn(gt, "ACC Queue full, dropping ACC\n");
- }
- spin_unlock(&acc_queue->lock);
-
- return full ? -ENOSPC : 0;
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
deleted file mode 100644
index 839c065a5e4c..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_GT_PAGEFAULT_H_
-#define _XE_GT_PAGEFAULT_H_
-
-#include <linux/types.h>
-
-struct xe_gt;
-struct xe_guc;
-
-int xe_gt_pagefault_init(struct xe_gt *gt);
-void xe_gt_pagefault_reset(struct xe_gt *gt);
-int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
-int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
-
-#endif /* _XE_GT_PAGEFAULT_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index c0c0215c0703..d90261a7ab7c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -924,7 +924,8 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
const char *what, const char *(*unit)(u32),
unsigned int last, int err)
{
- xe_gt_assert(gt, first);
+ char name[8];
+
xe_gt_assert(gt, num_vfs);
xe_gt_assert(gt, first <= last);
@@ -932,8 +933,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err);
if (unlikely(err)) {
- xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
- first, first + num_vfs - 1, what);
+ xe_gt_sriov_notice(gt, "Failed to bulk provision %s..VF%u with %s\n",
+ xe_sriov_function_name(first, name, sizeof(name)),
+ first + num_vfs - 1, what);
if (last > first)
pf_config_bulk_set_u32_done(gt, first, last - first, value,
get, what, unit, last, 0);
@@ -942,8 +944,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
/* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
value = get(gt, first);
- xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n",
- first, first + num_vfs - 1, value, unit(value), what);
+ xe_gt_sriov_info(gt, "%s..VF%u provisioned with %u%s %s\n",
+ xe_sriov_function_name(first, name, sizeof(name)),
+ first + num_vfs - 1, value, unit(value), what);
return 0;
}
@@ -1724,7 +1727,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
return 0;
}
-static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
@@ -1732,47 +1735,107 @@ static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
}
/**
- * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF.
+ * xe_gt_sriov_pf_config_set_exec_quantum_locked() - Configure PF/VF execution quantum.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
* @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
*
- * This function can only be called on PF.
+ * This function can only be called on PF with the master mutex held.
+ * It will log the provisioned value or an error in case of failure.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
- u32 exec_quantum)
+int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
{
int err;
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
err = pf_provision_exec_quantum(gt, vfid, exec_quantum);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
return pf_config_set_u32_done(gt, vfid, exec_quantum,
- xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid),
+ pf_get_exec_quantum(gt, vfid),
"execution quantum", exec_quantum_unit, err);
}
/**
- * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum.
+ * xe_gt_sriov_pf_config_set_exec_quantum() - Configure PF/VF execution quantum.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
*
* This function can only be called on PF.
+ * It will log the provisioned value or an error in case of failure.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, exec_quantum);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum_locked() - Get PF/VF execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF with the master mutex held.
*
- * Return: VF's (or PF's) execution quantum in milliseconds.
+ * Return: execution quantum in milliseconds (or 0 if infinity).
+ */
+u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_get_exec_quantum(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum() - Get PF/VF execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: execution quantum in milliseconds (or 0 if infinity).
*/
u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
{
- u32 exec_quantum;
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
- exec_quantum = pf_get_exec_quantum(gt, vfid);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return pf_get_exec_quantum(gt, vfid);
+}
- return exec_quantum;
+/**
+ * xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked() - Configure EQ for PF and VFs.
+ * @gt: the &xe_gt to configure
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum)
+{
+ unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+ int err = 0;
+
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 0; n <= totalvfs; n++) {
+ err = pf_provision_exec_quantum(gt, VFID(n), exec_quantum);
+ if (err)
+ break;
+ }
+
+ return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, exec_quantum,
+ pf_get_exec_quantum, "execution quantum",
+ exec_quantum_unit, n, err);
}
static const char *preempt_timeout_unit(u32 preempt_timeout)
@@ -1795,7 +1858,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
return 0;
}
-static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
@@ -1803,47 +1866,106 @@ static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
}
/**
- * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF.
+ * xe_gt_sriov_pf_config_set_preempt_timeout_locked() - Configure PF/VF preemption timeout.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
* @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
*
- * This function can only be called on PF.
+ * This function can only be called on PF with the master mutex held.
+ * It will log the provisioned value or an error in case of failure.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
- u32 preempt_timeout)
+int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
{
int err;
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
return pf_config_set_u32_done(gt, vfid, preempt_timeout,
- xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid),
+ pf_get_preempt_timeout(gt, vfid),
"preemption timeout", preempt_timeout_unit, err);
}
/**
- * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout.
+ * xe_gt_sriov_pf_config_set_preempt_timeout() - Configure PF/VF preemption timeout.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, preempt_timeout);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout_locked() - Get PF/VF preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: preemption timeout in microseconds (or 0 if infinity).
+ */
+u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_get_preempt_timeout(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout() - Get PF/VF preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
*
* This function can only be called on PF.
*
- * Return: VF's (or PF's) preemption timeout in microseconds.
+ * Return: preemption timeout in microseconds (or 0 if infinity).
*/
u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
{
- u32 preempt_timeout;
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
- preempt_timeout = pf_get_preempt_timeout(gt, vfid);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return pf_get_preempt_timeout(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked() - Configure preemption timeout for PF and VFs.
+ * @gt: the &xe_gt to configure
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout)
+{
+ unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+ int err = 0;
+
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 0; n <= totalvfs; n++) {
+ err = pf_provision_preempt_timeout(gt, VFID(n), preempt_timeout);
+ if (err)
+ break;
+ }
- return preempt_timeout;
+ return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, preempt_timeout,
+ pf_get_preempt_timeout, "preemption timeout",
+ preempt_timeout_unit, n, err);
}
static const char *sched_priority_unit(u32 priority)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 513e6512a575..14d036790695 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -40,10 +40,20 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, uns
u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum);
+u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum);
+int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum);
+
u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
u32 preempt_timeout);
+u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout);
+int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout);
+
u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority);
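The split into plain and _locked variants lets a caller batch several
provisioning updates under a single hold of the PF master mutex, while the
unlocked wrappers reduce to a guard(mutex) plus a tail call. A minimal sketch
of such a batching caller (the helper name is hypothetical, not part of this
series):

static int pf_provision_vf_scheduling(struct xe_gt *gt, unsigned int vfid,
				      u32 eq_ms, u32 pt_us)
{
	int err;

	guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));

	err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq_ms);
	if (err)
		return err;

	return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt_us);
}

With guard(mutex) from linux/cleanup.h, the mutex is released automatically on
every return path, so no unlock labels are needed.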
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index aa962c783cdf..fa7068aac334 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -8,221 +8,168 @@
#include <regs/xe_gt_regs.h>
#include "xe_device.h"
#include "xe_gt.h"
-#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
+#include "xe_platform_types.h"
#include "xe_pm.h"
/**
* DOC: Xe GT Throttle
*
- * Provides sysfs entries and other helpers for frequency throttle reasons in GT
+ * The GT frequency may be throttled by hardware/firmware for various reasons
+ * that are provided through attributes under the ``freq0/throttle/`` directory.
+ * Their availability depends on the platform, and an attribute may not be
+ * visible if its throttle reason is not available.
*
- * device/gt#/freq0/throttle/status - Overall status
- * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
- * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
- * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
- * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
- * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
- * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
- * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
- * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC
+ * The following attributes are available on the Crescent Island platform:
+ *
+ * - ``status``: Overall throttle status
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4
+ * - ``reason_prochot``: prochot
+ * - ``reason_soc_thermal``: SoC thermal
+ * - ``reason_mem_thermal``: Memory thermal
+ * - ``reason_vr_thermal``: VR thermal
+ * - ``reason_iccmax``: ICCMAX
+ * - ``reason_ratl``: RATL thermal algorithm
+ * - ``reason_soc_avg_thermal``: SoC average temp
+ * - ``reason_fastvmode``: VR is hitting FastVMode
+ * - ``reason_psys_pl1``: PSYS PL1
+ * - ``reason_psys_pl2``: PSYS PL2
+ * - ``reason_p0_freq``: P0 frequency
+ * - ``reason_psys_crit``: PSYS critical
+ *
+ * Other platforms support the following reasons:
+ *
+ * - ``status``: Overall status
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4, Iccmax etc.
+ * - ``reason_thermal``: thermal
+ * - ``reason_prochot``: prochot
+ * - ``reason_ratl``: RATL thermal algorithm
+ * - ``reason_vr_thermalert``: VR THERMALERT
+ * - ``reason_vr_tdc``: VR TDC
*/
-static struct xe_gt *
-dev_to_gt(struct device *dev)
-{
- return kobj_to_gt(dev->kobj.parent);
-}
-
-u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
-{
- u32 reg;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
- else
- reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return reg;
-}
-
-static u32 read_status(struct xe_gt *gt)
-{
- u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
-
- xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
- return status;
-}
-
-static u32 read_reason_pl1(struct xe_gt *gt)
-{
- u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK;
-
- return pl1;
-}
+struct throttle_attribute {
+ struct kobj_attribute attr;
+ u32 mask;
+};
-static u32 read_reason_pl2(struct xe_gt *gt)
+static struct xe_gt *dev_to_gt(struct device *dev)
{
- u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK;
-
- return pl2;
+ return kobj_to_gt(dev->kobj.parent);
}
-static u32 read_reason_pl4(struct xe_gt *gt)
+static struct xe_gt *throttle_to_gt(struct kobject *kobj)
{
- u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK;
-
- return pl4;
+ return dev_to_gt(kobj_to_dev(kobj));
}
-static u32 read_reason_thermal(struct xe_gt *gt)
+static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr)
{
- u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK;
-
- return thermal;
+ return container_of(attr, struct throttle_attribute, attr);
}
-static u32 read_reason_prochot(struct xe_gt *gt)
+u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
- u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK;
-
- return prochot;
-}
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_reg reg;
+ u32 val, mask;
-static u32 read_reason_ratl(struct xe_gt *gt)
-{
- u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK;
+ if (xe_gt_is_media_type(gt))
+ reg = MTL_MEDIA_PERF_LIMIT_REASONS;
+ else
+ reg = GT0_PERF_LIMIT_REASONS;
- return ratl;
-}
+ if (xe->info.platform == XE_CRESCENTISLAND)
+ mask = CRI_PERF_LIMIT_REASONS_MASK;
+ else
+ mask = GT0_PERF_LIMIT_REASONS_MASK;
-static u32 read_reason_vr_thermalert(struct xe_gt *gt)
-{
- u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK;
+ xe_pm_runtime_get(xe);
+ val = xe_mmio_read32(&gt->mmio, reg) & mask;
+ xe_pm_runtime_put(xe);
- return thermalert;
+ return val;
}
-static u32 read_reason_vr_tdc(struct xe_gt *gt)
+static bool is_throttled_by(struct xe_gt *gt, u32 mask)
{
- u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK;
-
- return tdc;
+ return xe_gt_throttle_get_limit_reasons(gt) & mask;
}
-static ssize_t status_show(struct kobject *kobj,
+static ssize_t reason_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool status = !!read_status(gt);
-
- return sysfs_emit(buff, "%u\n", status);
-}
-static struct kobj_attribute attr_status = __ATTR_RO(status);
-
-static ssize_t reason_pl1_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl1 = !!read_reason_pl1(gt);
-
- return sysfs_emit(buff, "%u\n", pl1);
-}
-static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1);
-
-static ssize_t reason_pl2_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl2 = !!read_reason_pl2(gt);
-
- return sysfs_emit(buff, "%u\n", pl2);
-}
-static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2);
-
-static ssize_t reason_pl4_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl4 = !!read_reason_pl4(gt);
-
- return sysfs_emit(buff, "%u\n", pl4);
-}
-static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4);
-
-static ssize_t reason_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool thermal = !!read_reason_thermal(gt);
+ struct throttle_attribute *ta = kobj_attribute_to_throttle(attr);
+ struct xe_gt *gt = throttle_to_gt(kobj);
- return sysfs_emit(buff, "%u\n", thermal);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask));
}
-static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
-static ssize_t reason_prochot_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool prochot = !!read_reason_prochot(gt);
+#define THROTTLE_ATTR_RO(name, _mask) \
+ struct throttle_attribute attr_##name = { \
+ .attr = __ATTR(name, 0444, reason_show, NULL), \
+ .mask = _mask, \
+ }
- return sysfs_emit(buff, "%u\n", prochot);
-}
-static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot);
-
-static ssize_t reason_ratl_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool ratl = !!read_reason_ratl(gt);
-
- return sysfs_emit(buff, "%u\n", ratl);
-}
-static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl);
-
-static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool thermalert = !!read_reason_vr_thermalert(gt);
-
- return sysfs_emit(buff, "%u\n", thermalert);
-}
-static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert);
-
-static ssize_t reason_vr_tdc_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool tdc = !!read_reason_vr_tdc(gt);
-
- return sysfs_emit(buff, "%u\n", tdc);
-}
-static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
+static THROTTLE_ATTR_RO(status, U32_MAX);
+static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
+static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
+static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK);
+static THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK);
+static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK);
+static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
+static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
static struct attribute *throttle_attrs[] = {
- &attr_status.attr,
- &attr_reason_pl1.attr,
- &attr_reason_pl2.attr,
- &attr_reason_pl4.attr,
- &attr_reason_thermal.attr,
- &attr_reason_prochot.attr,
- &attr_reason_ratl.attr,
- &attr_reason_vr_thermalert.attr,
- &attr_reason_vr_tdc.attr,
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_thermal.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
+ &attr_reason_vr_thermalert.attr.attr,
+ &attr_reason_vr_tdc.attr.attr,
+ NULL
+};
+
+static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK);
+static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK);
+static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK);
+static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
+
+static struct attribute *cri_throttle_attrs[] = {
+ /* Common */
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
+ /* CRI */
+ &attr_reason_vr_thermal.attr.attr,
+ &attr_reason_soc_thermal.attr.attr,
+ &attr_reason_mem_thermal.attr.attr,
+ &attr_reason_iccmax.attr.attr,
+ &attr_reason_soc_avg_thermal.attr.attr,
+ &attr_reason_fastvmode.attr.attr,
+ &attr_reason_psys_pl1.attr.attr,
+ &attr_reason_psys_pl2.attr.attr,
+ &attr_reason_p0_freq.attr.attr,
+ &attr_reason_psys_crit.attr.attr,
NULL
};
@@ -231,19 +178,37 @@ static const struct attribute_group throttle_group_attrs = {
.attrs = throttle_attrs,
};
+static const struct attribute_group cri_throttle_group_attrs = {
+ .name = "throttle",
+ .attrs = cri_throttle_attrs,
+};
+
+static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe)
+{
+ switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ return &cri_throttle_group_attrs;
+ default:
+ return &throttle_group_attrs;
+ }
+}
+
static void gt_throttle_sysfs_fini(void *arg)
{
struct xe_gt *gt = arg;
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
- sysfs_remove_group(gt->freq, &throttle_group_attrs);
+ sysfs_remove_group(gt->freq, group);
}
int xe_gt_throttle_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
int err;
- err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+ err = sysfs_create_group(gt->freq, group);
if (err)
return err;
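From userspace, each attribute reads back as 0 or 1. A minimal sketch of a
consumer (the sysfs path is an assumption; card/tile/gt indices vary per
system):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; adjust the card/tile/gt indices for your system */
	const char *path =
		"/sys/class/drm/card0/device/tile0/gt0/freq0/throttle/reason_pl1";
	FILE *f = fopen(path, "r");
	unsigned int active;

	if (!f)
		return 1;
	if (fscanf(f, "%u", &active) == 1)
		printf("PL1 throttling %s\n", active ? "active" : "inactive");
	fclose(f);
	return 0;
}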
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 0b525643a048..0a728180b6fe 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -220,71 +220,6 @@ struct xe_gt {
* operations (e.g. migrations, fixing page tables)
*/
u16 reserved_bcs_instance;
- /** @usm.pf_wq: page fault work queue, unbound, high priority */
- struct workqueue_struct *pf_wq;
- /** @usm.acc_wq: access counter work queue, unbound, high priority */
- struct workqueue_struct *acc_wq;
- /**
- * @usm.pf_queue: Page fault queue used to sync faults so faults can
- * be processed not under the GuC CT lock. The queue is sized so
- * it can sync all possible faults (1 per physical engine).
- * Multiple queues exist for page faults from different VMs to be
- * processed in parallel.
- */
- struct pf_queue {
- /** @usm.pf_queue.gt: back pointer to GT */
- struct xe_gt *gt;
- /** @usm.pf_queue.data: data in the page fault queue */
- u32 *data;
- /**
- * @usm.pf_queue.num_dw: number of DWORDS in the page
- * fault queue. Dynamically calculated based on the number
- * of compute resources available.
- */
- u32 num_dw;
- /**
- * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
- * moved by worker which processes faults (consumer).
- */
- u16 tail;
- /**
- * @usm.pf_queue.head: head pointer in DWs for page fault queue,
- * moved by G2H handler (producer).
- */
- u16 head;
- /** @usm.pf_queue.lock: protects page fault queue */
- spinlock_t lock;
- /** @usm.pf_queue.worker: to process page faults */
- struct work_struct worker;
-#define NUM_PF_QUEUE 4
- } pf_queue[NUM_PF_QUEUE];
- /**
- * @usm.acc_queue: Same as page fault queue, cannot process access
- * counters under CT lock.
- */
- struct acc_queue {
- /** @usm.acc_queue.gt: back pointer to GT */
- struct xe_gt *gt;
-#define ACC_QUEUE_NUM_DW 128
- /** @usm.acc_queue.data: data in the page fault queue */
- u32 data[ACC_QUEUE_NUM_DW];
- /**
- * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
- * moved by worker which processes counters
- * (consumer).
- */
- u16 tail;
- /**
- * @usm.acc_queue.head: head pointer in DWs for access counter queue,
- * moved by G2H handler (producer).
- */
- u16 head;
- /** @usm.acc_queue.lock: protects page fault queue */
- spinlock_t lock;
- /** @usm.acc_queue.worker: to process access counters */
- struct work_struct worker;
-#define NUM_ACC_QUEUE 4
- } acc_queue[NUM_ACC_QUEUE];
} usm;
/** @ordered_wq: used to serialize GT resets and TDRs */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e68953ef3a00..2697d711adb2 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -21,12 +21,12 @@
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_gt.h"
-#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_guc.h"
#include "xe_guc_log.h"
+#include "xe_guc_pagefault.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_guc_tlb_inval.h"
@@ -199,6 +199,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ cancel_work_sync(&ct->dead.worker);
+#endif
ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
@@ -1545,10 +1548,6 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
- case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
- ret = xe_guc_access_counter_notify_handler(guc, payload,
- adj_len);
- break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
break;
diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c
new file mode 100644
index 000000000000..719a18187a31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "abi/guc_actions_abi.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_pagefault.h"
+#include "xe_pagefault.h"
+
+static void guc_ack_fault(struct xe_pagefault *pf, int err)
+{
+ u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]);
+ u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]);
+ u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]);
+ u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) |
+ (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) <<
+ PFD_PDATA_HI_SHIFT);
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+
+ FIELD_PREP(PFR_VALID, 1) |
+ FIELD_PREP(PFR_SUCCESS, !!err) |
+ FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+ FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+ FIELD_PREP(PFR_ASID, pf->consumer.asid),
+
+ FIELD_PREP(PFR_VFID, vfid) |
+ FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) |
+ FIELD_PREP(PFR_ENG_CLASS, engine_class) |
+ FIELD_PREP(PFR_PDATA, pdata),
+ };
+ struct xe_guc *guc = pf->producer.private;
+
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static const struct xe_pagefault_ops guc_pagefault_ops = {
+ .ack_fault = guc_ack_fault,
+};
+
+/**
+ * xe_guc_pagefault_handler() - G2H page fault handler
+ * @guc: GuC object
+ * @msg: G2H message
+ * @len: Length of G2H message
+ *
+ * Parse a GuC-to-host (G2H) message into a struct xe_pagefault and forward it
+ * to the Xe page fault layer.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_pagefault pf;
+ int i;
+
+#define GUC_PF_MSG_LEN_DW \
+ (sizeof(struct xe_guc_pagefault_desc) / sizeof(u32))
+
+ BUILD_BUG_ON(GUC_PF_MSG_LEN_DW > XE_PAGEFAULT_PRODUCER_MSG_LEN_DW);
+
+ if (len != GUC_PF_MSG_LEN_DW)
+ return -EPROTO;
+
+ pf.gt = guc_to_gt(guc);
+
+ /*
+ * XXX: These values happen to match the enum in xe_pagefault_types.h.
+ * If that changes, we’ll need to remap them here.
+ */
+ pf.consumer.page_addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, msg[3])
+ << PFD_VIRTUAL_ADDR_HI_SHIFT) |
+ (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) <<
+ PFD_VIRTUAL_ADDR_LO_SHIFT);
+ pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]);
+ pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]);
+ pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]);
+ if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0]))
+ pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK;
+ else
+ pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]);
+ pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]);
+ pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]);
+
+ pf.producer.private = guc;
+ pf.producer.ops = &guc_pagefault_ops;
+ for (i = 0; i < GUC_PF_MSG_LEN_DW; ++i)
+ pf.producer.msg[i] = msg[i];
+
+#undef GUC_PF_MSG_LEN_DW
+
+ return xe_pagefault_handler(guc_to_xe(guc), &pf);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.h b/drivers/gpu/drm/xe/xe_guc_pagefault.h
new file mode 100644
index 000000000000..3bd599e7207c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pagefault.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PAGEFAULT_H_
+#define _XE_GUC_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
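With the access counter case dropped from process_g2h_msg() above, the page
fault G2H action is presumably dispatched to this new handler in a hunk not
shown here; a sketch of what that dispatch would look like:

	case XE_GUC_ACTION_PAGE_FAULT_NOTIFICATION:
		ret = xe_guc_pagefault_handler(guc, payload, adj_len);
		break;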
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 56a5804726e9..5003e3c4dd17 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -2333,6 +2333,20 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q)
xe_vm_assert_held(q->vm); /* User queues VM's should be locked */
}
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+/**
+ * xe_migrate_job_lock_assert() - Assert that the queue's migrate job lock is held
+ * @q: Migrate queue
+ */
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+ struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
+
+ xe_gt_assert(q->gt, q == m->q);
+ lockdep_assert_held(&m->job_mutex);
+}
+#endif
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 4fad324b6253..9b5791617f5e 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -152,6 +152,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
void xe_migrate_wait(struct xe_migrate *m);
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
+#else
+static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+}
+#endif
+
void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q);
void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q);
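The #else stub follows the usual pattern for lockdep-only helpers: call sites
may assert unconditionally, and the check compiles out when
CONFIG_PROVE_LOCKING is disabled. A hypothetical call site:

static void my_emit_migrate_work(struct xe_exec_queue *q)
{
	/* No-op unless CONFIG_PROVE_LOCKING is enabled */
	xe_migrate_job_lock_assert(q);

	/* ... emit work that requires the migrate job_mutex ... */
}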
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index f901ba52b403..7a13a7bd99a6 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -10,6 +10,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
@@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro
return 0;
}
-static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+static int xe_oa_parse_syncs(struct xe_oa *oa,
+ struct xe_oa_stream *stream,
+ struct xe_oa_open_param *param)
{
int ret, num_syncs, num_ufence = 0;
@@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
- &param->syncs_user[num_syncs], 0);
+ &param->syncs_user[num_syncs],
+ stream->ufence_syncobj,
+ ++stream->ufence_timeline_value, 0);
if (ret)
goto err_syncs;
@@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
return -ENODEV;
param.xef = stream->xef;
- err = xe_oa_parse_syncs(stream->oa, &param);
+ err = xe_oa_parse_syncs(stream->oa, stream, &param);
if (err)
goto err_config_put;
@@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
if (stream->exec_q)
xe_exec_queue_put(stream->exec_q);
+ drm_syncobj_put(stream->ufence_syncobj);
kfree(stream);
}
@@ -1827,6 +1833,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
struct xe_oa_open_param *param)
{
struct xe_oa_stream *stream;
+ struct drm_syncobj *ufence_syncobj;
int stream_fd;
int ret;
@@ -1837,17 +1844,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
goto exit;
}
+ ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (ret)
+ goto exit;
+
stream = kzalloc(sizeof(*stream), GFP_KERNEL);
if (!stream) {
ret = -ENOMEM;
- goto exit;
+ goto err_syncobj;
}
-
+ stream->ufence_syncobj = ufence_syncobj;
stream->oa = oa;
- ret = xe_oa_stream_init(stream, param);
+
+ ret = xe_oa_parse_syncs(oa, stream, param);
if (ret)
goto err_free;
+ ret = xe_oa_stream_init(stream, param);
+ if (ret) {
+ while (param->num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[param->num_syncs]);
+ kfree(param->syncs);
+ goto err_free;
+ }
+
if (!param->disabled) {
ret = xe_oa_enable_locked(stream);
if (ret)
@@ -1871,6 +1892,8 @@ err_destroy:
xe_oa_stream_destroy(stream);
err_free:
kfree(stream);
+err_syncobj:
+ drm_syncobj_put(ufence_syncobj);
exit:
return ret;
}
@@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
goto err_exec_q;
}
- ret = xe_oa_parse_syncs(oa, &param);
- if (ret)
- goto err_exec_q;
-
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
if (ret < 0)
- goto err_sync_cleanup;
+ goto err_exec_q;
return ret;
-err_sync_cleanup:
- while (param.num_syncs--)
- xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
- kfree(param.syncs);
err_exec_q:
if (param.exec_q)
xe_exec_queue_put(param.exec_q);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 2628f78c4e8d..daf701b5d48b 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -15,6 +15,8 @@
#include "regs/xe_reg_defs.h"
#include "xe_hw_engine_types.h"
+struct drm_syncobj;
+
#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
enum xe_oa_report_header {
@@ -248,6 +250,12 @@ struct xe_oa_stream {
/** @xef: xe_file with which the stream was opened */
struct xe_file *xef;
+ /** @ufence_syncobj: User fence syncobj */
+ struct drm_syncobj *ufence_syncobj;
+
+ /** @ufence_timeline_value: User fence timeline value */
+ u64 ufence_timeline_value;
+
/** @last_fence: fence to use in stream destroy when needed */
struct dma_fence *last_fence;
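Taken together with the xe_oa.c hunks above, the pattern is: one signaled
syncobj is created per stream at open time, and every subsequent parse of user
syncs binds them to the next point on that private timeline. A condensed
sketch (simplified; error handling omitted):

/* At stream open: one signaled syncobj owned by the stream */
ret = drm_syncobj_create(&stream->ufence_syncobj,
			 DRM_SYNCOBJ_CREATE_SIGNALED, NULL);

/* On each (re)configuration: advance the stream's private timeline */
ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[n],
			  &param->syncs_user[n],
			  stream->ufence_syncobj,
			  ++stream->ufence_timeline_value, 0);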
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
new file mode 100644
index 000000000000..fe3e40145012
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+#include "xe_gt_stats.h"
+#include "xe_hw_engine.h"
+#include "xe_pagefault.h"
+#include "xe_pagefault_types.h"
+#include "xe_svm.h"
+#include "xe_trace_bo.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Xe page faults
+ *
+ * Xe page faults are handled in two layers. The producer layer interacts with
+ * hardware or firmware to receive and parse faults into struct xe_pagefault,
+ * then forwards them to the consumer. The consumer layer services the faults
+ * (e.g., memory migration, page table updates) and acknowledges the result back
+ * to the producer, which then forwards the results to the hardware or firmware.
+ * The consumer uses a page fault queue sized to absorb all potential faults and
+ * a multi-threaded worker to process them. Multiple producers are supported,
+ * with a single shared consumer.
+ *
+ * xe_pagefault.c implements the consumer layer.
+ */
+
+static int xe_pagefault_entry_size(void)
+{
+ /*
+ * Power-of-two alignment is not a hardware requirement, but rather a
+ * software restriction that makes the math for page fault queue
+ * management simpler.
+ */
+ return roundup_pow_of_two(sizeof(struct xe_pagefault));
+}
+
+static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma,
+ struct xe_vram_region *vram, bool need_vram_move)
+{
+ struct xe_bo *bo = xe_vma_bo(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
+ int err;
+
+ err = xe_vm_lock_vma(exec, vma);
+ if (err)
+ return err;
+
+ if (!bo)
+ return 0;
+
+ return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
+ xe_bo_validate(bo, vm, true, exec);
+}
+
+static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma,
+ bool atomic)
+{
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct dma_fence *fence;
+ int err, needs_vram;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
+ if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma)))
+ return needs_vram < 0 ? needs_vram : -EACCES;
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ xe_vma_size(vma) / SZ_1K);
+
+ trace_xe_vma_pagefault(vma);
+
+ /* Check if VMA is valid, opportunistic check only */
+ if (xe_vm_has_valid_gpu_mapping(tile, vma->tile_present,
+ vma->tile_invalidated) && !atomic)
+ return 0;
+
+retry_userptr:
+ if (xe_vma_is_userptr(vma) &&
+ xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
+ err = xe_vma_userptr_pin_pages(uvma);
+ if (err)
+ return err;
+ }
+
+ /* Lock VM and BOs dma-resv */
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ err = xe_pagefault_begin(&exec, vma, tile->mem.vram,
+ needs_vram == 1);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ if (err)
+ goto unlock_dma_resv;
+
+ /* Bind VMA only to the GT that has faulted */
+ trace_xe_vma_pf_bind(vma);
+ xe_vm_set_validation_exec(vm, &exec);
+ fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ xe_validation_retry_on_oom(&ctx, &err);
+ goto unlock_dma_resv;
+ }
+ }
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+unlock_dma_resv:
+ xe_validation_ctx_fini(&ctx);
+ if (err == -EAGAIN)
+ goto retry_userptr;
+
+ return err;
+}
+
+static bool
+xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type)
+{
+ return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC;
+}
+
+static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid)
+{
+ struct xe_vm *vm;
+
+ down_read(&xe->usm.lock);
+ vm = xa_load(&xe->usm.asid_to_vm, asid);
+ if (vm && xe_vm_in_fault_mode(vm))
+ xe_vm_get(vm);
+ else
+ vm = ERR_PTR(-EINVAL);
+ up_read(&xe->usm.lock);
+
+ return vm;
+}
+
+static int xe_pagefault_service(struct xe_pagefault *pf)
+{
+ struct xe_gt *gt = pf->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_vm *vm;
+ struct xe_vma *vma = NULL;
+ int err;
+ bool atomic;
+
+ /* Producer flagged this fault to be nacked */
+ if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK)
+ return -EFAULT;
+
+ vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
+
+ /*
+ * TODO: Change to read lock? Using write lock for simplicity.
+ */
+ down_write(&vm->lock);
+
+ if (xe_vm_is_closed(vm)) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
+ vma = xe_vm_find_vma_by_addr(vm, pf->consumer.page_addr);
+ if (!vma) {
+ err = -EINVAL;
+ goto unlock_vm;
+ }
+
+ atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type);
+
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ err = xe_svm_handle_pagefault(vm, vma, gt,
+ pf->consumer.page_addr, atomic);
+ else
+ err = xe_pagefault_handle_vma(gt, vma, atomic);
+
+unlock_vm:
+ if (!err)
+ vm->usm.last_fault_vma = vma;
+ up_write(&vm->lock);
+ xe_vm_put(vm);
+
+ return err;
+}
+
+static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue,
+ struct xe_pagefault *pf)
+{
+ bool found_fault = false;
+
+ spin_lock_irq(&pf_queue->lock);
+ if (pf_queue->tail != pf_queue->head) {
+ memcpy(pf, pf_queue->data + pf_queue->tail, sizeof(*pf));
+ pf_queue->tail = (pf_queue->tail + xe_pagefault_entry_size()) %
+ pf_queue->size;
+ found_fault = true;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+
+ return found_fault;
+}
+
+static void xe_pagefault_print(struct xe_pagefault *pf)
+{
+ xe_gt_dbg(pf->gt, "\n\tASID: %d\n"
+ "\tFaulted Address: 0x%08x%08x\n"
+ "\tFaultType: %d\n"
+ "\tAccessType: %d\n"
+ "\tFaultLevel: %d\n"
+ "\tEngineClass: %d %s\n"
+ "\tEngineInstance: %d\n",
+ pf->consumer.asid,
+ upper_32_bits(pf->consumer.page_addr),
+ lower_32_bits(pf->consumer.page_addr),
+ pf->consumer.fault_type,
+ pf->consumer.access_type,
+ pf->consumer.fault_level,
+ pf->consumer.engine_class,
+ xe_hw_engine_class_to_str(pf->consumer.engine_class),
+ pf->consumer.engine_instance);
+}
+
+static void xe_pagefault_queue_work(struct work_struct *w)
+{
+ struct xe_pagefault_queue *pf_queue =
+ container_of(w, typeof(*pf_queue), worker);
+ struct xe_pagefault pf;
+ unsigned long threshold;
+
+#define USM_QUEUE_MAX_RUNTIME_MS 20
+ threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+ while (xe_pagefault_queue_pop(pf_queue, &pf)) {
+ int err;
+
+ if (!pf.gt) /* Fault squashed during reset */
+ continue;
+
+ err = xe_pagefault_service(&pf);
+ if (err) {
+ xe_pagefault_print(&pf);
+ xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n",
+ ERR_PTR(err));
+ }
+
+ pf.producer.ops->ack_fault(&pf, err);
+
+ if (time_after(jiffies, threshold)) {
+ queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w);
+ break;
+ }
+ }
+#undef USM_QUEUE_MAX_RUNTIME_MS
+}
+
+static int xe_pagefault_queue_init(struct xe_device *xe,
+ struct xe_pagefault_queue *pf_queue)
+{
+ struct xe_gt *gt;
+ int total_num_eus = 0;
+ u8 id;
+
+ for_each_gt(gt, xe, id) {
+ xe_dss_mask_t all_dss;
+ int num_dss, num_eus;
+
+ bitmap_or(all_dss, gt->fuse_topo.g_dss_mask,
+ gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS);
+
+ num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
+ num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
+ XE_MAX_EU_FUSE_BITS) * num_dss;
+
+ total_num_eus += num_eus;
+ }
+
+ xe_assert(xe, total_num_eus);
+
+ /*
+ * A user can issue separate page faults per EU and per CS.
+ *
+ * XXX: Multiplier required as compute UMDs are getting PF queue errors
+ * without it. Follow up on why this multiplier is required.
+ */
+#define PF_MULTIPLIER 8
+ pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) *
+ xe_pagefault_entry_size() * PF_MULTIPLIER;
+ pf_queue->size = roundup_pow_of_two(pf_queue->size);
+#undef PF_MULTIPLIER
+
+ drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u",
+ xe_pagefault_entry_size(), total_num_eus, pf_queue->size);
+
+ spin_lock_init(&pf_queue->lock);
+ INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work);
+
+ pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL);
+ if (!pf_queue->data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void xe_pagefault_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ destroy_workqueue(xe->usm.pf_wq);
+}
+
+/**
+ * xe_pagefault_init() - Page fault init
+ * @xe: xe device instance
+ *
+ * Initialize Xe page fault state. Must be done after reading fuses.
+ *
+ * Return: 0 on Success, errno on failure
+ */
+int xe_pagefault_init(struct xe_device *xe)
+{
+ int err, i;
+
+ if (!xe->info.has_usm)
+ return 0;
+
+ xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ XE_PAGEFAULT_QUEUE_COUNT);
+ if (!xe->usm.pf_wq)
+ return -ENOMEM;
+
+ for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
+ err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i);
+ if (err)
+ goto err_out;
+ }
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe);
+
+err_out:
+ destroy_workqueue(xe->usm.pf_wq);
+ return err;
+}
+
+static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_pagefault_queue *pf_queue)
+{
+ u32 i;
+
+ /* Driver load failure guard / USM not enabled guard */
+ if (!pf_queue->data)
+ return;
+
+ /* Squash all pending faults on the GT */
+
+ spin_lock_irq(&pf_queue->lock);
+ for (i = pf_queue->tail; i != pf_queue->head;
+ i = (i + xe_pagefault_entry_size()) % pf_queue->size) {
+ struct xe_pagefault *pf = pf_queue->data + i;
+
+ if (pf->gt == gt)
+ pf->gt = NULL;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+}
+
+/**
+ * xe_pagefault_reset() - Page fault reset for a GT
+ * @xe: xe device instance
+ * @gt: GT being reset
+ *
+ * Reset the Xe page fault state for a GT; that is, squash any pending faults on
+ * the GT.
+ */
+void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
+{
+ int i;
+
+ for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i)
+ xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i);
+}
+
+static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue)
+{
+ lockdep_assert_held(&pf_queue->lock);
+
+ return CIRC_SPACE(pf_queue->head, pf_queue->tail, pf_queue->size) <=
+ xe_pagefault_entry_size();
+}
+
+/**
+ * xe_pagefault_handler() - Page fault handler
+ * @xe: xe device instance
+ * @pf: Page fault
+ *
+ * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to
+ * service it. Safe to be called from IRQ or process context. Reclaim safe.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf)
+{
+ struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue +
+ (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT);
+ unsigned long flags;
+ bool full;
+
+ spin_lock_irqsave(&pf_queue->lock, flags);
+ full = xe_pagefault_queue_full(pf_queue);
+ if (!full) {
+ memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf));
+ pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) %
+ pf_queue->size;
+ queue_work(xe->usm.pf_wq, &pf_queue->worker);
+ } else {
+ drm_warn(&xe->drm,
+ "PageFault Queue (%d) full, shouldn't be possible\n",
+ pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT);
+ }
+ spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+ return full ? -ENOSPC : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h
new file mode 100644
index 000000000000..bd0cdf9ed37f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGEFAULT_H_
+#define _XE_PAGEFAULT_H_
+
+struct xe_device;
+struct xe_gt;
+struct xe_pagefault;
+
+int xe_pagefault_init(struct xe_device *xe);
+
+void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt);
+
+int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf);
+
+#endif
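The producer contract implied by this interface: fill in a struct xe_pagefault
from the raw HW/FW message, attach an ack callback, and call
xe_pagefault_handler() from any context. A minimal sketch of a hypothetical
producer (all my_* names are illustrative, not part of this series):

static void my_ack_fault(struct xe_pagefault *pf, int err)
{
	/* Reply to the HW/FW interface using pf->producer.msg */
}

static const struct xe_pagefault_ops my_pagefault_ops = {
	.ack_fault = my_ack_fault,
};

static int my_fault_irq(struct xe_device *xe, struct xe_gt *gt,
			const u32 *msg, u64 addr, u32 asid)
{
	struct xe_pagefault pf = {
		.gt = gt,
		.consumer.page_addr = addr,
		.consumer.asid = asid,
		.producer.ops = &my_pagefault_ops,
	};
	int i;

	for (i = 0; i < XE_PAGEFAULT_PRODUCER_MSG_LEN_DW; ++i)
		pf.producer.msg[i] = msg[i];

	/* Safe from IRQ context; the consumer defers work to a worker */
	return xe_pagefault_handler(xe, &pf);
}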
diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h
new file mode 100644
index 000000000000..d3b516407d60
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault_types.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGEFAULT_TYPES_H_
+#define _XE_PAGEFAULT_TYPES_H_
+
+#include <linux/workqueue.h>
+
+struct xe_gt;
+struct xe_pagefault;
+
+/** enum xe_pagefault_access_type - Xe page fault access type */
+enum xe_pagefault_access_type {
+ /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */
+ XE_PAGEFAULT_ACCESS_TYPE_READ = 0,
+ /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */
+ XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1,
+ /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */
+ XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2,
+};
+
+/** enum xe_pagefault_type - Xe page fault type */
+enum xe_pagefault_type {
+ /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */
+ XE_PAGEFAULT_TYPE_NOT_PRESENT = 0,
+ /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */
+ XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1,
+ /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */
+ XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+/** struct xe_pagefault_ops - Xe pagefault ops (producer) */
+struct xe_pagefault_ops {
+ /**
+ * @ack_fault: Ack fault
+ * @pf: Page fault
+ * @err: Error state of fault
+ *
+ * Page fault producer receives acknowledgment from the consumer and
+ * sends the result to the HW/FW interface.
+ */
+ void (*ack_fault)(struct xe_pagefault *pf, int err);
+};
+
+/**
+ * struct xe_pagefault - Xe page fault
+ *
+ * Generic page fault structure for communication between producer and consumer.
+ * Carefully sized to be 64 bytes. Upon a device page fault, the producer
+ * populates this structure, and the consumer copies it into the page-fault
+ * queue for deferred handling.
+ */
+struct xe_pagefault {
+ /**
+ * @gt: GT of fault
+ */
+ struct xe_gt *gt;
+ /**
+ * @consumer: State for the software handling the fault. Populated by
+ * the producer and may be modified by the consumer to communicate
+ * information back to the producer upon fault acknowledgment.
+ */
+ struct {
+ /** @consumer.page_addr: address of page fault */
+ u64 page_addr;
+ /** @consumer.asid: address space ID */
+ u32 asid;
+ /**
+ * @consumer.access_type: access type, u8 rather than enum to
+ * keep size compact
+ */
+ u8 access_type;
+ /**
+ * @consumer.fault_type: fault type, u8 rather than enum to
+ * keep size compact
+ */
+ u8 fault_type;
+#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */
+ /** @consumer.fault_level: fault level */
+ u8 fault_level;
+ /** @consumer.engine_class: engine class */
+ u8 engine_class;
+ /** @consumer.engine_instance: engine instance */
+ u8 engine_instance;
+ /** @consumer.reserved: reserved for future expansion */
+ u8 reserved[7];
+ } consumer;
+ /**
+ * @producer: State for the producer (i.e., HW/FW interface). Populated
+ * by the producer and should not be modified, or even inspected, by the
+ * consumer, other than to invoke the @producer.ops callbacks.
+ */
+ struct {
+ /** @producer.private: private pointer */
+ void *private;
+ /** @producer.ops: operations */
+ const struct xe_pagefault_ops *ops;
+#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4
+ /**
+ * @producer.msg: page fault message, used by the producer during fault
+ * acknowledgment to formulate the response to the HW/FW interface.
+ * Stored inline in struct xe_pagefault because the producer
+ * typically receives the fault in a context where memory cannot
+ * be allocated (e.g., atomic context or the reclaim path).
+ */
+ u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
+ } producer;
+};
+
+/**
+ * struct xe_pagefault_queue - Xe pagefault queue (consumer)
+ *
+ * Used to capture all device page faults for deferred processing. Size this
+ * queue to absorb the device’s worst-case number of outstanding faults.
+ */
+struct xe_pagefault_queue {
+ /**
+ * @data: Data in queue containing struct xe_pagefault, protected by
+ * @lock
+ */
+ void *data;
+ /** @size: Size of queue in bytes */
+ u32 size;
+ /** @head: Head pointer in bytes, moved by producer, protected by @lock */
+ u32 head;
+ /** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */
+ u32 tail;
+ /** @lock: protects page fault queue */
+ spinlock_t lock;
+ /** @worker: to process page faults */
+ struct work_struct worker;
+};
+
+#endif
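Since the structure is documented as exactly 64 bytes (8 for @gt, 24 for
@consumer, 32 for @producer on 64-bit builds), a compile-time check keeps the
queue math honest; a sketch of such an assertion, not part of this series:

static_assert(sizeof(struct xe_pagefault) == 64,
	      "struct xe_pagefault must stay 64 bytes for queue sizing");

With that invariant, xe_pagefault_entry_size() folds to a power of two and the
byte-based head/tail arithmetic in xe_pagefault.c remains a simple wrap.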
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 7649b554942a..68171cceea18 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -115,7 +115,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
.coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \
- XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+ XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \
+ .valid = 1 \
}
static const struct xe_pat_table_entry xe2_pat_table[] = {
@@ -368,7 +369,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
if (!fw_ref)
return -ETIMEDOUT;
- drm_printf(p, "PAT table:\n");
+ drm_printf(p, "PAT table: (* = reserved entry)\n");
for (i = 0; i < xe->pat.n_entries; i++) {
if (xe_gt_is_media_type(gt))
@@ -376,14 +377,14 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i,
+ drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
!!(pat & XE2_NO_PROMOTE),
!!(pat & XE2_COMP_EN),
REG_FIELD_GET(XE2_L3_CLOS, pat),
REG_FIELD_GET(XE2_L3_POLICY, pat),
REG_FIELD_GET(XE2_L4_POLICY, pat),
REG_FIELD_GET(XE2_COH_MODE, pat),
- pat);
+ pat, xe->pat.table[i].valid ? "" : " *");
}
/*
@@ -426,18 +427,18 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p)
if (!fw_ref)
return -ETIMEDOUT;
- drm_printf(p, "PAT table:\n");
+ drm_printf(p, "PAT table: (* = reserved entry)\n");
for (i = 0; i < xe->pat.n_entries; i++) {
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)\n", i,
+ drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
!!(pat & XE2_NO_PROMOTE),
REG_FIELD_GET(XE2_L3_CLOS, pat),
REG_FIELD_GET(XE2_L3_POLICY, pat),
REG_FIELD_GET(XE2_L4_POLICY, pat),
REG_FIELD_GET(XE2_COH_MODE, pat),
- pat);
+ pat, xe->pat.table[i].valid ? "" : " *");
}
/*
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 268c9a899f56..05dae03a5f54 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -29,6 +29,11 @@ struct xe_pat_table_entry {
#define XE_COH_NONE 1
#define XE_COH_AT_LEAST_1WAY 2
u16 coh_mode;
+
+ /**
+ * @valid: Set to 1 if the entry is valid, 0 if it's reserved.
+ */
+ u16 valid;
};
/**
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index 735f51effc7a..d0fcde66a774 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -20,6 +20,7 @@
#include "xe_sriov_pf_control.h"
#include "xe_sriov_pf_helpers.h"
#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_sysfs.h"
#include "xe_sriov_printk.h"
static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
@@ -30,18 +31,6 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
xe_sriov_pf_control_reset_vf(xe, n);
}
-static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id)
-{
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-
- xe_assert(xe, IS_SRIOV_PF(xe));
-
- /* caller must use pci_dev_put() */
- return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
- pdev->bus->number,
- pci_iov_virtfn_devfn(pdev, vf_id));
-}
-
static void pf_link_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev);
@@ -60,7 +49,7 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs)
* enforce correct resume order.
*/
for (n = 1; n <= num_vfs; n++) {
- pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1);
+ pdev_vf = xe_pci_sriov_get_vf_pdev(pdev_pf, n);
/* unlikely, something weird is happening, abort */
if (!pdev_vf) {
@@ -150,6 +139,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
num_vfs, total_vfs, str_plural(total_vfs));
+ xe_sriov_pf_sysfs_link_vfs(xe, num_vfs);
+
pf_engine_activity_stats(xe, num_vfs, true);
return num_vfs;
@@ -177,6 +168,8 @@ static int pf_disable_vfs(struct xe_device *xe)
pf_engine_activity_stats(xe, num_vfs, false);
+ xe_sriov_pf_sysfs_unlink_vfs(xe, num_vfs);
+
pci_disable_sriov(pdev);
pf_reset_vfs(xe, num_vfs);
@@ -228,3 +221,25 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
return ret;
}
+
+/**
+ * xe_pci_sriov_get_vf_pdev() - Lookup the VF's PCI device using the VF identifier.
+ * @pdev: the PF's &pci_dev
+ * @vfid: VF identifier (1-based)
+ *
+ * The caller must decrement the reference count by calling pci_dev_put().
+ *
+ * Return: the VF's &pci_dev or NULL if the VF device was not found.
+ */
+struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid)
+{
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+
+ xe_assert(xe, dev_is_pf(&pdev->dev));
+ xe_assert(xe, vfid);
+ xe_assert(xe, vfid <= pci_sriov_get_totalvfs(pdev));
+
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ pci_iov_virtfn_devfn(pdev, vfid - 1));
+}
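Typical usage pairs the lookup with pci_dev_put(); a sketch (the iterating
helper is hypothetical):

static void pf_for_each_vf_pdev(struct pci_dev *pdev, unsigned int num_vfs)
{
	unsigned int n;

	for (n = 1; n <= num_vfs; n++) {
		struct pci_dev *vf_pdev = xe_pci_sriov_get_vf_pdev(pdev, n);

		if (!vf_pdev)
			continue;
		/* ... operate on the VF's PCI device ... */
		pci_dev_put(vf_pdev);
	}
}

Note the vfid is 1-based at this API boundary, while pci_iov_virtfn_devfn()
counts from 0, hence the vfid - 1 in the implementation above.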
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h
index c76dd0d90495..b9105d71dbb1 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.h
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.h
@@ -10,6 +10,7 @@ struct pci_dev;
#ifdef CONFIG_PCI_IOV
int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs);
+struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid);
#else
static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7c5bca78c8bf..884127b4d97d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,8 +3,6 @@
* Copyright © 2022 Intel Corporation
*/
-#include <linux/dma-fence-array.h>
-
#include "xe_pt.h"
#include "regs/xe_gtt_defs.h"
@@ -1340,13 +1338,6 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
- if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
- if (job)
- err = xe_sched_job_last_fence_add_dep(job, vm);
- else
- err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
- }
-
for (i = 0; job && !err && i < vops->num_syncs; i++)
err = xe_sync_entry_add_deps(&vops->syncs[i], job);
@@ -2359,10 +2350,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
struct xe_vm *vm = vops->vm;
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
- struct dma_fence *fence, *ifence, *mfence;
+ struct xe_exec_queue *q = pt_update_ops->q;
+ struct dma_fence *fence, *ifence = NULL, *mfence = NULL;
struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
- struct dma_fence **fences = NULL;
- struct dma_fence_array *cf = NULL;
struct xe_range_fence *rfence;
struct xe_vma_op *op;
int err = 0, i;
@@ -2390,15 +2380,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
#endif
if (pt_update_ops->needs_invalidation) {
- struct xe_exec_queue *q = pt_update_ops->q;
struct xe_dep_scheduler *dep_scheduler =
to_dep_scheduler(q, tile->primary_gt);
ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
- dep_scheduler,
+ dep_scheduler, vm,
pt_update_ops->start,
pt_update_ops->last,
- vm->usm.asid);
+ XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
if (IS_ERR(ijob)) {
err = PTR_ERR(ijob);
goto kill_vm_tile1;
@@ -2410,26 +2399,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
mjob = xe_tlb_inval_job_create(q,
&tile->media_gt->tlb_inval,
- dep_scheduler,
+ dep_scheduler, vm,
pt_update_ops->start,
pt_update_ops->last,
- vm->usm.asid);
+ XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT);
if (IS_ERR(mjob)) {
err = PTR_ERR(mjob);
goto free_ijob;
}
update.mjob = mjob;
-
- fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
- if (!fences) {
- err = -ENOMEM;
- goto free_ijob;
- }
- cf = dma_fence_array_alloc(2);
- if (!cf) {
- err = -ENOMEM;
- goto free_ijob;
- }
}
}
@@ -2460,31 +2438,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
pt_update_ops->last, fence))
dma_fence_wait(fence, false);
- /* tlb invalidation must be done before signaling unbind/rebind */
- if (ijob) {
- struct dma_fence *__fence;
-
+ if (ijob)
ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
- __fence = ifence;
-
- if (mjob) {
- fences[0] = ifence;
- mfence = xe_tlb_inval_job_push(mjob, tile->migrate,
- fence);
- fences[1] = mfence;
-
- dma_fence_array_init(cf, 2, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- __fence = &cf->base;
- }
-
- dma_fence_put(fence);
- fence = __fence;
- }
+ if (mjob)
+ mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence);
- if (!mjob) {
+ if (!mjob && !ijob) {
dma_resv_add_fence(xe_vm_resv(vm), fence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
@@ -2492,6 +2451,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
list_for_each_entry(op, &vops->list, link)
op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+ } else if (ijob && !mjob) {
+ dma_resv_add_fence(xe_vm_resv(vm), ifence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL);
} else {
dma_resv_add_fence(xe_vm_resv(vm), ifence,
pt_update_ops->wait_vm_bookkeep ?
@@ -2511,16 +2478,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (pt_update_ops->needs_svm_lock)
xe_svm_notifier_unlock(vm);
+ /*
+ * The last fence is only used for zero bind queue idling; migrate
+ * queues are not exposed to user space.
+ */
+ if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE))
+ xe_exec_queue_last_fence_set(q, vm, fence);
+
xe_tlb_inval_job_put(mjob);
xe_tlb_inval_job_put(ijob);
+ dma_fence_put(ifence);
+ dma_fence_put(mfence);
return fence;
free_rfence:
kfree(rfence);
free_ijob:
- kfree(cf);
- kfree(fences);
xe_tlb_inval_job_put(mjob);
xe_tlb_inval_job_put(ijob);
kill_vm_tile1:
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 690bc327a363..7ca360b2c20d 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -89,6 +89,13 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4))
},
+ { XE_RTP_NAME("14024997852"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(WHITELIST(FF_MODE,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW),
+ WHITELIST(VFLSKPD,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW))
+ },
};
static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 6ae4cc6a3802..cb674a322113 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -146,6 +146,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
for (i = 0; i < width; ++i)
job->ptrs[i].batch_addr = batch_addr[i];
+ atomic_inc(&q->job_cnt);
xe_pm_runtime_get_noresume(job_to_xe(job));
trace_xe_sched_job_create(job);
return job;
@@ -177,6 +178,7 @@ void xe_sched_job_destroy(struct kref *ref)
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
job_free(job);
+ atomic_dec(&q->job_cnt);
xe_exec_queue_put(q);
xe_pm_runtime_put(xe);
}
@@ -296,23 +298,6 @@ void xe_sched_job_push(struct xe_sched_job *job)
}
/**
- * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
- * @job:job to add the last fence dependency to
- * @vm: virtual memory job belongs to
- *
- * Returns:
- * 0 on success, or an error on failing to expand the array.
- */
-int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
-{
- struct dma_fence *fence;
-
- fence = xe_exec_queue_last_fence_get(job->q, vm);
-
- return drm_sched_job_add_dependency(&job->drm, fence);
-}
-
-/**
* xe_sched_job_init_user_fence - Initialize user_fence for the job
* @job: job whose user_fence needs an init
* @sync: sync to be use to init user_fence
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index b467131b6d5f..1c1cb44216c3 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -58,7 +58,6 @@ bool xe_sched_job_completed(struct xe_sched_job *job);
void xe_sched_job_arm(struct xe_sched_job *job);
void xe_sched_job_push(struct xe_sched_job *job);
-int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
void xe_sched_job_init_user_fence(struct xe_sched_job *job,
struct xe_sync_entry *sync);
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c
index bc1ab9ee31d9..b8af93eb5b5f 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.c
@@ -16,6 +16,7 @@
#include "xe_sriov_pf.h"
#include "xe_sriov_pf_helpers.h"
#include "xe_sriov_pf_service.h"
+#include "xe_sriov_pf_sysfs.h"
#include "xe_sriov_printk.h"
static unsigned int wanted_max_vfs(struct xe_device *xe)
@@ -128,6 +129,10 @@ int xe_sriov_pf_init_late(struct xe_device *xe)
return err;
}
+ err = xe_sriov_pf_sysfs_init(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c
index 663fb0c045e9..01470c42e8a7 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c
@@ -6,6 +6,7 @@
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_policy.h"
#include "xe_sriov.h"
#include "xe_sriov_pf_helpers.h"
#include "xe_sriov_pf_provision.h"
@@ -152,3 +153,286 @@ int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provision
xe->sriov.pf.provision.mode = mode;
return 0;
}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_eq() - Change execution quantum for all VFs and PF.
+ * @xe: the PF &xe_device
+ * @eq: execution quantum in [ms] to set
+ *
+ * Change execution quantum (EQ) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_eq() - Change VF's execution quantum.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @eq: execution quantum in [ms] to set
+ *
+ * Change VF's execution quantum (EQ) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+static int pf_report_unclean(struct xe_gt *gt, unsigned int vfid,
+ const char *what, u32 found, u32 expected)
+{
+ char name[8];
+
+ xe_sriov_dbg(gt_to_xe(gt), "%s on GT%u has %s=%u (expected %u)\n",
+ xe_sriov_function_name(vfid, name, sizeof(name)),
+ gt->info.id, what, found, expected);
+ return -EUCLEAN;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_eq() - Query VF's execution quantum.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @eq: placeholder for the returned execution quantum in [ms]
+ *
+ * Query VF's execution quantum (EQ) provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent, -EUCLEAN is returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid);
+ if (!count++)
+ *eq = value;
+ else if (value != *eq)
+ return pf_report_unclean(gt, vfid, "EQ", value, *eq);
+ }
+
+ return !count ? -ENODATA : 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_pt() - Change preemption timeout for all VFs and PF.
+ * @xe: the PF &xe_device
+ * @pt: preemption timeout in [us] to set
+ *
+ * Change preemption timeout (PT) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_pt() - Change VF's preemption timeout.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @pt: preemption timeout in [us] to set
+ *
+ * Change VF's preemption timeout (PT) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_pt() - Query VF's preemption timeout.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @pt: placeholder for the returned preemption timeout in [us]
+ *
+ * Query VF's preemption timeout (PT) provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent, -EUCLEAN is returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid);
+ if (!count++)
+ *pt = value;
+ else if (value != *pt)
+ return pf_report_unclean(gt, vfid, "PT", value, *pt);
+ }
+
+ return !count ? -ENODATA : 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_priority() - Change scheduling priority of all VFs and PF.
+ * @xe: the PF &xe_device
+ * @prio: scheduling priority to set
+ *
+ * Change the scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio)
+{
+ bool sched_if_idle;
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ /*
+	 * Currently, priority changes that involve VFs are only allowed using
+ * the 'sched_if_idle' policy KLV, so only LOW and NORMAL are supported.
+ */
+ xe_assert(xe, prio < GUC_SCHED_PRIORITY_HIGH);
+ sched_if_idle = prio == GUC_SCHED_PRIORITY_NORMAL;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_policy_set_sched_if_idle(gt, sched_if_idle);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_priority() - Change VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: scheduling priority to set
+ *
+ * Change VF's scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_sched_priority(gt, vfid, prio);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_priority() - Query VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: placeholder for the returned scheduling priority
+ *
+ * Query VF's scheduling priority provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent, -EUCLEAN is returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_sched_priority(gt, vfid);
+ if (!count++)
+ *prio = value;
+ else if (value != *prio)
+ return pf_report_unclean(gt, vfid, "priority", value, *prio);
+ }
+
+ return !count ? -ENODATA : 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h
index cf3657a32e90..bccf23d51396 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h
@@ -6,10 +6,24 @@
#ifndef _XE_SRIOV_PF_PROVISION_H_
#define _XE_SRIOV_PF_PROVISION_H_
+#include <linux/types.h>
+
#include "xe_sriov_pf_provision_types.h"
struct xe_device;
+int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq);
+int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq);
+int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq);
+
+int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt);
+int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt);
+int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt);
+
+int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio);
+int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio);
+int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio);
+
int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs);
int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs);
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
new file mode 100644
index 000000000000..c0b767ac735c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_pci_sriov.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_sysfs.h"
+#include "xe_sriov_printk.h"
+
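+/*
+ * Emit a space-separated list of choices with the active one in brackets,
+ * e.g. "low [normal] high", following the usual sysfs convention for
+ * multi-choice attributes.
+ */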
+static int emit_choice(char *buf, int choice, const char * const *array, size_t size)
+{
+ int pos = 0;
+ int n;
+
+ for (n = 0; n < size; n++) {
+ pos += sysfs_emit_at(buf, pos, "%s%s%s%s",
+ n ? " " : "",
+ n == choice ? "[" : "",
+ array[n],
+ n == choice ? "]" : "");
+ }
+ pos += sysfs_emit_at(buf, pos, "\n");
+
+ return pos;
+}
+
+/*
+ * /sys/bus/pci/drivers/xe/BDF/
+ * :
+ * ├── sriov_admin/
+ * ├── ...
+ * ├── .bulk_profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── pf/
+ * │ ├── ...
+ * │ ├── device -> ../../../BDF
+ * │ └── profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── vf1/
+ * │ ├── ...
+ * │ ├── device -> ../../../BDF.1
+ * │ ├── stop
+ * │ └── profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── vf2/
+ * :
+ * └── vfN/
+ */
+
+struct xe_sriov_kobj {
+ struct kobject base;
+ struct xe_device *xe;
+ unsigned int vfid;
+};
+#define to_xe_sriov_kobj(p) container_of_const((p), struct xe_sriov_kobj, base)
+
+struct xe_sriov_dev_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct xe_device *xe, char *buf);
+ ssize_t (*store)(struct xe_device *xe, const char *buf, size_t count);
+};
+#define to_xe_sriov_dev_attr(p) container_of_const((p), struct xe_sriov_dev_attr, attr)
+
+#define XE_SRIOV_DEV_ATTR(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0644, xe_sriov_dev_attr_##NAME##_show, xe_sriov_dev_attr_##NAME##_store)
+
+#define XE_SRIOV_DEV_ATTR_RO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0444, xe_sriov_dev_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_DEV_ATTR_WO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0200, NULL, xe_sriov_dev_attr_##NAME##_store)
+
+struct xe_sriov_vf_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct xe_device *xe, unsigned int vfid, char *buf);
+ ssize_t (*store)(struct xe_device *xe, unsigned int vfid, const char *buf, size_t count);
+};
+#define to_xe_sriov_vf_attr(p) container_of_const((p), struct xe_sriov_vf_attr, attr)
+
+#define XE_SRIOV_VF_ATTR(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0644, xe_sriov_vf_attr_##NAME##_show, xe_sriov_vf_attr_##NAME##_store)
+
+#define XE_SRIOV_VF_ATTR_RO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0444, xe_sriov_vf_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_VF_ATTR_WO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0200, NULL, xe_sriov_vf_attr_##NAME##_store)
+
+/* device level attributes go here */
+
+#define DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(NAME, ITEM, TYPE) \
+ \
+static ssize_t xe_sriov_dev_attr_##NAME##_store(struct xe_device *xe, \
+ const char *buf, size_t count) \
+{ \
+ TYPE value; \
+ int err; \
+ \
+ err = kstrto##TYPE(buf, 0, &value); \
+ if (err) \
+ return err; \
+ \
+ err = xe_sriov_pf_provision_bulk_apply_##ITEM(xe, value); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_DEV_ATTR_WO(NAME)
+
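+/*
+ * Each instantiation below creates a write-only bulk attribute: storing e.g.
+ * "5000" to .bulk_profile/exec_quantum_ms parses the value with kstrtou32()
+ * and applies a 5000 ms execution quantum to the PF and all VFs via
+ * xe_sriov_pf_provision_bulk_apply_eq().
+ */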
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32);
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32);
+
+static const char * const sched_priority_names[] = {
+ [GUC_SCHED_PRIORITY_LOW] = "low",
+ [GUC_SCHED_PRIORITY_NORMAL] = "normal",
+ [GUC_SCHED_PRIORITY_HIGH] = "high",
+};
+
+static bool sched_priority_change_allowed(unsigned int vfid)
+{
+	/* As of today the GuC FW allows selectively changing only the PF priority. */
+ return vfid == PFID;
+}
+
+static bool sched_priority_high_allowed(unsigned int vfid)
+{
+	/* As of today the GuC FW allows selecting 'high' priority only for the PF. */
+ return vfid == PFID;
+}
+
+static bool sched_priority_bulk_high_allowed(struct xe_device *xe)
+{
+ /* all VFs are equal - it's sufficient to check VF1 only */
+ return sched_priority_high_allowed(VFID(1));
+}
+
+static ssize_t xe_sriov_dev_attr_sched_priority_store(struct xe_device *xe,
+ const char *buf, size_t count)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ int match;
+ int err;
+
+ if (!sched_priority_bulk_high_allowed(xe))
+ num_priorities--;
+
+ match = __sysfs_match_string(sched_priority_names, num_priorities, buf);
+ if (match < 0)
+ return -EINVAL;
+
+ err = xe_sriov_pf_provision_bulk_apply_priority(xe, match);
+ return err ?: count;
+}
+
+static XE_SRIOV_DEV_ATTR_WO(sched_priority);
+
+static struct attribute *bulk_profile_dev_attrs[] = {
+ &xe_sriov_dev_attr_exec_quantum_ms.attr,
+ &xe_sriov_dev_attr_preempt_timeout_us.attr,
+ &xe_sriov_dev_attr_sched_priority.attr,
+ NULL
+};
+
+static const struct attribute_group bulk_profile_dev_attr_group = {
+ .name = ".bulk_profile",
+ .attrs = bulk_profile_dev_attrs,
+};
+
+static const struct attribute_group *xe_sriov_dev_attr_groups[] = {
+ &bulk_profile_dev_attr_group,
+ NULL
+};
+
+/* and VF-level attributes go here */
+
+#define DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(NAME, ITEM, TYPE, FORMAT) \
+static ssize_t xe_sriov_vf_attr_##NAME##_show(struct xe_device *xe, unsigned int vfid, \
+ char *buf) \
+{ \
+ TYPE value = 0; \
+ int err; \
+ \
+ err = xe_sriov_pf_provision_query_vf_##ITEM(xe, vfid, &value); \
+ if (err) \
+ return err; \
+ \
+ return sysfs_emit(buf, FORMAT, value); \
+} \
+ \
+static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \
+ const char *buf, size_t count) \
+{ \
+ TYPE value; \
+ int err; \
+ \
+ err = kstrto##TYPE(buf, 0, &value); \
+ if (err) \
+ return err; \
+ \
+ err = xe_sriov_pf_provision_apply_vf_##ITEM(xe, vfid, value); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_VF_ATTR(NAME)
+
+DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n");
+DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n");
+
+static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid,
+ char *buf)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ u32 priority;
+ int err;
+
+ err = xe_sriov_pf_provision_query_vf_priority(xe, vfid, &priority);
+ if (err)
+ return err;
+
+ if (!sched_priority_high_allowed(vfid))
+ num_priorities--;
+
+ xe_assert(xe, priority < num_priorities);
+ return emit_choice(buf, priority, sched_priority_names, num_priorities);
+}
+
+static ssize_t xe_sriov_vf_attr_sched_priority_store(struct xe_device *xe, unsigned int vfid,
+ const char *buf, size_t count)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ int match;
+ int err;
+
+ if (!sched_priority_change_allowed(vfid))
+ return -EOPNOTSUPP;
+
+ if (!sched_priority_high_allowed(vfid))
+ num_priorities--;
+
+ match = __sysfs_match_string(sched_priority_names, num_priorities, buf);
+ if (match < 0)
+ return -EINVAL;
+
+ err = xe_sriov_pf_provision_apply_vf_priority(xe, vfid, match);
+ return err ?: count;
+}
+
+static XE_SRIOV_VF_ATTR(sched_priority);
+
+static struct attribute *profile_vf_attrs[] = {
+ &xe_sriov_vf_attr_exec_quantum_ms.attr,
+ &xe_sriov_vf_attr_preempt_timeout_us.attr,
+ &xe_sriov_vf_attr_sched_priority.attr,
+ NULL
+};
+
+static umode_t profile_vf_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ if (attr == &xe_sriov_vf_attr_sched_priority.attr &&
+ !sched_priority_change_allowed(vkobj->vfid))
+ return attr->mode & 0444;
+
+ return attr->mode;
+}
+
+static const struct attribute_group profile_vf_attr_group = {
+ .name = "profile",
+ .attrs = profile_vf_attrs,
+ .is_visible = profile_vf_attr_is_visible,
+};
+
+#define DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(NAME) \
+ \
+static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \
+ const char *buf, size_t count) \
+{ \
+ bool yes; \
+ int err; \
+ \
+ if (!vfid) \
+ return -EPERM; \
+ \
+ err = kstrtobool(buf, &yes); \
+ if (err) \
+ return err; \
+ if (!yes) \
+ return count; \
+ \
+ err = xe_sriov_pf_control_##NAME##_vf(xe, vfid); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_VF_ATTR_WO(NAME)
+
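+/*
+ * Writing any value that kstrtobool() treats as true (e.g. "1" or "y") to
+ * vfN/stop invokes xe_sriov_pf_control_stop_vf(); false values are accepted
+ * but ignored, and the attribute is hidden on the PF node.
+ */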
+DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(stop);
+
+static struct attribute *control_vf_attrs[] = {
+ &xe_sriov_vf_attr_stop.attr,
+ NULL
+};
+
+static umode_t control_vf_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ if (vkobj->vfid == PFID)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group control_vf_attr_group = {
+ .attrs = control_vf_attrs,
+ .is_visible = control_vf_attr_is_visible,
+};
+
+static const struct attribute_group *xe_sriov_vf_attr_groups[] = {
+ &profile_vf_attr_group,
+ &control_vf_attr_group,
+ NULL
+};
+
+/* no user serviceable parts below */
+
+static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_kobj *vkobj;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL);
+ if (!vkobj)
+ return NULL;
+
+ vkobj->xe = xe;
+ vkobj->vfid = vfid;
+ return &vkobj->base;
+}
+
+static void release_xe_sriov_kobj(struct kobject *kobj)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ kfree(vkobj);
+}
+
+static ssize_t xe_sriov_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+
+ if (!vattr->show)
+ return -EPERM;
+
+ return vattr->show(xe, buf);
+}
+
+static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ ssize_t ret;
+
+ if (!vattr->store)
+ return -EPERM;
+
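+	/* Hold an RPM wakeref and wait until the PF is ready before storing */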
+ xe_pm_runtime_get(xe);
+ ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ unsigned int vfid = vkobj->vfid;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ if (!vattr->show)
+ return -EPERM;
+
+ return vattr->show(xe, vfid, buf);
+}
+
+static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ unsigned int vfid = vkobj->vfid;
+ ssize_t ret;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ if (!vattr->store)
+ return -EPERM;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count);
+	xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static const struct sysfs_ops xe_sriov_dev_sysfs_ops = {
+ .show = xe_sriov_dev_attr_show,
+ .store = xe_sriov_dev_attr_store,
+};
+
+static const struct sysfs_ops xe_sriov_vf_sysfs_ops = {
+ .show = xe_sriov_vf_attr_show,
+ .store = xe_sriov_vf_attr_store,
+};
+
+static const struct kobj_type xe_sriov_dev_ktype = {
+ .release = release_xe_sriov_kobj,
+ .sysfs_ops = &xe_sriov_dev_sysfs_ops,
+ .default_groups = xe_sriov_dev_attr_groups,
+};
+
+static const struct kobj_type xe_sriov_vf_ktype = {
+ .release = release_xe_sriov_kobj,
+ .sysfs_ops = &xe_sriov_vf_sysfs_ops,
+ .default_groups = xe_sriov_vf_attr_groups,
+};
+
+static int pf_sysfs_error(struct xe_device *xe, int err, const char *what)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+ return err;
+}
+
+static void pf_sysfs_note(struct xe_device *xe, int err, const char *what)
+{
+ xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+}
+
+static void action_put_kobject(void *arg)
+{
+ struct kobject *kobj = arg;
+
+ kobject_put(kobj);
+}
+
+static int pf_setup_root(struct xe_device *xe)
+{
+ struct kobject *parent = &xe->drm.dev->kobj;
+ struct kobject *root;
+ int err;
+
+ root = create_xe_sriov_kobj(xe, PFID);
+ if (!root)
+ return pf_sysfs_error(xe, -ENOMEM, "root obj");
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root);
+ if (err)
+ return pf_sysfs_error(xe, err, "root action");
+
+ err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin");
+ if (err)
+ return pf_sysfs_error(xe, err, "root init");
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, !xe->sriov.pf.sysfs.root);
+ xe->sriov.pf.sysfs.root = root;
+ return 0;
+}
+
+static int pf_setup_tree(struct xe_device *xe)
+{
+ unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct kobject *root, *kobj;
+ unsigned int n;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ root = xe->sriov.pf.sysfs.root;
+
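+	/* Entry 0 is the PF itself; entries 1..totalvfs are the VFs */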
+ for (n = 0; n <= totalvfs; n++) {
+ kobj = create_xe_sriov_kobj(xe, VFID(n));
+ if (!kobj)
+ return pf_sysfs_error(xe, -ENOMEM, "tree obj");
+
+		err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, kobj);
+ if (err)
+ return pf_sysfs_error(xe, err, "tree action");
+
+ if (n)
+ err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+ root, "vf%u", n);
+ else
+ err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+ root, "pf");
+ if (err)
+ return pf_sysfs_error(xe, err, "tree init");
+
+ xe_assert(xe, !xe->sriov.pf.vfs[n].kobj);
+ xe->sriov.pf.vfs[n].kobj = kobj;
+ }
+
+ return 0;
+}
+
+static void action_rm_device_link(void *arg)
+{
+ struct kobject *kobj = arg;
+
+ sysfs_remove_link(kobj, "device");
+}
+
+static int pf_link_pf_device(struct xe_device *xe)
+{
+ struct kobject *kobj = xe->sriov.pf.vfs[PFID].kobj;
+ int err;
+
+ err = sysfs_create_link(kobj, &xe->drm.dev->kobj, "device");
+ if (err)
+ return pf_sysfs_error(xe, err, "PF device link");
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_rm_device_link, kobj);
+ if (err)
+ return pf_sysfs_error(xe, err, "PF unlink action");
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_init() - Setup PF's SR-IOV sysfs tree.
+ * @xe: the PF &xe_device to setup sysfs
+ *
+ * This function will create additional nodes that represent the PF and VF
+ * devices, each populated with Xe specific SR-IOV attributes.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_sysfs_init(struct xe_device *xe)
+{
+ int err;
+
+ err = pf_setup_root(xe);
+ if (err)
+ return err;
+
+ err = pf_setup_tree(xe);
+ if (err)
+ return err;
+
+ err = pf_link_pf_device(xe);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_link_vfs() - Add VF's links in SR-IOV sysfs tree.
+ * @xe: the &xe_device where to update sysfs
+ * @num_vfs: number of enabled VFs to link
+ *
+ * This function is specific for the PF driver.
+ *
+ * This function will add "device" symbolic links from the VF nodes in the
+ * PF's SR-IOV sysfs tree to the corresponding enabled VF PCI devices.
+ *
+ * xe_sriov_pf_sysfs_unlink_vfs() shall be used to remove those links.
+ */
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct pci_dev *pf_pdev = to_pci_dev(xe->drm.dev);
+ struct pci_dev *vf_pdev = NULL;
+ unsigned int n;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, num_vfs <= totalvfs);
+
+ for (n = 1; n <= num_vfs; n++) {
+ vf_pdev = xe_pci_sriov_get_vf_pdev(pf_pdev, VFID(n));
+ if (!vf_pdev)
+ return pf_sysfs_note(xe, -ENOENT, "VF link");
+
+ err = sysfs_create_link(xe->sriov.pf.vfs[VFID(n)].kobj,
+ &vf_pdev->dev.kobj, "device");
+
+ /* must balance xe_pci_sriov_get_vf_pdev() */
+ pci_dev_put(vf_pdev);
+
+ if (err)
+ return pf_sysfs_note(xe, err, "VF link");
+ }
+}
+
+/**
+ * xe_sriov_pf_sysfs_unlink_vfs() - Remove VF's links from SR-IOV sysfs tree.
+ * @xe: the &xe_device where to update sysfs
+ * @num_vfs: number of VFs to unlink
+ *
+ * This function shall be called only on the PF.
+ * This function will remove "device" links added by xe_sriov_pf_sysfs_link_vfs().
+ */
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ unsigned int n;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, num_vfs <= xe_sriov_pf_get_totalvfs(xe));
+
+ for (n = 1; n <= num_vfs; n++)
+ sysfs_remove_link(xe->sriov.pf.vfs[VFID(n)].kobj, "device");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
new file mode 100644
index 000000000000..ae92ed1766e7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_SYSFS_H_
+#define _XE_SRIOV_PF_SYSFS_H_
+
+struct xe_device;
+
+int xe_sriov_pf_sysfs_init(struct xe_device *xe);
+
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs);
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
index c753cd59aed2..b3cd9797194b 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
@@ -12,10 +12,15 @@
#include "xe_sriov_pf_provision_types.h"
#include "xe_sriov_pf_service_types.h"
+struct kobject;
+
/**
* struct xe_sriov_metadata - per-VF device level metadata
*/
struct xe_sriov_metadata {
+ /** @kobj: kobject representing VF in PF's SR-IOV sysfs tree. */
+ struct kobject *kobj;
+
/** @version: negotiated VF/PF ABI version */
struct xe_sriov_pf_service_version version;
};
@@ -42,6 +47,12 @@ struct xe_device_pf {
/** @service: device level service data. */
struct xe_sriov_pf_service service;
+ /** @sysfs: device level sysfs data. */
+ struct {
+ /** @sysfs.root: the root kobject for all SR-IOV entries in sysfs. */
+ struct kobject *root;
+ } sysfs;
+
/** @vfs: metadata for all VFs. */
struct xe_sriov_metadata *vfs;
};
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 13af589715a7..55c5a0eb82e1 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -104,8 +104,7 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
&vm->svm.garbage_collector.range_list);
spin_unlock(&vm->svm.garbage_collector.lock);
- queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
- &vm->svm.garbage_collector.work);
+ queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work);
}
static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 82872a51f098..ff74528ca0c6 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -14,7 +14,7 @@
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
-#include "xe_device_types.h"
+#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_macros.h"
#include "xe_sched_job_types.h"
@@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
+ struct drm_syncobj *ufence_syncobj,
+ u64 ufence_timeline_value,
unsigned int flags)
{
struct drm_xe_sync sync_in;
@@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
if (exec) {
sync->addr = sync_in.addr;
} else {
+ sync->ufence_timeline_value = ufence_timeline_value;
sync->ufence = user_fence_create(xe, sync_in.addr,
sync_in.timeline_value);
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
return PTR_ERR(sync->ufence);
+ sync->ufence_chain_fence = dma_fence_chain_alloc();
+ if (!sync->ufence_chain_fence)
+ return -ENOMEM;
+ sync->ufence_syncobj = ufence_syncobj;
}
break;
@@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
} else if (sync->ufence) {
int err;
- dma_fence_get(fence);
+ drm_syncobj_add_point(sync->ufence_syncobj,
+ sync->ufence_chain_fence,
+ fence, sync->ufence_timeline_value);
+ sync->ufence_chain_fence = NULL;
+
+ fence = drm_syncobj_fence_get(sync->ufence_syncobj);
user_fence_get(sync->ufence);
err = dma_fence_add_callback(fence, &sync->ufence->cb,
user_fence_cb);
@@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
drm_syncobj_put(sync->syncobj);
dma_fence_put(sync->fence);
dma_fence_chain_free(sync->chain_fence);
- if (sync->ufence)
+ dma_fence_chain_free(sync->ufence_chain_fence);
+ if (!IS_ERR_OR_NULL(sync->ufence))
user_fence_put(sync->ufence);
}
@@ -284,51 +297,59 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
struct dma_fence *fence;
- int i, num_in_fence = 0, current_fence = 0;
+ int i, num_fence = 0, current_fence = 0;
lockdep_assert_held(&vm->lock);
- /* Count in-fences */
- for (i = 0; i < num_sync; ++i) {
- if (sync[i].fence) {
- ++num_in_fence;
- fence = sync[i].fence;
+ /* Reject in fences */
+ for (i = 0; i < num_sync; ++i)
+ if (sync[i].fence)
+ return ERR_PTR(-EOPNOTSUPP);
+
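+	/*
+	 * For VM bind queues, gather the last fence of every queue in the
+	 * multi-GT group plus each GT's last TLB invalidation fence and wrap
+	 * them in a dma_fence_array, so the returned fence covers all
+	 * outstanding binds and invalidations.
+	 */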
+ if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ struct xe_exec_queue *__q;
+ struct xe_tile *tile;
+ u8 id;
+
+ for_each_tile(tile, vm->xe, id)
+ num_fence += (1 + XE_MAX_GT_PER_TILE);
+
+ fences = kmalloc_array(num_fence, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ fences[current_fence++] =
+ xe_exec_queue_last_fence_get(q, vm);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+ list_for_each_entry(__q, &q->multi_gt_list,
+ multi_gt_link) {
+ fences[current_fence++] =
+ xe_exec_queue_last_fence_get(__q, vm);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i);
}
- }
- /* Easy case... */
- if (!num_in_fence) {
- fence = xe_exec_queue_last_fence_get(q, vm);
- return fence;
- }
+ xe_assert(vm->xe, current_fence == num_fence);
+ cf = dma_fence_array_create(num_fence, fences,
+ dma_fence_context_alloc(1),
+ 1, false);
+ if (!cf)
+ goto err_out;
- /* Create composite fence */
- fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
- if (!fences)
- return ERR_PTR(-ENOMEM);
- for (i = 0; i < num_sync; ++i) {
- if (sync[i].fence) {
- dma_fence_get(sync[i].fence);
- fences[current_fence++] = sync[i].fence;
- }
- }
- fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
- cf = dma_fence_array_create(num_in_fence, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- goto err_out;
+ return &cf->base;
}
- return &cf->base;
+ fence = xe_exec_queue_last_fence_get(q, vm);
+ return fence;
err_out:
while (current_fence)
dma_fence_put(fences[--current_fence]);
kfree(fences);
- kfree(cf);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 256ffc1e54dc..51f2d803e977 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -8,6 +8,7 @@
#include "xe_sync_types.h"
+struct drm_syncobj;
struct xe_device;
struct xe_exec_queue;
struct xe_file;
@@ -21,6 +22,8 @@ struct xe_vm;
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
+ struct drm_syncobj *ufence_syncobj,
+ u64 ufence_timeline_value,
unsigned int flags);
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
index 30ac3f51993b..b88f1833e28c 100644
--- a/drivers/gpu/drm/xe/xe_sync_types.h
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -18,9 +18,12 @@ struct xe_sync_entry {
struct drm_syncobj *syncobj;
struct dma_fence *fence;
struct dma_fence_chain *chain_fence;
+ struct dma_fence_chain *ufence_chain_fence;
+ struct drm_syncobj *ufence_syncobj;
struct xe_user_fence *ufence;
u64 addr;
u64 timeline_value;
+ u64 ufence_timeline_value;
u32 type;
u32 flags;
};
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 492def04a559..1ae0dec2cf31 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -12,6 +12,7 @@
#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
#include "xe_pm.h"
+#include "xe_vm.h"
/** struct xe_tlb_inval_job - TLB invalidation job */
struct xe_tlb_inval_job {
@@ -21,6 +22,8 @@ struct xe_tlb_inval_job {
struct xe_tlb_inval *tlb_inval;
/** @q: exec queue issuing the invalidate */
struct xe_exec_queue *q;
+ /** @vm: VM which TLB invalidation is being issued for */
+ struct xe_vm *vm;
/** @refcount: ref count of this job */
struct kref refcount;
/**
@@ -32,8 +35,8 @@ struct xe_tlb_inval_job {
u64 start;
/** @end: End address to invalidate */
u64 end;
- /** @asid: Address space ID to invalidate */
- u32 asid;
+	/** @type: TLB invalidation target: primary or media GT */
+ int type;
/** @fence_armed: Fence has been armed */
bool fence_armed;
};
@@ -46,7 +49,7 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
container_of(job->fence, typeof(*ifence), base);
xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
- job->end, job->asid);
+ job->end, job->vm->usm.asid);
return job->fence;
}
@@ -70,9 +73,10 @@ static const struct xe_dep_job_ops dep_job_ops = {
* @q: exec queue issuing the invalidate
* @tlb_inval: TLB invalidation client
* @dep_scheduler: Dependency scheduler for job
+ * @vm: VM which TLB invalidation is being issued for
* @start: Start address to invalidate
* @end: End address to invalidate
- * @asid: Address space ID to invalidate
+ * @type: TLB invalidation target (primary or media GT)
*
* Create a TLB invalidation job and initialize internal fields. The caller is
* responsible for releasing the creation reference.
@@ -81,8 +85,8 @@ static const struct xe_dep_job_ops dep_job_ops = {
*/
struct xe_tlb_inval_job *
xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
- struct xe_dep_scheduler *dep_scheduler, u64 start,
- u64 end, u32 asid)
+ struct xe_dep_scheduler *dep_scheduler,
+ struct xe_vm *vm, u64 start, u64 end, int type)
{
struct xe_tlb_inval_job *job;
struct drm_sched_entity *entity =
@@ -90,19 +94,24 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *ifence;
int err;
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
job = kmalloc(sizeof(*job), GFP_KERNEL);
if (!job)
return ERR_PTR(-ENOMEM);
job->q = q;
+ job->vm = vm;
job->tlb_inval = tlb_inval;
job->start = start;
job->end = end;
- job->asid = asid;
job->fence_armed = false;
job->dep.ops = &dep_job_ops;
+ job->type = type;
kref_init(&job->refcount);
xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */
+ xe_vm_get(vm); /* Pairs with put in xe_tlb_inval_job_destroy */
ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence) {
@@ -124,6 +133,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
err_fence:
kfree(ifence);
err_job:
+ xe_vm_put(vm);
xe_exec_queue_put(q);
kfree(job);
@@ -138,6 +148,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
container_of(job->fence, typeof(*ifence), base);
struct xe_exec_queue *q = job->q;
struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_vm *vm = job->vm;
if (!job->fence_armed)
kfree(ifence);
@@ -147,6 +158,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
drm_sched_job_cleanup(&job->dep.drm);
kfree(job);
+ xe_vm_put(vm); /* Pairs with get from xe_tlb_inval_job_create */
xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */
xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */
}
@@ -231,6 +243,11 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
dma_fence_get(&job->dep.drm.s_fence->finished);
drm_sched_entity_push_job(&job->dep.drm);
+ /* Let the upper layers fish this out */
+ xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm,
+ &job->dep.drm.s_fence->finished,
+ job->type);
+
xe_migrate_job_unlock(m, job->q);
/*
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index e63edcb26b50..4d6df1a6c6ca 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -11,14 +11,15 @@
struct dma_fence;
struct xe_dep_scheduler;
struct xe_exec_queue;
+struct xe_migrate;
struct xe_tlb_inval;
struct xe_tlb_inval_job;
-struct xe_migrate;
+struct xe_vm;
struct xe_tlb_inval_job *
xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
struct xe_dep_scheduler *dep_scheduler,
- u64 start, u64 end, u32 asid);
+ struct xe_vm *vm, u64 start, u64 end, int type);
int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 314f42fcbcbd..79a97b086cb2 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -441,6 +441,29 @@ TRACE_EVENT(xe_eu_stall_data_read,
__entry->read_size, __entry->total_size)
);
+TRACE_EVENT(xe_exec_queue_reach_max_job_count,
+ TP_PROTO(struct xe_exec_queue *q, int max_cnt),
+ TP_ARGS(q, max_cnt),
+
+ TP_STRUCT__entry(__string(dev, __dev_name_eq(q))
+ __field(enum xe_engine_class, class)
+ __field(u32, logical_mask)
+ __field(u16, guc_id)
+ __field(int, max_cnt)
+ ),
+
+ TP_fast_assign(__assign_str(dev);
+ __entry->class = q->class;
+ __entry->logical_mask = q->logical_mask;
+ __entry->guc_id = q->guc->id;
+ __entry->max_cnt = max_cnt;
+ ),
+
+ TP_printk("dev=%s, job count exceeded the maximum limit (%d) per exec queue. engine_class=0x%x, logical_mask=0x%x, guc_id=%d",
+ __get_str(dev), __entry->max_cnt,
+ __entry->class, __entry->logical_mask, __entry->guc_id)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h
index 1ef181c90434..a30e732c4d51 100644
--- a/drivers/gpu/drm/xe/xe_validation.h
+++ b/drivers/gpu/drm/xe/xe_validation.h
@@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val)
*/
DEFINE_CLASS(xe_validation, struct xe_validation_ctx *,
if (_T) xe_validation_ctx_fini(_T);,
- ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags);
- _ret ? NULL : _ctx; }),
+ ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags);
+ *_ret ? NULL : _ctx; }),
struct xe_validation_ctx *_ctx, struct xe_validation_device *_val,
- struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret);
+ struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret);
static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
{return *_T; }
#define class_xe_validation_is_conditional true
@@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
* exhaustive eviction.
*/
#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \
- scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \
+ scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \
drm_exec_until_all_locked(_exec)
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 00f3520dec38..8fb5cc6a69ec 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -27,7 +27,6 @@
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
-#include "xe_gt_pagefault.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
@@ -755,6 +754,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
for_each_tile(tile, vm->xe, id) {
vops.pt_update_ops[id].wait_vm_bookkeep = true;
vops.pt_update_ops[tile->id].q =
@@ -845,6 +845,7 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
for_each_tile(tile, vm->xe, id) {
vops.pt_update_ops[id].wait_vm_bookkeep = true;
vops.pt_update_ops[tile->id].q =
@@ -1458,7 +1459,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vm *vm;
- int err, number_tiles = 0;
+ int err;
struct xe_tile *tile;
u8 id;
@@ -1619,13 +1620,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
goto err_close;
}
vm->q[id] = q;
- number_tiles++;
}
}
- if (number_tiles > 1)
- vm->composite_fence_ctx = dma_fence_context_alloc(1);
-
if (xef && xe->info.has_asid) {
u32 asid;
@@ -1731,8 +1728,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
down_write(&vm->lock);
for_each_tile(tile, xe, id) {
- if (vm->q[id])
+ if (vm->q[id]) {
+ int i;
+
xe_exec_queue_last_fence_put(vm->q[id], vm);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
+ }
}
up_write(&vm->lock);
@@ -3102,20 +3104,31 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int number_tiles = 0, current_fence = 0, err;
+ int number_tiles = 0, current_fence = 0, n_fence = 0, err;
u8 id;
number_tiles = vm_ops_setup_tile_args(vm, vops);
if (number_tiles == 0)
return ERR_PTR(-ENODATA);
- if (number_tiles > 1) {
- fences = kmalloc_array(number_tiles, sizeof(*fences),
- GFP_KERNEL);
- if (!fences) {
- fence = ERR_PTR(-ENOMEM);
- goto err_trace;
- }
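+	/*
+	 * Reserve one fence slot per tile for the bind itself and, unless the
+	 * caller opted out via XE_VMA_OPS_FLAG_SKIP_TLB_WAIT, an extra slot
+	 * per GT for each tile's last TLB invalidation fence.
+	 */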
+ if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) {
+ for_each_tile(tile, vm->xe, id)
+ ++n_fence;
+ } else {
+ for_each_tile(tile, vm->xe, id)
+ n_fence += (1 + XE_MAX_GT_PER_TILE);
+ }
+
+ fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ fence = ERR_PTR(-ENOMEM);
+ goto err_trace;
+ }
+
+ cf = dma_fence_array_alloc(n_fence);
+ if (!cf) {
+ fence = ERR_PTR(-ENOMEM);
+ goto err_out;
}
for_each_tile(tile, vm->xe, id) {
@@ -3132,30 +3145,34 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
trace_xe_vm_ops_execute(vops);
for_each_tile(tile, vm->xe, id) {
+ struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
+ int i;
+
+ fence = NULL;
if (!vops->pt_update_ops[id].num_ops)
- continue;
+ goto collect_fences;
fence = xe_pt_update_ops_run(tile, vops);
if (IS_ERR(fence))
goto err_out;
- if (fences)
- fences[current_fence++] = fence;
- }
+collect_fences:
+ fences[current_fence++] = fence ?: dma_fence_get_stub();
+ if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
+ continue;
- if (fences) {
- cf = dma_fence_array_create(number_tiles, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- fence = ERR_PTR(-ENOMEM);
- goto err_out;
- }
- fence = &cf->base;
+ xe_migrate_job_lock(tile->migrate, q);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+ xe_migrate_job_unlock(tile->migrate, q);
}
+ xe_assert(vm->xe, current_fence == n_fence);
+ dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
+ 1, false);
+ fence = &cf->base;
+
for_each_tile(tile, vm->xe, id) {
if (!vops->pt_update_ops[id].num_ops)
continue;
@@ -3215,7 +3232,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
struct dma_fence *fence)
{
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
struct xe_user_fence *ufence;
struct xe_vma_op *op;
int i;
@@ -3236,7 +3252,6 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
if (fence) {
for (i = 0; i < vops->num_syncs; i++)
xe_sync_entry_signal(vops->syncs + i, fence);
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
}
}
@@ -3430,19 +3445,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
struct xe_sync_entry *syncs,
int num_syncs)
{
- struct dma_fence *fence;
+ struct dma_fence *fence = NULL;
int i, err = 0;
- fence = xe_sync_in_fence_get(syncs, num_syncs,
- to_wait_exec_queue(vm, q), vm);
- if (IS_ERR(fence))
- return PTR_ERR(fence);
+ if (num_syncs) {
+ fence = xe_sync_in_fence_get(syncs, num_syncs,
+ to_wait_exec_queue(vm, q), vm);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], fence);
+ }
- xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
- fence);
dma_fence_put(fence);
return err;
@@ -3633,8 +3648,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
syncs_user = u64_to_user_ptr(args->syncs);
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+ struct xe_exec_queue *__q = q ?: vm->q[0];
+
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
&syncs_user[num_syncs],
+ __q->ufence_syncobj,
+ ++__q->ufence_timeline_value,
(xe_vm_in_lr_mode(vm) ?
SYNC_PARSE_FLAG_LR_MODE : 0) |
(!args->num_binds ?
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 830ed7b05c27..ccd6cc090309 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -221,11 +221,6 @@ struct xe_vm {
#define XE_VM_FLAG_GSC BIT(8)
unsigned long flags;
- /** @composite_fence_ctx: context composite fence */
- u64 composite_fence_ctx;
- /** @composite_fence_seqno: seqno for composite fence */
- u32 composite_fence_seqno;
-
/**
* @lock: outer most lock, protects objects of anything attached to this
* VM
@@ -471,6 +466,7 @@ struct xe_vma_ops {
#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
+#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index ec638b431131..81e62291af45 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -679,6 +679,8 @@ static const struct xe_rtp_entry_sr engine_was[] = {
},
{ XE_RTP_NAME("14023061436"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute), OR,
+ GRAPHICS_VERSION_RANGE(3003, 3005),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE))
},