55 files changed, 2719 insertions, 1256 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov new file mode 100644 index 000000000000..2fd7e9b7bacc --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov @@ -0,0 +1,159 @@ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This directory appears for a particular Intel Xe device when: + + - the device supports SR-IOV, and + - the device is a Physical Function (PF), and + - driver support for the SR-IOV PF is enabled on the given device. + + This directory is used as the root for all attributes required to + manage both the Physical Function (PF) and Virtual Functions (VFs). + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This directory holds attributes related to the SR-IOV Physical + Function (PF). + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf1/ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf2/ +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/ +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These directories hold attributes related to the SR-IOV Virtual + Functions (VFs). + + Note that the VF number <N> is 1-based, as described in the PCI SR-IOV + specification; the Xe driver follows that naming scheme. + + There can be "vf1", "vf2" and so on, up to "vf<N>", where <N> + matches the value of the "sriov_totalvfs" attribute. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/sched_priority +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/sched_priority +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These files expose scheduling parameters for the PF and its VFs, and + are visible only on Intel Xe platforms that use time-sliced GPU sharing. + They can be changed even while VFs are enabled and running, and reflect + the settings of all tiles/GTs assigned to the given function. + + exec_quantum_ms: (RW) unsigned integer + The GT execution quantum (EQ) in [ms] for the given function. + The actual quantum value might be aligned per HW/FW requirements. + + Default is 0 (unlimited). + + preempt_timeout_us: (RW) unsigned integer + The GT preemption timeout in [us] of the given function. + The actual timeout value might be aligned per HW/FW requirements. + + Default is 0 (unlimited). + + sched_priority: (RW/RO) string + The GT scheduling priority of the given function. + + "low" - function will be scheduled on the GPU for its EQ/PT + only if the function has any work already submitted. + + "normal" - function will be scheduled on the GPU for its EQ/PT + irrespective of whether it has submitted any work or not. + + "high" - function will be scheduled on the GPU for its EQ/PT + in the next time-slice after the current one completes + and the function has work submitted. + + Default is "low". + + When read, this file will display the current and available + scheduling priorities.
The currently active priority level will + be enclosed in square brackets, like: + + [low] normal high + + This file can be read-only if changing the priority is not + supported. + + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on the given HW/FW, + -EIO if FW refuses to change the provisioning. + + Reads from these attributes may fail with: + -EUCLEAN if value is not consistent across all tiles/GTs. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/exec_quantum_ms +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/preempt_timeout_us +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/sched_priority +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These files allow bulk reconfiguration of the scheduling parameters + of the PF and VFs and are available only on Intel Xe platforms with + time-slice based GPU sharing. These scheduling parameters + can be changed even if VFs are enabled and running. + + exec_quantum_ms: (WO) unsigned integer + The GT execution quantum (EQ) in [ms] to be applied to all functions. + See sriov_admin/{pf,vf<N>}/profile/exec_quantum_ms for more details. + + preempt_timeout_us: (WO) unsigned integer + The GT preemption timeout (PT) in [us] to be applied to all functions. + See sriov_admin/{pf,vf<N>}/profile/preempt_timeout_us for more details. + + sched_priority: (RW/RO) string + The GT scheduling priority to be applied to all functions. + See sriov_admin/{pf,vf<N>}/profile/sched_priority for more details. + + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on the given HW/FW, + -EIO if FW refuses to change the provisioning. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/stop +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + This file allows controlling scheduling of the VF on Intel Xe GPU + platforms. It allows implementing a custom policy mechanism in case VFs + are misbehaving or triggering adverse events above defined thresholds. + + stop: (WO) bool + All GT executions of the given function shall be immediately stopped. + To allow scheduling this VF again, the VF FLR must be triggered. + + Writes to this attribute may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on the given HW/FW, + -EIO if FW refuses to change the scheduling. + + +What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/device +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/device +Date: October 2025 +KernelVersion: 6.19 +Contact: intel-xe@lists.freedesktop.org +Description: + These are symlinks to the underlying PCI device entry representing + the given Xe SR-IOV function. For the PF, this link is always present. + For VFs, this link is present only for currently enabled VFs.
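For illustration, a minimal userspace sketch of driving the profile attributes described above; it is not part of this patch, and the PCI address "0000:03:00.0" is a placeholder:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define ADMIN "/sys/bus/pci/drivers/xe/0000:03:00.0/sriov_admin"

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd;

	/* Reads e.g. "[low] normal high" - active priority in brackets */
	fd = open(ADMIN "/vf1/profile/sched_priority", O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n > 0) {
		buf[n] = '\0';
		printf("vf1 sched_priority: %s", buf);
	}

	/* Request a 5 ms execution quantum for VF1; per the ABI above
	 * this may fail with EINVAL, EPERM or EIO */
	fd = open(ADMIN "/vf1/profile/exec_quantum_ms", O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "5", 1) != 1)
		perror("exec_quantum_ms");
	close(fd);

	return 0;
}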
diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst index c0811200e327..182d6aabeee1 100644 --- a/Documentation/gpu/xe/xe_gt_freq.rst +++ b/Documentation/gpu/xe/xe_gt_freq.rst @@ -7,6 +7,9 @@ Xe GT Frequency Management .. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c :doc: Xe GT Frequency Management +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_throttle.c + :doc: Xe GT Throttle + Internal API ============ diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f8a3a1bfe42e..7b4ca591a4ae 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -58,7 +58,6 @@ xe-y += xe_bb.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ - xe_gt_pagefault.o \ xe_gt_sysfs.o \ xe_gt_throttle.o \ xe_gt_topology.o \ @@ -73,6 +72,7 @@ xe-y += xe_bb.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ xe_guc_log.o \ + xe_guc_pagefault.o \ xe_guc_pc.o \ xe_guc_submit.o \ xe_guc_tlb_inval.o \ @@ -94,6 +94,7 @@ xe-y += xe_bb.o \ xe_nvm.o \ xe_oa.o \ xe_observation.o \ + xe_pagefault.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ @@ -178,6 +179,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_sriov_pf_debugfs.o \ xe_sriov_pf_provision.o \ xe_sriov_pf_service.o \ + xe_sriov_pf_sysfs.o \ xe_tile_sriov_pf_debugfs.o # include helpers for tests even when XE is built-in diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index a895a8e801a9..2088256ad381 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -590,6 +590,7 @@ #define GT_GFX_RC6 XE_REG(0x138108) #define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) +/* Common performance limit reason bits - available on all platforms */ #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 #define PROCHOT_MASK REG_BIT(0) #define THERMAL_LIMIT_MASK REG_BIT(1) @@ -599,6 +600,18 @@ #define POWER_LIMIT_4_MASK REG_BIT(8) #define POWER_LIMIT_1_MASK REG_BIT(10) #define POWER_LIMIT_2_MASK REG_BIT(11) +/* Platform-specific performance limit reason bits - for Crescent Island */ +#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff +#define SOC_THERMAL_LIMIT_MASK REG_BIT(1) +#define MEM_THERMAL_MASK REG_BIT(2) +#define VR_THERMAL_MASK REG_BIT(3) +#define ICCMAX_MASK REG_BIT(4) +#define SOC_AVG_THERMAL_MASK REG_BIT(6) +#define FASTVMODE_MASK REG_BIT(7) +#define PSYS_PL1_MASK REG_BIT(12) +#define PSYS_PL2_MASK REG_BIT(13) +#define P0_FREQ_MASK REG_BIT(14) +#define PSYS_CRIT_MASK REG_BIT(15) #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index 264e9baf949c..0f79c0714454 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -24,6 +24,7 @@ #define BMG_MODS_RESIDENCY_OFFSET (0x4D0) #define BMG_G2_RESIDENCY_OFFSET (0x530) #define BMG_G6_RESIDENCY_OFFSET (0x538) +#define BMG_G7_RESIDENCY_OFFSET (0x4B0) #define BMG_G8_RESIDENCY_OFFSET (0x540) #define BMG_G10_RESIDENCY_OFFSET (0x548) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1c3c9557a9bd..e91da9589c5f 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -142,6 +142,7 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) } residencies[] = { {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, + {BMG_G7_RESIDENCY_OFFSET, "Package G7"}, {BMG_G8_RESIDENCY_OFFSET, "Package G8"}, {BMG_G10_RESIDENCY_OFFSET, "Package G10"}, {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"} diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 86d5960476af..c7d373c70f0f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -52,6 +52,7 @@ #include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" @@ -896,6 
+897,10 @@ int xe_device_probe(struct xe_device *xe) return err; } + err = xe_pagefault_init(xe); + if (err) + return err; + if (xe->tiles->media_gt && XE_GT_WA(xe->tiles->media_gt, 15015404425_disable)) XE_DEVICE_WA_DISABLE(xe, 15015404425); @@ -988,21 +993,21 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - struct xe_gt *gt; - u8 id; + xe_display_pm_shutdown(xe); - xe_display_pm_shutdown(xe); + xe_irq_suspend(xe); - xe_irq_suspend(xe); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + xe_display_pm_shutdown_late(xe); - xe_display_pm_shutdown_late(xe); - } else { + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index af0ce275b032..7baf15f51575 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,6 +18,7 @@ #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa_types.h" +#include "xe_pagefault_types.h" #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" @@ -418,6 +419,16 @@ struct xe_device { u32 next_asid; /** @usm.lock: protects UM state */ struct rw_semaphore lock; + /** @usm.pf_wq: page fault work queue, unbound, high priority */ + struct workqueue_struct *pf_wq; + /* + * We pick 4 here because, in the current implementation, it + * yields the best bandwidth utilization of the kernel paging + * engine. + */ +#define XE_PAGEFAULT_QUEUE_COUNT 4 + /** @usm.pf_queue: Page fault queues */ + struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT]; } usm; /** @pinned: pinned BO state */ diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 521467d976f7..4d81210e41f5 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -21,6 +21,7 @@ #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_svm.h" +#include "xe_trace.h" #include "xe_vm.h" /** @@ -154,6 +155,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_exec_queue; } + if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE) { + trace_xe_exec_queue_reach_max_job_count(q, XE_MAX_JOB_COUNT_PER_EXEC_QUEUE); + err = -EAGAIN; + goto err_exec_queue; + } + if (args->num_syncs) { syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); if (!syncs) { @@ -166,7 +173,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? 
SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) @@ -294,10 +302,6 @@ retry: goto err_put_job; if (!xe_vm_in_lr_mode(vm)) { - err = xe_sched_job_last_fence_add_dep(job, vm); - if (err) - goto err_put_job; - err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_put_job; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 90cbc95f8e2e..8724f8de67e2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,6 +10,7 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include "xe_dep_scheduler.h" @@ -368,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -376,11 +387,20 @@ void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + int i; + + xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); + + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); xe_exec_queue_last_fence_put_unlocked(q); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); + if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { list_for_each_entry_safe(eq, next, &q->multi_gt_list, multi_gt_link) @@ -998,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) { + if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) { + xe_migrate_job_lock_assert(q); + } else if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); } else { xe_vm_assert_held(vm); @@ -1097,32 +1119,104 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, struct dma_fence *fence) { xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, !dma_fence_is_container(fence)); xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } /** - * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence * @q: The exec queue - * @vm: The VM the engine does a bind or exec for + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + */ +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type); +} + +/** + * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB + * invalidation fence unlocked + * @q: The exec queue + * @type: Either primary or media GT + * + * Only safe to be called from xe_exec_queue_destroy(). 
+ */ +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type) +{ + xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + + dma_fence_put(q->tlb_inval[type].last_fence); + q->tlb_inval[type].last_fence = NULL; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @type: Either primary or media GT + * + * Get the last fence; takes a reference * - * Returns: - * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + * Returns: last fence if not signaled, dma fence stub if signaled */ -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type) { struct dma_fence *fence; - int err = 0; - fence = xe_exec_queue_last_fence_get(q, vm); - if (fence) { - err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? - 0 : -ETIME; - dma_fence_put(fence); - } + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); - return err; + if (q->tlb_inval[type].last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &q->tlb_inval[type].last_fence->flags)) + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + + fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** + * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation + * @q: The exec queue + * @vm: The VM the engine does a bind for + * @fence: The fence + * @type: Either primary or media GT + * + * Set the last fence for the tlb invalidation type on the queue. Increases + * the reference count for the fence; when closing the queue, + * xe_exec_queue_tlb_inval_last_fence_put() should be called. + */ +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type) +{ + xe_exec_queue_last_fence_lockdep_assert(q, vm); + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | + EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(vm->xe, !dma_fence_is_container(fence)); + + xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); + q->tlb_inval[type].last_fence = dma_fence_get(fence); }
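To make the intended calling pattern of the get/set pair above concrete, here is a hedged sketch (not from this patch) of how a bind path might chain a new TLB invalidation behind the previous one on the same queue; issue_tlb_inval() is a hypothetical stand-in for the real submission helper and is assumed to take its own reference on the input fence:

static int tlb_inval_chain_example(struct xe_exec_queue *q, struct xe_vm *vm,
				   unsigned int type)
{
	struct dma_fence *prev, *fence;

	/* Takes a reference; returns the stub fence if already signaled */
	prev = xe_exec_queue_tlb_inval_last_fence_get(q, vm, type);

	fence = issue_tlb_inval(q, prev);	/* hypothetical helper */
	dma_fence_put(prev);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* The queue keeps its own reference to the new fence */
	xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence, type);
	dma_fence_put(fence);

	return 0;
}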
/** diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index a4dfbe858bda..fda4d4f9bda8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -14,6 +14,10 @@ struct drm_file; struct xe_device; struct xe_file; +#define for_each_tlb_inval(__i) \ + for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \ + __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i) + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hw_engine, u32 flags, @@ -84,8 +88,23 @@ struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue * struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); -int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, - struct xe_vm *vm); + +void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, + unsigned int type); + +struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, + struct xe_vm *vm, + unsigned int type); + +void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, + struct xe_vm *vm, + struct dma_fence *fence, + unsigned int type); + void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 282505fa1377..771ffe35cd0c 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -145,6 +146,11 @@ struct xe_exec_queue { * dependency scheduler */ struct xe_dep_scheduler *dep_scheduler; + /** + * @last_fence: last fence for tlb invalidation, protected by + * vm->lock in write mode + */ + struct dma_fence *last_fence; } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; /** @pxp: PXP info tracking */ @@ -155,6 +161,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; @@ -162,6 +174,11 @@ struct xe_exec_queue { const struct xe_ring_ops *ring_ops; /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */ struct drm_sched_entity *entity; + +#define XE_MAX_JOB_COUNT_PER_EXEC_QUEUE 1000 + /** @job_cnt: number of drm jobs in this exec queue */ + atomic_t job_cnt; + /** * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed * Protected by @vm's resv. Unused if @vm == NULL.
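Given the XE_MAX_JOB_COUNT_PER_EXEC_QUEUE cap defined above and the -EAGAIN now returned by xe_exec_ioctl() when a queue holds too many jobs, userspace is expected to back off and resubmit. A hedged userspace sketch (not from this patch; the uapi header is installed by libdrm and the exact include path may differ):

#include <errno.h>
#include <sys/ioctl.h>
#include <xe_drm.h>

/* Retry a submission that hit the per-queue cap of 1000 in-flight jobs.
 * A real client would wait on a completion fence instead of spinning. */
static int xe_exec_with_retry(int fd, struct drm_xe_exec *exec)
{
	int ret;

	do {
		ret = ioctl(fd, DRM_IOCTL_XE_EXEC, exec);
	} while (ret == -1 && (errno == EAGAIN || errno == EINTR));

	return ret == -1 ? -errno : 0;
}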
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 89808b33d0a8..6d479948bf21 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -32,7 +32,6 @@ #include "xe_gt_freq.h" #include "xe_gt_idle.h" #include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" @@ -49,6 +48,7 @@ #include "xe_map.h" #include "xe_migrate.h" #include "xe_mmio.h" +#include "xe_pagefault.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_mocs.h" @@ -607,6 +607,13 @@ static void xe_gt_fini(void *arg) struct xe_gt *gt = arg; int i; + if (disable_work_sync(&gt->reset.worker)) + /* + * If gt_reset_worker was halted from executing, take care of + * releasing the rpm reference here. + */ + xe_pm_runtime_put(gt_to_xe(gt)); + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(&gt->fence_irq[i]); @@ -637,10 +644,6 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); - if (err) - return err; - err = xe_gt_idle_init(&gt->gtidle); if (err) return err; @@ -813,21 +816,18 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } -static int gt_reset(struct xe_gt *gt) +static void gt_reset_worker(struct work_struct *w) { + struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) { - err = -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) goto err_pm_put; - } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) { - err = -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) goto err_pm_put; - } xe_gt_info(gt, "reset started\n"); @@ -849,7 +849,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_gucrc_disable(&gt->uc); xe_uc_stop_prepare(&gt->uc); - xe_gt_pagefault_reset(gt); + xe_pagefault_reset(gt_to_xe(gt), gt); xe_uc_stop(&gt->uc); @@ -864,30 +864,24 @@ static int gt_reset(struct xe_gt *gt) goto err_out; xe_force_wake_put(gt_to_fw(gt), fw_ref); + + /* Pair with get while enqueueing the work in xe_gt_reset_async() */ xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); - return 0; + return; err_out: xe_force_wake_put(gt_to_fw(gt), fw_ref); XE_WARN_ON(xe_uc_start(&gt->uc)); + err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - xe_device_declare_wedged(gt_to_xe(gt)); + err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); } void xe_gt_reset_async(struct xe_gt *gt) @@ -899,6 +893,8 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); + + /* Pair with put in gt_reset_worker() if work is enqueued */ xe_pm_runtime_get_noresume(gt_to_xe(gt)); if (!queue_work(gt->ordered_wq, &gt->reset.worker)) xe_pm_runtime_put(gt_to_xe(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index e88f113226bc..849ea6c86e8e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -29,24 +29,26 @@ * PCODE is the ultimate decision maker of the actual running frequency, based * on thermal and other running conditions. * - * Xe's Freq provides a sysfs API for frequency management: + * Xe's Freq provides a sysfs API for frequency management under + * ``<device>/tile#/gt#/freq0/`` directory. 
* - * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * **Read-only** attributes: * - * - act_freq: The actual resolved frequency decided by PCODE. - * - cur_freq: The current one requested by GuC PC to the PCODE. - * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. - * - rpa_freq: The Render Performance (RP) A level, which is the achievable one. - * Calculated by PCODE at runtime based on multiple running conditions - * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. - * Calculated by PCODE at runtime based on multiple running conditions - * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. + * - ``act_freq``: The actual resolved frequency decided by PCODE. + * - ``cur_freq``: The current one requested by GuC PC to the PCODE. + * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one. + * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions + * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one. * - * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * **Read-write** attributes: * - * - min_freq: Min frequency request. - * - max_freq: Max frequency request. - * If max <= min, then freq_min becomes a fixed frequency request. + * - ``min_freq``: Min frequency request. + * - ``max_freq``: Max frequency request. + * If max <= min, then freq_min becomes a fixed frequency + * request. */ static struct xe_guc_pc * diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c deleted file mode 100644 index a054d6010ae0..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ /dev/null @@ -1,679 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_gt_pagefault.h" - -#include <linux/bitfield.h> -#include <linux/circ_buf.h> - -#include <drm/drm_exec.h> -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "xe_bo.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_gt_stats.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_migrate.h" -#include "xe_svm.h" -#include "xe_trace_bo.h" -#include "xe_vm.h" -#include "xe_vram_types.h" - -struct pagefault { - u64 page_addr; - u32 asid; - u16 pdata; - u8 vfid; - u8 access_type; - u8 fault_type; - u8 fault_level; - u8 engine_class; - u8 engine_instance; - u8 fault_unsuccessful; - bool trva_fault; -}; - -enum access_type { - ACCESS_TYPE_READ = 0, - ACCESS_TYPE_WRITE = 1, - ACCESS_TYPE_ATOMIC = 2, - ACCESS_TYPE_RESERVED = 3, -}; - -enum fault_type { - NOT_PRESENT = 0, - WRITE_ACCESS_VIOLATION = 1, - ATOMIC_ACCESS_VIOLATION = 2, -}; - -struct acc { - u64 va_range_base; - u32 asid; - u32 sub_granularity; - u8 granularity; - u8 vfid; - u8 access_type; - u8 engine_class; - u8 engine_instance; -}; - -static bool access_is_atomic(enum access_type access_type) -{ - return access_type == ACCESS_TYPE_ATOMIC; -} - -static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) -{ - return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, - vma->tile_invalidated); -} - -static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool need_vram_move, struct xe_vram_region *vram) -{ - struct xe_bo *bo = xe_vma_bo(vma); - struct xe_vm *vm = 
xe_vma_vm(vma); - int err; - - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; - - if (!bo) - return 0; - - return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : - xe_bo_validate(bo, vm, true, exec); -} - -static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, - bool atomic) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_validation_ctx ctx; - struct drm_exec exec; - struct dma_fence *fence; - int err, needs_vram; - - lockdep_assert_held_write(&vm->lock); - - needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); - if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) - return needs_vram < 0 ? needs_vram : -EACCES; - - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); - - trace_xe_vma_pagefault(vma); - - /* Check if VMA is valid, opportunistic check only */ - if (vma_is_valid(tile, vma) && !atomic) - return 0; - -retry_userptr: - if (xe_vma_is_userptr(vma) && - xe_vma_userptr_check_repin(to_userptr_vma(vma))) { - struct xe_userptr_vma *uvma = to_userptr_vma(vma); - - err = xe_vma_userptr_pin_pages(uvma); - if (err) - return err; - } - - /* Lock VM and BOs dma-resv */ - xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); - drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); - drm_exec_retry_on_contention(&exec); - xe_validation_retry_on_oom(&ctx, &err); - if (err) - goto unlock_dma_resv; - - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - xe_vm_set_validation_exec(vm, &exec); - fence = xe_vma_rebind(vm, vma, BIT(tile->id)); - xe_vm_set_validation_exec(vm, NULL); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - xe_validation_retry_on_oom(&ctx, &err); - goto unlock_dma_resv; - } - } - - dma_fence_wait(fence, false); - dma_fence_put(fence); - -unlock_dma_resv: - xe_validation_ctx_fini(&ctx); - if (err == -EAGAIN) - goto retry_userptr; - - return err; -} - -static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) -{ - struct xe_vm *vm; - - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = ERR_PTR(-EINVAL); - up_read(&xe->usm.lock); - - return vm; -} - -static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_vm *vm; - struct xe_vma *vma = NULL; - int err; - bool atomic; - - /* SW isn't expected to handle TRTT faults */ - if (pf->trva_fault) - return -EFAULT; - - vm = asid_to_vm(xe, pf->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - /* - * TODO: Change to read lock? Using write lock for simplicity. 
- */ - down_write(&vm->lock); - - if (xe_vm_is_closed(vm)) { - err = -ENOENT; - goto unlock_vm; - } - - vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); - if (!vma) { - err = -EINVAL; - goto unlock_vm; - } - - atomic = access_is_atomic(pf->access_type); - - if (xe_vma_is_cpu_addr_mirror(vma)) - err = xe_svm_handle_pagefault(vm, vma, gt, - pf->page_addr, atomic); - else - err = handle_vma_pagefault(gt, vma, atomic); - -unlock_vm: - if (!err) - vm->usm.last_fault_vma = vma; - up_write(&vm->lock); - xe_vm_put(vm); - - return err; -} - -static int send_pagefault_reply(struct xe_guc *guc, - struct xe_guc_pagefault_reply *reply) -{ - u32 action[] = { - XE_GUC_ACTION_PAGE_FAULT_RES_DESC, - reply->dw0, - reply->dw1, - }; - - return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void print_pagefault(struct xe_gt *gt, struct pagefault *pf) -{ - xe_gt_dbg(gt, "\n\tASID: %d\n" - "\tVFID: %d\n" - "\tPDATA: 0x%04x\n" - "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" - "\tAccessType: %d\n" - "\tFaultLevel: %d\n" - "\tEngineClass: %d %s\n" - "\tEngineInstance: %d\n", - pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), - lower_32_bits(pf->page_addr), - pf->fault_type, pf->access_type, pf->fault_level, - pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), - pf->engine_instance); -} - -#define PF_MSG_LEN_DW 4 - -static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) -{ - const struct xe_guc_pagefault_desc *desc; - bool ret = false; - - spin_lock_irq(&pf_queue->lock); - if (pf_queue->tail != pf_queue->head) { - desc = (const struct xe_guc_pagefault_desc *) - (pf_queue->data + pf_queue->tail); - - pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); - pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); - pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); - pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); - pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << - PFD_PDATA_HI_SHIFT; - pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); - pf->asid = FIELD_GET(PFD_ASID, desc->dw1); - pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); - pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); - pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); - pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << - PFD_VIRTUAL_ADDR_HI_SHIFT; - pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << - PFD_VIRTUAL_ADDR_LO_SHIFT; - - pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - pf_queue->num_dw; - ret = true; - } - spin_unlock_irq(&pf_queue->lock); - - return ret; -} - -static bool pf_queue_full(struct pf_queue *pf_queue) -{ - lockdep_assert_held(&pf_queue->lock); - - return CIRC_SPACE(pf_queue->head, pf_queue->tail, - pf_queue->num_dw) <= - PF_MSG_LEN_DW; -} - -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct pf_queue *pf_queue; - unsigned long flags; - u32 asid; - bool full; - - if (unlikely(len != PF_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(PFD_ASID, msg[1]); - pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); - - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); - - spin_lock_irqsave(&pf_queue->lock, flags); - full = pf_queue_full(pf_queue); - if (!full) { - memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % - pf_queue->num_dw; - queue_work(gt->usm.pf_wq, &pf_queue->worker); - } 
else { - xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); - } - spin_unlock_irqrestore(&pf_queue->lock, flags); - - return full ? -ENOSPC : 0; -} - -#define USM_QUEUE_MAX_RUNTIME_MS 20 - -static void pf_queue_work_func(struct work_struct *w) -{ - struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); - struct xe_gt *gt = pf_queue->gt; - struct xe_guc_pagefault_reply reply = {}; - struct pagefault pf = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_pagefault(pf_queue, &pf)) { - ret = handle_pagefault(gt, &pf); - if (unlikely(ret)) { - print_pagefault(gt, &pf); - pf.fault_unsuccessful = 1; - xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); - } - - reply.dw0 = FIELD_PREP(PFR_VALID, 1) | - FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | - FIELD_PREP(PFR_REPLY, PFR_ACCESS) | - FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf.asid); - - reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | - FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | - FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | - FIELD_PREP(PFR_PDATA, pf.pdata); - - send_pagefault_reply(&gt->uc.guc, &reply); - - if (time_after(jiffies, threshold) && - pf_queue->tail != pf_queue->head) { - queue_work(gt->usm.pf_wq, w); - break; - } - } -} - -static void acc_queue_work_func(struct work_struct *w); - -static void pagefault_fini(void *arg) -{ - struct xe_gt *gt = arg; - struct xe_device *xe = gt_to_xe(gt); - - if (!xe->info.has_usm) - return; - - destroy_workqueue(gt->usm.acc_wq); - destroy_workqueue(gt->usm.pf_wq); -} - -static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) -{ - struct xe_device *xe = gt_to_xe(gt); - xe_dss_mask_t all_dss; - int num_dss, num_eus; - - bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, - XE_MAX_DSS_FUSE_BITS); - - num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); - num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, - XE_MAX_EU_FUSE_BITS) * num_dss; - - /* - * user can issue separate page faults per EU and per CS - * - * XXX: Multiplier required as compute UMD are getting PF queue errors - * without it. Follow on why this multiplier is required. 
- */ -#define PF_MULTIPLIER 8 - pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; - pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); -#undef PF_MULTIPLIER - - pf_queue->gt = gt; - pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, - sizeof(u32), GFP_KERNEL); - if (!pf_queue->data) - return -ENOMEM; - - spin_lock_init(&pf_queue->lock); - INIT_WORK(&pf_queue->worker, pf_queue_work_func); - - return 0; -} - -int xe_gt_pagefault_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i, ret = 0; - - if (!xe->info.has_usm) - return 0; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]); - if (ret) - return ret; - } - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - gt->usm.acc_queue[i].gt = gt; - spin_lock_init(&gt->usm.acc_queue[i].lock); - INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func); - } - - gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); - if (!gt->usm.pf_wq) - return -ENOMEM; - - gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", - WQ_UNBOUND | WQ_HIGHPRI, - NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) { - destroy_workqueue(gt->usm.pf_wq); - return -ENOMEM; - } - - return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); -} - -void xe_gt_pagefault_reset(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - int i; - - if (!xe->info.has_usm) - return; - - for (i = 0; i < NUM_PF_QUEUE; ++i) { - spin_lock_irq(&gt->usm.pf_queue[i].lock); - gt->usm.pf_queue[i].head = 0; - gt->usm.pf_queue[i].tail = 0; - spin_unlock_irq(&gt->usm.pf_queue[i].lock); - } - - for (i = 0; i < NUM_ACC_QUEUE; ++i) { - spin_lock(&gt->usm.acc_queue[i].lock); - gt->usm.acc_queue[i].head = 0; - gt->usm.acc_queue[i].tail = 0; - spin_unlock(&gt->usm.acc_queue[i].lock); - } -} - -static int granularity_in_byte(int val) -{ - switch (val) { - case 0: - return SZ_128K; - case 1: - return SZ_2M; - case 2: - return SZ_16M; - case 3: - return SZ_64M; - default: - return 0; - } -} - -static int sub_granularity_in_byte(int val) -{ - return (granularity_in_byte(val) / 32); -} - -static void print_acc(struct xe_gt *gt, struct acc *acc) -{ - xe_gt_warn(gt, "Access counter request:\n" - "\tType: %s\n" - "\tASID: %d\n" - "\tVFID: %d\n" - "\tEngine: %d:%d\n" - "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" - "\tSub_Granularity Vector: 0x%08x\n" - "\tVA Range base: 0x%016llx\n", - acc->access_type ? 
"AC_NTFY_VAL" : "AC_TRIG_VAL", - acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, - granularity_in_byte(acc->granularity) / SZ_1K, - sub_granularity_in_byte(acc->granularity) / SZ_1K, - acc->sub_granularity, acc->va_range_base); -} - -static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) -{ - u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * - sub_granularity_in_byte(acc->granularity); - - return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); -} - -static int handle_acc(struct xe_gt *gt, struct acc *acc) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_validation_ctx ctx; - struct drm_exec exec; - struct xe_vm *vm; - struct xe_vma *vma; - int ret = 0; - - /* We only support ACC_TRIGGER at the moment */ - if (acc->access_type != ACC_TRIGGER) - return -EINVAL; - - vm = asid_to_vm(xe, acc->asid); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - down_read(&vm->lock); - - /* Lookup VMA */ - vma = get_acc_vma(vm, acc); - if (!vma) { - ret = -EINVAL; - goto unlock_vm; - } - - trace_xe_vma_acc(vma); - - /* Userptr or null can't be migrated, nothing to do */ - if (xe_vma_has_no_bo(vma)) - goto unlock_vm; - - /* Lock VM and BOs dma-resv */ - xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); - drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); - drm_exec_retry_on_contention(&exec); - xe_validation_retry_on_oom(&ctx, &ret); - } - - xe_validation_ctx_fini(&ctx); -unlock_vm: - up_read(&vm->lock); - xe_vm_put(vm); - - return ret; -} - -#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) - -#define ACC_MSG_LEN_DW 4 - -static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) -{ - const struct xe_guc_acc_desc *desc; - bool ret = false; - - spin_lock(&acc_queue->lock); - if (acc_queue->tail != acc_queue->head) { - desc = (const struct xe_guc_acc_desc *) - (acc_queue->data + acc_queue->tail); - - acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); - acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | - FIELD_GET(ACC_SUBG_LO, desc->dw0); - acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); - acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); - acc->asid = FIELD_GET(ACC_ASID, desc->dw1); - acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); - acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); - acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, - desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); - - acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) % - ACC_QUEUE_NUM_DW; - ret = true; - } - spin_unlock(&acc_queue->lock); - - return ret; -} - -static void acc_queue_work_func(struct work_struct *w) -{ - struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); - struct xe_gt *gt = acc_queue->gt; - struct acc acc = {}; - unsigned long threshold; - int ret; - - threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); - - while (get_acc(acc_queue, &acc)) { - ret = handle_acc(gt, &acc); - if (unlikely(ret)) { - print_acc(gt, &acc); - xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); - } - - if (time_after(jiffies, threshold) && - acc_queue->tail != acc_queue->head) { - queue_work(gt->usm.acc_wq, w); - break; - } - } -} - -static bool acc_queue_full(struct acc_queue *acc_queue) -{ - lockdep_assert_held(&acc_queue->lock); - - return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= - ACC_MSG_LEN_DW; -} - -int 
xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct acc_queue *acc_queue; - u32 asid; - bool full; - - /* - * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); - - if (unlikely(len != ACC_MSG_LEN_DW)) - return -EPROTO; - - asid = FIELD_GET(ACC_ASID, msg[1]); - acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE]; - - spin_lock(&acc_queue->lock); - full = acc_queue_full(acc_queue); - if (!full) { - memcpy(acc_queue->data + acc_queue->head, msg, - len * sizeof(u32)); - acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; - queue_work(gt->usm.acc_wq, &acc_queue->worker); - } else { - xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); - } - spin_unlock(&acc_queue->lock); - - return full ? -ENOSPC : 0; -} diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h deleted file mode 100644 index 839c065a5e4c..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2022 Intel Corporation - */ - -#ifndef _XE_GT_PAGEFAULT_H_ -#define _XE_GT_PAGEFAULT_H_ - -#include <linux/types.h> - -struct xe_gt; -struct xe_guc; - -int xe_gt_pagefault_init(struct xe_gt *gt); -void xe_gt_pagefault_reset(struct xe_gt *gt); -int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); -int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len); - -#endif /* _XE_GT_PAGEFAULT_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c0c0215c0703..d90261a7ab7c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -924,7 +924,8 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns const char *what, const char *(*unit)(u32), unsigned int last, int err) { - xe_gt_assert(gt, first); + char name[8]; + xe_gt_assert(gt, num_vfs); xe_gt_assert(gt, first <= last); @@ -932,8 +933,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err); if (unlikely(err)) { - xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", - first, first + num_vfs - 1, what); + xe_gt_sriov_notice(gt, "Failed to bulk provision %s..VF%u with %s\n", + xe_sriov_function_name(first, name, sizeof(name)), + first + num_vfs - 1, what); if (last > first) pf_config_bulk_set_u32_done(gt, first, last - first, value, get, what, unit, last, 0); @@ -942,8 +944,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ value = get(gt, first); - xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n", - first, first + num_vfs - 1, value, unit(value), what); + xe_gt_sriov_info(gt, "%s..VF%u provisioned with %u%s %s\n", + xe_sriov_function_name(first, name, sizeof(name)), + first + num_vfs - 1, value, unit(value), what); return 0; } @@ -1724,7 +1727,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1732,47 +1735,107 @@ static int 
pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF. + * xe_gt_sriov_pf_config_set_exec_quantum_locked() - Configure PF/VF execution quantum. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex held. + * It will log the provisioned value or an error in case of failure. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, - u32 exec_quantum) +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_exec_quantum(gt, vfid, exec_quantum); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, exec_quantum, - xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid), + pf_get_exec_quantum(gt, vfid), "execution quantum", exec_quantum_unit, err); } /** - * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum. + * xe_gt_sriov_pf_config_set_exec_quantum() - Configure PF/VF execution quantum. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) * * This function can only be called on PF. + * It will log the provisioned value or an error in case of failure. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, exec_quantum); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum_locked() - Get PF/VF execution quantum. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF with the master mutex held. * - * Return: VF's (or PF's) execution quantum in milliseconds. + * Return: execution quantum in milliseconds (or 0 if infinity). + */ +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_exec_quantum(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum() - Get PF/VF execution quantum. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF. + * + * Return: execution quantum in milliseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) { - u32 exec_quantum; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - exec_quantum = pf_get_exec_quantum(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_exec_quantum(gt, vfid); +} - return exec_quantum; +/** + * xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked() - Configure EQ for PF and VFs. + * @gt: the &xe_gt to configure + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex held. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_exec_quantum(gt, VFID(n), exec_quantum); + if (err) + break; + } + + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, exec_quantum, + pf_get_exec_quantum, "execution quantum", + exec_quantum_unit, n, err); }
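For context, a hedged sketch of how a caller, for example the new sriov_admin sysfs code added by this series but not shown in this hunk, might combine the _locked variants to apply several bulk updates under one critical section; the helper name pf_apply_bulk_profile() is hypothetical:

static int pf_apply_bulk_profile(struct xe_gt *gt, u32 eq_ms, u32 pt_us)
{
	int err;

	/* Hold the master mutex across both bulk updates */
	guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));

	err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq_ms);
	if (err)
		return err;

	return xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt_us);
}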
static const char *preempt_timeout_unit(u32 preempt_timeout) @@ -1795,7 +1858,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, return 0; } -static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); @@ -1803,47 +1866,106 @@ static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF. + * xe_gt_sriov_pf_config_set_preempt_timeout_locked() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) * - * This function can only be called on PF. + * This function can only be called on PF with the master mutex held. + * It will log the provisioned value or an error in case of failure. * * Return: 0 on success or a negative error code on failure. */ -int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, - u32 preempt_timeout) +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) { int err; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); return pf_config_set_u32_done(gt, vfid, preempt_timeout, - xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid), + pf_get_preempt_timeout(gt, vfid), "preemption timeout", preempt_timeout_unit, err); } /** - * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout. + * xe_gt_sriov_pf_config_set_preempt_timeout() - Configure PF/VF preemption timeout. * @gt: the &xe_gt - * @vfid: the VF identifier + * @vfid: the PF or VF identifier + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + + return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, preempt_timeout); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout_locked() - Get PF/VF preemption timeout. + * @gt: the &xe_gt + * @vfid: the PF or VF identifier + * + * This function can only be called on PF with the master mutex held. + * + * Return: preemption timeout in microseconds (or 0 if infinity). + */ +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_get_preempt_timeout(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout() - Get PF/VF preemption timeout. 
+ * @gt: the &xe_gt + * @vfid: the PF or VF identifier * * This function can only be called on PF. * - * Return: VF's (or PF's) preemption timeout in microseconds. + * Return: preemption timeout in microseconds (or 0 if infinity). */ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) { - u32 preempt_timeout; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - preempt_timeout = pf_get_preempt_timeout(gt, vfid); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return pf_get_preempt_timeout(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked() - Configure PT for PF and VFs. + * @gt: the &xe_gt to configure + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF with the master mutex held. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout) +{ + unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n <= totalvfs; n++) { + err = pf_provision_preempt_timeout(gt, VFID(n), preempt_timeout); + if (err) + break; + } - return preempt_timeout; + return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, preempt_timeout, + pf_get_preempt_timeout, "preemption timeout", + preempt_timeout_unit, n, err); } static const char *sched_priority_unit(u32 priority) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 513e6512a575..14d036790695 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -40,10 +40,20 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, uns u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum); +u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum); +int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum); + u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout); +int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout); + u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index aa962c783cdf..fa7068aac334 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -8,221 +8,168 @@ #include <regs/xe_gt_regs.h> #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" #include "xe_mmio.h" +#include "xe_platform_types.h" #include "xe_pm.h" /** * DOC: Xe GT Throttle * - * Provides 
sysfs entries and other helpers for frequency throttle reasons in GT + * The GT frequency may be throttled by hardware/firmware for various reasons + * that are provided through attributes under the ``freq0/throttle/`` directory. + * Their availability depends on the platform, and an attribute is not visible + * if its reason is not available. * - * device/gt#/freq0/throttle/status - Overall status - * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1 - * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2 - * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc. - * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal - * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot - * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL - * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT - * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC + * The following attributes are available on the Crescent Island platform: + * + * - ``status``: Overall throttle status + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4 + * - ``reason_prochot``: prochot + * - ``reason_soc_thermal``: SoC thermal + * - ``reason_mem_thermal``: Memory thermal + * - ``reason_vr_thermal``: VR thermal + * - ``reason_iccmax``: ICCMAX + * - ``reason_ratl``: RATL thermal algorithm + * - ``reason_soc_avg_thermal``: SoC average temp + * - ``reason_fastvmode``: VR is hitting FastVMode + * - ``reason_psys_pl1``: PSYS PL1 + * - ``reason_psys_pl2``: PSYS PL2 + * - ``reason_p0_freq``: P0 frequency + * - ``reason_psys_crit``: PSYS critical + * + * Other platforms support the following reasons: + * + * - ``status``: Overall status + * - ``reason_pl1``: package PL1 + * - ``reason_pl2``: package PL2 + * - ``reason_pl4``: package PL4, Iccmax etc.
+ * - ``reason_thermal``: thermal + * - ``reason_prochot``: prochot + * - ``reason_ratl``: RATL thermal algorithm + * - ``reason_vr_thermalert``: VR THERMALERT + * - ``reason_vr_tdc``: VR TDC */ -static struct xe_gt * -dev_to_gt(struct device *dev) -{ - return kobj_to_gt(dev->kobj.parent); -} - -u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) -{ - u32 reg; - - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS); - else - reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS); - xe_pm_runtime_put(gt_to_xe(gt)); - - return reg; -} - -static u32 read_status(struct xe_gt *gt) -{ - u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; - - xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); - return status; -} - -static u32 read_reason_pl1(struct xe_gt *gt) -{ - u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK; - - return pl1; -} +struct throttle_attribute { + struct kobj_attribute attr; + u32 mask; +}; -static u32 read_reason_pl2(struct xe_gt *gt) +static struct xe_gt *dev_to_gt(struct device *dev) { - u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK; - - return pl2; + return kobj_to_gt(dev->kobj.parent); } -static u32 read_reason_pl4(struct xe_gt *gt) +static struct xe_gt *throttle_to_gt(struct kobject *kobj) { - u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK; - - return pl4; + return dev_to_gt(kobj_to_dev(kobj)); } -static u32 read_reason_thermal(struct xe_gt *gt) +static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr) { - u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK; - - return thermal; + return container_of(attr, struct throttle_attribute, attr); } -static u32 read_reason_prochot(struct xe_gt *gt) +u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt) { - u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK; - - return prochot; -} + struct xe_device *xe = gt_to_xe(gt); + struct xe_reg reg; + u32 val, mask; -static u32 read_reason_ratl(struct xe_gt *gt) -{ - u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK; + if (xe_gt_is_media_type(gt)) + reg = MTL_MEDIA_PERF_LIMIT_REASONS; + else + reg = GT0_PERF_LIMIT_REASONS; - return ratl; -} + if (xe->info.platform == XE_CRESCENTISLAND) + mask = CRI_PERF_LIMIT_REASONS_MASK; + else + mask = GT0_PERF_LIMIT_REASONS_MASK; -static u32 read_reason_vr_thermalert(struct xe_gt *gt) -{ - u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK; + xe_pm_runtime_get(xe); + val = xe_mmio_read32(&gt->mmio, reg) & mask; + xe_pm_runtime_put(xe); - return thermalert; + return val; } -static u32 read_reason_vr_tdc(struct xe_gt *gt) +static bool is_throttled_by(struct xe_gt *gt, u32 mask) { - u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK; - - return tdc; + return xe_gt_throttle_get_limit_reasons(gt) & mask; } -static ssize_t status_show(struct kobject *kobj, +static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, char *buff) { - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool status = !!read_status(gt); - - return sysfs_emit(buff, "%u\n", status); -} -static struct kobj_attribute attr_status = __ATTR_RO(status); - -static ssize_t reason_pl1_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl1 = !!read_reason_pl1(gt); -
return sysfs_emit(buff, "%u\n", pl1); -} -static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1); - -static ssize_t reason_pl2_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl2 = !!read_reason_pl2(gt); - - return sysfs_emit(buff, "%u\n", pl2); -} -static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2); - -static ssize_t reason_pl4_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool pl4 = !!read_reason_pl4(gt); - - return sysfs_emit(buff, "%u\n", pl4); -} -static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4); - -static ssize_t reason_thermal_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermal = !!read_reason_thermal(gt); + struct throttle_attribute *ta = kobj_attribute_to_throttle(attr); + struct xe_gt *gt = throttle_to_gt(kobj); - return sysfs_emit(buff, "%u\n", thermal); + return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask)); } -static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal); -static ssize_t reason_prochot_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool prochot = !!read_reason_prochot(gt); +#define THROTTLE_ATTR_RO(name, _mask) \ + struct throttle_attribute attr_##name = { \ + .attr = __ATTR(name, 0444, reason_show, NULL), \ + .mask = _mask, \ + } - return sysfs_emit(buff, "%u\n", prochot); -} -static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot); - -static ssize_t reason_ratl_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool ratl = !!read_reason_ratl(gt); - - return sysfs_emit(buff, "%u\n", ratl); -} -static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl); - -static ssize_t reason_vr_thermalert_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool thermalert = !!read_reason_vr_thermalert(gt); - - return sysfs_emit(buff, "%u\n", thermalert); -} -static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert); - -static ssize_t reason_vr_tdc_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buff) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_gt *gt = dev_to_gt(dev); - bool tdc = !!read_reason_vr_tdc(gt); - - return sysfs_emit(buff, "%u\n", tdc); -} -static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc); +static THROTTLE_ATTR_RO(status, U32_MAX); +static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK); +static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK); +static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK); +static THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK); +static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK); +static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK); +static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK); static struct attribute *throttle_attrs[] = { - &attr_status.attr, - &attr_reason_pl1.attr, - &attr_reason_pl2.attr, - &attr_reason_pl4.attr, - 
&attr_reason_thermal.attr, - &attr_reason_prochot.attr, - &attr_reason_ratl.attr, - &attr_reason_vr_thermalert.attr, - &attr_reason_vr_tdc.attr, + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_thermal.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + &attr_reason_vr_thermalert.attr.attr, + &attr_reason_vr_tdc.attr.attr, + NULL +}; + +static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK); +static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK); +static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK); +static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK); +static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK); +static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK); +static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK); + +static struct attribute *cri_throttle_attrs[] = { + /* Common */ + &attr_status.attr.attr, + &attr_reason_pl1.attr.attr, + &attr_reason_pl2.attr.attr, + &attr_reason_pl4.attr.attr, + &attr_reason_prochot.attr.attr, + &attr_reason_ratl.attr.attr, + /* CRI */ + &attr_reason_vr_thermal.attr.attr, + &attr_reason_soc_thermal.attr.attr, + &attr_reason_mem_thermal.attr.attr, + &attr_reason_iccmax.attr.attr, + &attr_reason_soc_avg_thermal.attr.attr, + &attr_reason_fastvmode.attr.attr, + &attr_reason_psys_pl1.attr.attr, + &attr_reason_psys_pl2.attr.attr, + &attr_reason_p0_freq.attr.attr, + &attr_reason_psys_crit.attr.attr, NULL }; @@ -231,19 +178,37 @@ static const struct attribute_group throttle_group_attrs = { .attrs = throttle_attrs, }; +static const struct attribute_group cri_throttle_group_attrs = { + .name = "throttle", + .attrs = cri_throttle_attrs, +}; + +static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe) +{ + switch (xe->info.platform) { + case XE_CRESCENTISLAND: + return &cri_throttle_group_attrs; + default: + return &throttle_group_attrs; + } +} + static void gt_throttle_sysfs_fini(void *arg) { struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); - sysfs_remove_group(gt->freq, &throttle_group_attrs); + sysfs_remove_group(gt->freq, group); } int xe_gt_throttle_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + const struct attribute_group *group = get_platform_throttle_group(xe); int err; - err = sysfs_create_group(gt->freq, &throttle_group_attrs); + err = sysfs_create_group(gt->freq, group); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 0b525643a048..0a728180b6fe 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -220,71 +220,6 @@ struct xe_gt { * operations (e.g. migrations, fixing page tables) */ u16 reserved_bcs_instance; - /** @usm.pf_wq: page fault work queue, unbound, high priority */ - struct workqueue_struct *pf_wq; - /** @usm.acc_wq: access counter work queue, unbound, high priority */ - struct workqueue_struct *acc_wq; - /** - * @usm.pf_queue: Page fault queue used to sync faults so faults can - * be processed not under the GuC CT lock. The queue is sized so - * it can sync all possible faults (1 per physical engine). 
- * Multiple queues exist for page faults from different VMs to be - * processed in parallel. - */ - struct pf_queue { - /** @usm.pf_queue.gt: back pointer to GT */ - struct xe_gt *gt; - /** @usm.pf_queue.data: data in the page fault queue */ - u32 *data; - /** - * @usm.pf_queue.num_dw: number of DWORDS in the page - * fault queue. Dynamically calculated based on the number - * of compute resources available. - */ - u32 num_dw; - /** - * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, - * moved by worker which processes faults (consumer). - */ - u16 tail; - /** - * @usm.pf_queue.head: head pointer in DWs for page fault queue, - * moved by G2H handler (producer). - */ - u16 head; - /** @usm.pf_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.pf_queue.worker: to process page faults */ - struct work_struct worker; -#define NUM_PF_QUEUE 4 - } pf_queue[NUM_PF_QUEUE]; - /** - * @usm.acc_queue: Same as page fault queue, cannot process access - * counters under CT lock. - */ - struct acc_queue { - /** @usm.acc_queue.gt: back pointer to GT */ - struct xe_gt *gt; -#define ACC_QUEUE_NUM_DW 128 - /** @usm.acc_queue.data: data in the page fault queue */ - u32 data[ACC_QUEUE_NUM_DW]; - /** - * @usm.acc_queue.tail: tail pointer in DWs for access counter queue, - * moved by worker which processes counters - * (consumer). - */ - u16 tail; - /** - * @usm.acc_queue.head: head pointer in DWs for access counter queue, - * moved by G2H handler (producer). - */ - u16 head; - /** @usm.acc_queue.lock: protects page fault queue */ - spinlock_t lock; - /** @usm.acc_queue.worker: to process access counters */ - struct work_struct worker; -#define NUM_ACC_QUEUE 4 - } acc_queue[NUM_ACC_QUEUE]; } usm; /** @ordered_wq: used to serialize GT resets and TDRs */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index e68953ef3a00..2697d711adb2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -21,12 +21,12 @@ #include "xe_devcoredump.h" #include "xe_device.h" #include "xe_gt.h" -#include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_guc.h" #include "xe_guc_log.h" +#include "xe_guc_pagefault.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_guc_tlb_inval.h" @@ -199,6 +199,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); @@ -1545,10 +1548,6 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_TLB_INVALIDATION_DONE: ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; - case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: - ret = xe_guc_access_counter_notify_handler(guc, payload, - adj_len); - break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c new file mode 100644 index 000000000000..719a18187a31 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_pagefault.h" +#include "xe_pagefault.h" + +static void guc_ack_fault(struct 
xe_pagefault *pf, int err) +{ + u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]); + u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]); + u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]); + u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | + (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) << + PFD_PDATA_HI_SHIFT); + u32 action[] = { + XE_GUC_ACTION_PAGE_FAULT_RES_DESC, + + FIELD_PREP(PFR_VALID, 1) | + FIELD_PREP(PFR_SUCCESS, !!err) | + FIELD_PREP(PFR_REPLY, PFR_ACCESS) | + FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | + FIELD_PREP(PFR_ASID, pf->consumer.asid), + + FIELD_PREP(PFR_VFID, vfid) | + FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | + FIELD_PREP(PFR_ENG_CLASS, engine_class) | + FIELD_PREP(PFR_PDATA, pdata), + }; + struct xe_guc *guc = pf->producer.private; + + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); +} + +static const struct xe_pagefault_ops guc_pagefault_ops = { + .ack_fault = guc_ack_fault, +}; + +/** + * xe_guc_pagefault_handler() - G2H page fault handler + * @guc: GuC object + * @msg: G2H message + * @len: Length of G2H message + * + * Parse GuC to host (G2H) message into a struct xe_pagefault and forward onto + * the Xe page fault layer. + * + * Return: 0 on success, errno on failure + */ +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_pagefault pf; + int i; + +#define GUC_PF_MSG_LEN_DW \ + (sizeof(struct xe_guc_pagefault_desc) / sizeof(u32)) + + BUILD_BUG_ON(GUC_PF_MSG_LEN_DW > XE_PAGEFAULT_PRODUCER_MSG_LEN_DW); + + if (len != GUC_PF_MSG_LEN_DW) + return -EPROTO; + + pf.gt = guc_to_gt(guc); + + /* + * XXX: These values happen to match the enum in xe_pagefault_types.h. + * If that changes, we’ll need to remap them here. + */ + pf.consumer.page_addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, msg[3]) + << PFD_VIRTUAL_ADDR_HI_SHIFT) | + (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << + PFD_VIRTUAL_ADDR_LO_SHIFT); + pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); + pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); + pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]); + if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) + pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK; + else + pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]); + pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]); + pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]); + + pf.producer.private = guc; + pf.producer.ops = &guc_pagefault_ops; + for (i = 0; i < GUC_PF_MSG_LEN_DW; ++i) + pf.producer.msg[i] = msg[i]; + +#undef GUC_PF_MSG_LEN_DW + + return xe_pagefault_handler(guc_to_xe(guc), &pf); +} diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.h b/drivers/gpu/drm/xe/xe_guc_pagefault.h new file mode 100644 index 000000000000..3bd599e7207c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_PAGEFAULT_H_ +#define _XE_GUC_PAGEFAULT_H_ + +#include <linux/types.h> + +struct xe_guc; + +int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 56a5804726e9..5003e3c4dd17 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2333,6 +2333,20 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +/** 
+ * xe_migrate_job_lock_assert() - Assert that the migrate job lock of the queue is held + * @q: Migrate queue + */ +void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ + struct xe_migrate *m = gt_to_tile(q->gt)->migrate; + + xe_gt_assert(q->gt, q == m->q); + lockdep_assert_held(&m->job_mutex); +} +#endif + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 4fad324b6253..9b5791617f5e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -152,6 +152,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +void xe_migrate_job_lock_assert(struct xe_exec_queue *q); +#else +static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q) +{ +} +#endif + void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q); void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index f901ba52b403..7a13a7bd99a6 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include <drm/drm_drv.h> #include <drm/drm_managed.h> +#include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> #include <generated/xe_wa_oob.h> @@ -1390,7 +1391,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1410,7 +1413,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], - &param->syncs_user[num_syncs], 0); + &param->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1540,7 +1545,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, &param); + err = xe_oa_parse_syncs(stream->oa, stream, &param); if (err) goto err_config_put; @@ -1636,6 +1641,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1827,6 +1833,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; @@ -1837,17 +1844,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(&param->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1871,6 +1892,8 @@ err_destroy:
xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -2084,22 +2107,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, &param); - if (ret) - goto err_exec_q; - mutex_lock(&param.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, &param); mutex_unlock(&param.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 2628f78c4e8d..daf701b5d48b 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -248,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c new file mode 100644 index 000000000000..fe3e40145012 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/circ_buf.h> + +#include <drm/drm_exec.h> +#include <drm/drm_managed.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_gt_printk.h" +#include "xe_gt_types.h" +#include "xe_gt_stats.h" +#include "xe_hw_engine.h" +#include "xe_pagefault.h" +#include "xe_pagefault_types.h" +#include "xe_svm.h" +#include "xe_trace_bo.h" +#include "xe_vm.h" + +/** + * DOC: Xe page faults + * + * Xe page faults are handled in two layers. The producer layer interacts with + * hardware or firmware to receive and parse faults into struct xe_pagefault, + * then forwards them to the consumer. The consumer layer services the faults + * (e.g., memory migration, page table updates) and acknowledges the result back + * to the producer, which then forwards the results to the hardware or firmware. + * The consumer uses a page fault queue sized to absorb all potential faults and + * a multi-threaded worker to process them. Multiple producers are supported, + * with a single shared consumer. + * + * xe_pagefault.c implements the consumer layer. + */ + +static int xe_pagefault_entry_size(void) +{ + /* + * Power of two alignment is not a hardware requirement, rather a + * software restriction which makes the math for page fault queue + * management simpler. + */ + return roundup_pow_of_two(sizeof(struct xe_pagefault)); +} + +static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma, + struct xe_vram_region *vram, bool need_vram_move) +{ + struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); + int err; + + err = xe_vm_lock_vma(exec, vma); + if (err) + return err; + + if (!bo) + return 0; + + return need_vram_move ?
xe_bo_migrate(bo, vram->placement, NULL, exec) : + xe_bo_validate(bo, vm, true, exec); +} + +static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma, + bool atomic) +{ + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct dma_fence *fence; + int err, needs_vram; + + lockdep_assert_held_write(&vm->lock); + + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) + return needs_vram < 0 ? needs_vram : -EACCES; + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + xe_vma_size(vma) / SZ_1K); + + trace_xe_vma_pagefault(vma); + + /* Check if VMA is valid, opportunistic check only */ + if (xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, + vma->tile_invalidated) && !atomic) + return 0; + +retry_userptr: + if (xe_vma_is_userptr(vma) && + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + + err = xe_vma_userptr_pin_pages(uvma); + if (err) + return err; + } + + /* Lock VM and BOs dma-resv */ + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); + drm_exec_init(&exec, 0, 0); + drm_exec_until_all_locked(&exec) { + err = xe_pagefault_begin(&exec, vma, tile->mem.vram, + needs_vram == 1); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + if (err) + goto unlock_dma_resv; + + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + xe_vm_set_validation_exec(vm, &exec); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + xe_validation_retry_on_oom(&ctx, &err); + goto unlock_dma_resv; + } + } + + dma_fence_wait(fence, false); + dma_fence_put(fence); + +unlock_dma_resv: + xe_validation_ctx_fini(&ctx); + if (err == -EAGAIN) + goto retry_userptr; + + return err; +} + +static bool +xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type) +{ + return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; +} + +static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + +static int xe_pagefault_service(struct xe_pagefault *pf) +{ + struct xe_gt *gt = pf->gt; + struct xe_device *xe = gt_to_xe(gt); + struct xe_vm *vm; + struct xe_vma *vma = NULL; + int err; + bool atomic; + + /* Producer flagged this fault to be nacked */ + if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK) + return -EFAULT; + + vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + /* + * TODO: Change to read lock? Using write lock for simplicity. 
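+	 * A read lock would allow faults on different VMAs of the same VM to + * be serviced concurrently, but the write-side assumptions below (e.g. + * the plain update of vm->usm.last_fault_vma) would need to be audited + * first.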
+ */ + down_write(&vm->lock); + + if (xe_vm_is_closed(vm)) { + err = -ENOENT; + goto unlock_vm; + } + + vma = xe_vm_find_vma_by_addr(vm, pf->consumer.page_addr); + if (!vma) { + err = -EINVAL; + goto unlock_vm; + } + + atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type); + + if (xe_vma_is_cpu_addr_mirror(vma)) + err = xe_svm_handle_pagefault(vm, vma, gt, + pf->consumer.page_addr, atomic); + else + err = xe_pagefault_handle_vma(gt, vma, atomic); + +unlock_vm: + if (!err) + vm->usm.last_fault_vma = vma; + up_write(&vm->lock); + xe_vm_put(vm); + + return err; +} + +static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue, + struct xe_pagefault *pf) +{ + bool found_fault = false; + + spin_lock_irq(&pf_queue->lock); + if (pf_queue->tail != pf_queue->head) { + memcpy(pf, pf_queue->data + pf_queue->tail, sizeof(*pf)); + pf_queue->tail = (pf_queue->tail + xe_pagefault_entry_size()) % + pf_queue->size; + found_fault = true; + } + spin_unlock_irq(&pf_queue->lock); + + return found_fault; +} + +static void xe_pagefault_print(struct xe_pagefault *pf) +{ + xe_gt_dbg(pf->gt, "\n\tASID: %d\n" + "\tFaulted Address: 0x%08x%08x\n" + "\tFaultType: %d\n" + "\tAccessType: %d\n" + "\tFaultLevel: %d\n" + "\tEngineClass: %d %s\n" + "\tEngineInstance: %d\n", + pf->consumer.asid, + upper_32_bits(pf->consumer.page_addr), + lower_32_bits(pf->consumer.page_addr), + pf->consumer.fault_type, + pf->consumer.access_type, + pf->consumer.fault_level, + pf->consumer.engine_class, + xe_hw_engine_class_to_str(pf->consumer.engine_class), + pf->consumer.engine_instance); +} + +static void xe_pagefault_queue_work(struct work_struct *w) +{ + struct xe_pagefault_queue *pf_queue = + container_of(w, typeof(*pf_queue), worker); + struct xe_pagefault pf; + unsigned long threshold; + +#define USM_QUEUE_MAX_RUNTIME_MS 20 + threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); + + while (xe_pagefault_queue_pop(pf_queue, &pf)) { + int err; + + if (!pf.gt) /* Fault squashed during reset */ + continue; + + err = xe_pagefault_service(&pf); + if (err) { + xe_pagefault_print(&pf); + xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } + + pf.producer.ops->ack_fault(&pf, err); + + if (time_after(jiffies, threshold)) { + queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w); + break; + } + } +#undef USM_QUEUE_MAX_RUNTIME_MS +} + +static int xe_pagefault_queue_init(struct xe_device *xe, + struct xe_pagefault_queue *pf_queue) +{ + struct xe_gt *gt; + int total_num_eus = 0; + u8 id; + + for_each_gt(gt, xe, id) { + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, + gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + total_num_eus += num_eus; + } + + xe_assert(xe, total_num_eus); + + /* + * users can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMDs are getting PF queue errors + * without it. Follow up on why this multiplier is required.
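+	 * + * With the multiplier, the queue below is sized as roughly + * (total_num_eus + XE_NUM_HW_ENGINES) * entry size * 8 bytes, rounded + * up to the next power of two.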
+ */ +#define PF_MULTIPLIER 8 + pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) * + xe_pagefault_entry_size() * PF_MULTIPLIER; + pf_queue->size = roundup_pow_of_two(pf_queue->size); +#undef PF_MULTIPLIER + + drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u", + xe_pagefault_entry_size(), total_num_eus, pf_queue->size); + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work); + + pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + return 0; +} + +static void xe_pagefault_fini(void *arg) +{ + struct xe_device *xe = arg; + + destroy_workqueue(xe->usm.pf_wq); +} + +/** + * xe_pagefault_init() - Page fault init + * @xe: xe device instance + * + * Initialize Xe page fault state. Must be done after reading fuses. + * + * Return: 0 on Success, errno on failure + */ +int xe_pagefault_init(struct xe_device *xe) +{ + int err, i; + + if (!xe->info.has_usm) + return 0; + + xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue", + WQ_UNBOUND | WQ_HIGHPRI, + XE_PAGEFAULT_QUEUE_COUNT); + if (!xe->usm.pf_wq) + return -ENOMEM; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) { + err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i); + if (err) + goto err_out; + } + + return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe); + +err_out: + destroy_workqueue(xe->usm.pf_wq); + return err; +} + +static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt, + struct xe_pagefault_queue *pf_queue) +{ + u32 i; + + /* Driver load failure guard / USM not enabled guard */ + if (!pf_queue->data) + return; + + /* Squash all pending faults on the GT */ + + spin_lock_irq(&pf_queue->lock); + for (i = pf_queue->tail; i != pf_queue->head; + i = (i + xe_pagefault_entry_size()) % pf_queue->size) { + struct xe_pagefault *pf = pf_queue->data + i; + + if (pf->gt == gt) + pf->gt = NULL; + } + spin_unlock_irq(&pf_queue->lock); +} + +/** + * xe_pagefault_reset() - Page fault reset for a GT + * @xe: xe device instance + * @gt: GT being reset + * + * Reset the Xe page fault state for a GT; that is, squash any pending faults on + * the GT. + */ +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt) +{ + int i; + + for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) + xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i); +} + +static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue) +{ + lockdep_assert_held(&pf_queue->lock); + + return CIRC_SPACE(pf_queue->head, pf_queue->tail, pf_queue->size) <= + xe_pagefault_entry_size(); +} + +/** + * xe_pagefault_handler() - Page fault handler + * @xe: xe device instance + * @pf: Page fault + * + * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to + * service it. Safe to be called from IRQ or process context. Reclaim safe. 
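+ * Reclaim safety follows from the queue storage being preallocated at init + * time and the fault being copied by value under the queue spinlock, so this + * path never allocates memory.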
+ * + * Return: 0 on success, errno on failure + */ +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf) +{ + struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue + + (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + unsigned long flags; + bool full; + + spin_lock_irqsave(&pf_queue->lock, flags); + full = xe_pagefault_queue_full(pf_queue); + if (!full) { + memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf)); + pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) % + pf_queue->size; + queue_work(xe->usm.pf_wq, &pf_queue->worker); + } else { + drm_warn(&xe->drm, + "PageFault Queue (%d) full, shouldn't be possible\n", + pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT); + } + spin_unlock_irqrestore(&pf_queue->lock, flags); + + return full ? -ENOSPC : 0; +} diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h new file mode 100644 index 000000000000..bd0cdf9ed37f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_H_ +#define _XE_PAGEFAULT_H_ + +struct xe_device; +struct xe_gt; +struct xe_pagefault; + +int xe_pagefault_init(struct xe_device *xe); + +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt); + +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h new file mode 100644 index 000000000000..d3b516407d60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PAGEFAULT_TYPES_H_ +#define _XE_PAGEFAULT_TYPES_H_ + +#include <linux/workqueue.h> + +struct xe_gt; +struct xe_pagefault; + +/** enum xe_pagefault_access_type - Xe page fault access type */ +enum xe_pagefault_access_type { + /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */ + XE_PAGEFAULT_ACCESS_TYPE_READ = 0, + /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */ + XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1, + /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */ + XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2, +}; + +/** enum xe_pagefault_type - Xe page fault type */ +enum xe_pagefault_type { + /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */ + XE_PAGEFAULT_TYPE_NOT_PRESENT = 0, + /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */ + XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1, + /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */ + XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2, +}; + +/** struct xe_pagefault_ops - Xe pagefault ops (producer) */ +struct xe_pagefault_ops { + /** + * @ack_fault: Ack fault + * @pf: Page fault + * @err: Error state of fault + * + * Page fault producer receives acknowledgment from the consumer and + * sends the result to the HW/FW interface. + */ + void (*ack_fault)(struct xe_pagefault *pf, int err); +}; + +/** + * struct xe_pagefault - Xe page fault + * + * Generic page fault structure for communication between producer and consumer. + * Carefully sized to be 64 bytes. Upon a device page fault, the producer + * populates this structure, and the consumer copies it into the page-fault + * queue for deferred handling. + */ +struct xe_pagefault { + /** + * @gt: GT of fault + */ + struct xe_gt *gt; + /** + * @consumer: State for the software handling the fault. 
Populated by + * the producer and may be modified by the consumer to communicate + * information back to the producer upon fault acknowledgment. + */ + struct { + /** @consumer.page_addr: address of page fault */ + u64 page_addr; + /** @consumer.asid: address space ID */ + u32 asid; + /** + * @consumer.access_type: access type, u8 rather than enum to + * keep size compact + */ + u8 access_type; + /** + * @consumer.fault_type: fault type, u8 rather than enum to + * keep size compact + */ + u8 fault_type; +#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */ + /** @consumer.fault_level: fault level */ + u8 fault_level; + /** @consumer.engine_class: engine class */ + u8 engine_class; + /** @consumer.engine_instance: engine instance */ + u8 engine_instance; + /** @consumer.reserved: reserved bits for future expansion */ + u8 reserved[7]; + } consumer; + /** + * @producer: State for the producer (i.e., HW/FW interface). Populated + * by the producer and should not be modified—or even inspected—by the + * consumer, except for calling operations. + */ + struct { + /** @producer.private: private pointer */ + void *private; + /** @producer.ops: operations */ + const struct xe_pagefault_ops *ops; +#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4 + /** + * @producer.msg: page fault message, used by producer in fault + * acknowledgment to formulate response to HW/FW interface. + * Included in the page-fault message because the producer + * typically receives the fault in a context where memory cannot + * be allocated (e.g., atomic context or the reclaim path). + */ + u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW]; + } producer; +}; + +/** + * struct xe_pagefault_queue - Xe pagefault queue (consumer) + * + * Used to capture all device page faults for deferred processing. Size this + * queue to absorb the device’s worst-case number of outstanding faults. + */ +struct xe_pagefault_queue { + /** + * @data: Data in queue containing struct xe_pagefault, protected by + * @lock + */ + void *data; + /** @size: Size of queue in bytes */ + u32 size; + /** @head: Head pointer in bytes, moved by producer, protected by @lock */ + u32 head; + /** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */ + u32 tail; + /** @lock: protects page fault queue */ + spinlock_t lock; + /** @worker: to process page faults */ + struct work_struct worker; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 7649b554942a..68171cceea18 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -115,7 +115,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ .coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ?
\ - XE_COH_AT_LEAST_1WAY : XE_COH_NONE \ + XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ + .valid = 1 \ } static const struct xe_pat_table_entry xe2_pat_table[] = { @@ -368,7 +369,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) if (!fw_ref) return -ETIMEDOUT; - drm_printf(p, "PAT table:\n"); + drm_printf(p, "PAT table: (* = reserved entry)\n"); for (i = 0; i < xe->pat.n_entries; i++) { if (xe_gt_is_media_type(gt)) @@ -376,14 +377,14 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) else pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i, + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i, !!(pat & XE2_NO_PROMOTE), !!(pat & XE2_COMP_EN), REG_FIELD_GET(XE2_L3_CLOS, pat), REG_FIELD_GET(XE2_L3_POLICY, pat), REG_FIELD_GET(XE2_L4_POLICY, pat), REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + pat, xe->pat.table[i].valid ? "" : " *"); } /* @@ -426,18 +427,18 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) if (!fw_ref) return -ETIMEDOUT; - drm_printf(p, "PAT table:\n"); + drm_printf(p, "PAT table: (* = reserved entry)\n"); for (i = 0; i < xe->pat.n_entries; i++) { pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)\n", i, + drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i, !!(pat & XE2_NO_PROMOTE), REG_FIELD_GET(XE2_L3_CLOS, pat), REG_FIELD_GET(XE2_L3_POLICY, pat), REG_FIELD_GET(XE2_L4_POLICY, pat), REG_FIELD_GET(XE2_COH_MODE, pat), - pat); + pat, xe->pat.table[i].valid ? "" : " *"); } /* diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index 268c9a899f56..05dae03a5f54 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -29,6 +29,11 @@ struct xe_pat_table_entry { #define XE_COH_NONE 1 #define XE_COH_AT_LEAST_1WAY 2 u16 coh_mode; + + /** + * @valid: Set to 1 if the entry is valid, 0 if it's reserved. + */ + u16 valid; }; /** diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 735f51effc7a..d0fcde66a774 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -20,6 +20,7 @@ #include "xe_sriov_pf_control.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) @@ -30,18 +31,6 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_sriov_pf_control_reset_vf(xe, n); } -static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) -{ - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - - xe_assert(xe, IS_SRIOV_PF(xe)); - - /* caller must use pci_dev_put() */ - return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), - pdev->bus->number, - pci_iov_virtfn_devfn(pdev, vf_id)); -} - static void pf_link_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); @@ -60,7 +49,7 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs) * enforce correct resume order. 
*/ for (n = 1; n <= num_vfs; n++) { - pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1); + pdev_vf = xe_pci_sriov_get_vf_pdev(pdev_pf, n); /* unlikely, something weird is happening, abort */ if (!pdev_vf) { @@ -150,6 +139,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_sriov_info(xe, "Enabled %u of %u VF%s\n", num_vfs, total_vfs, str_plural(total_vfs)); + xe_sriov_pf_sysfs_link_vfs(xe, num_vfs); + pf_engine_activity_stats(xe, num_vfs, true); return num_vfs; @@ -177,6 +168,8 @@ static int pf_disable_vfs(struct xe_device *xe) pf_engine_activity_stats(xe, num_vfs, false); + xe_sriov_pf_sysfs_unlink_vfs(xe, num_vfs); + pci_disable_sriov(pdev); pf_reset_vfs(xe, num_vfs); @@ -228,3 +221,25 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) return ret; } + +/** + * xe_pci_sriov_get_vf_pdev() - Lookup the VF's PCI device using the VF identifier. + * @pdev: the PF's &pci_dev + * @vfid: VF identifier (1-based) + * + * The caller must decrement the reference count by calling pci_dev_put(). + * + * Return: the VF's &pci_dev or NULL if the VF device was not found. + */ +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + + xe_assert(xe, dev_is_pf(&pdev->dev)); + xe_assert(xe, vfid); + xe_assert(xe, vfid <= pci_sriov_get_totalvfs(pdev)); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vfid - 1)); +} diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h index c76dd0d90495..b9105d71dbb1 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.h +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -10,6 +10,7 @@ struct pci_dev; #ifdef CONFIG_PCI_IOV int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); +struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid); #else static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) { diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 7c5bca78c8bf..884127b4d97d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,8 +3,6 @@ * Copyright © 2022 Intel Corporation */ -#include <linux/dma-fence-array.h> - #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1340,13 +1338,6 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - } - for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); @@ -2359,10 +2350,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm *vm = vops->vm; struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; - struct dma_fence *fence, *ifence, *mfence; + struct xe_exec_queue *q = pt_update_ops->q; + struct dma_fence *fence, *ifence = NULL, *mfence = NULL; struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -2390,15 +2380,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) #endif if (pt_update_ops->needs_invalidation) { - struct xe_exec_queue *q = pt_update_ops->q; struct xe_dep_scheduler *dep_scheduler = to_dep_scheduler(q, tile->primary_gt); ijob = xe_tlb_inval_job_create(q, 
&tile->primary_gt->tlb_inval, - dep_scheduler, + dep_scheduler, vm, pt_update_ops->start, pt_update_ops->last, - vm->usm.asid); + XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); if (IS_ERR(ijob)) { err = PTR_ERR(ijob); goto kill_vm_tile1; @@ -2410,26 +2399,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) mjob = xe_tlb_inval_job_create(q, &tile->media_gt->tlb_inval, - dep_scheduler, + dep_scheduler, vm, pt_update_ops->start, pt_update_ops->last, - vm->usm.asid); + XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT); if (IS_ERR(mjob)) { err = PTR_ERR(mjob); goto free_ijob; } update.mjob = mjob; - - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ijob; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ijob; - } } } @@ -2460,31 +2438,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) pt_update_ops->last, fence)) dma_fence_wait(fence, false); - /* tlb invalidation must be done before signaling unbind/rebind */ - if (ijob) { - struct dma_fence *__fence; - + if (ijob) ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence); - __fence = ifence; - - if (mjob) { - fences[0] = ifence; - mfence = xe_tlb_inval_job_push(mjob, tile->migrate, - fence); - fences[1] = mfence; - - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - __fence = &cf->base; - } - - dma_fence_put(fence); - fence = __fence; - } + if (mjob) + mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence); - if (!mjob) { + if (!mjob && !ijob) { dma_resv_add_fence(xe_vm_resv(vm), fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : @@ -2492,6 +2451,14 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) list_for_each_entry(op, &vops->list, link) op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); + } else if (ijob && !mjob) { + dma_resv_add_fence(xe_vm_resv(vm), ifence, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL); } else { dma_resv_add_fence(xe_vm_resv(vm), ifence, pt_update_ops->wait_vm_bookkeep ? @@ -2511,16 +2478,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) if (pt_update_ops->needs_svm_lock) xe_svm_notifier_unlock(vm); + /* + * The last fence is only used for zero bind queue idling; migrate + * queues are not exposed to user space. 
+ */ + if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE)) + xe_exec_queue_last_fence_set(q, vm, fence); + xe_tlb_inval_job_put(mjob); xe_tlb_inval_job_put(ijob); + dma_fence_put(ifence); + dma_fence_put(mfence); return fence; free_rfence: kfree(rfence); free_ijob: - kfree(cf); - kfree(fences); xe_tlb_inval_job_put(mjob); xe_tlb_inval_job_put(ijob); kill_vm_tile1: diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 690bc327a363..7ca360b2c20d 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -89,6 +89,13 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, + { XE_RTP_NAME("14024997852"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(FF_MODE, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(VFLSKPD, + RING_FORCE_TO_NONPRIV_ACCESS_RW)) + }, }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 6ae4cc6a3802..cb674a322113 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -146,6 +146,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, for (i = 0; i < width; ++i) job->ptrs[i].batch_addr = batch_addr[i]; + atomic_inc(&q->job_cnt); xe_pm_runtime_get_noresume(job_to_xe(job)); trace_xe_sched_job_create(job); return job; @@ -177,6 +178,7 @@ void xe_sched_job_destroy(struct kref *ref) dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + atomic_dec(&q->job_cnt); xe_exec_queue_put(q); xe_pm_runtime_put(xe); } @@ -296,23 +298,6 @@ void xe_sched_job_push(struct xe_sched_job *job) } /** - * xe_sched_job_last_fence_add_dep - Add last fence dependency to job - * @job:job to add the last fence dependency to - * @vm: virtual memory job belongs to - * - * Returns: - * 0 on success, or an error on failing to expand the array. 
- */ -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) -{ - struct dma_fence *fence; - - fence = xe_exec_queue_last_fence_get(job->q, vm); - - return drm_sched_job_add_dependency(&job->drm, fence); -} - -/** * xe_sched_job_init_user_fence - Initialize user_fence for the job * @job: job whose user_fence needs an init * @sync: sync to be use to init user_fence diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index b467131b6d5f..1c1cb44216c3 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -58,7 +58,6 @@ bool xe_sched_job_completed(struct xe_sched_job *job); void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); -int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); void xe_sched_job_init_user_fence(struct xe_sched_job *job, struct xe_sync_entry *sync); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index bc1ab9ee31d9..b8af93eb5b5f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -16,6 +16,7 @@ #include "xe_sriov_pf.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -128,6 +129,10 @@ int xe_sriov_pf_init_late(struct xe_device *xe) return err; } + err = xe_sriov_pf_sysfs_init(xe); + if (err) + return err; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 663fb0c045e9..01470c42e8a7 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -6,6 +6,7 @@ #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_policy.h" #include "xe_sriov.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" @@ -152,3 +153,286 @@ int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provision xe->sriov.pf.provision.mode = mode; return 0; } + +/** + * xe_sriov_pf_provision_bulk_apply_eq() - Change execution quantum for all VFs and PF. + * @xe: the PF &xe_device + * @eq: execution quantum in [ms] to set + * + * Change execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_eq() - Change VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: execution quantum in [ms] to set + * + * Change VF's execution quantum (EQ) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq); + result = result ?: err; + } + + return result; +} + +static int pf_report_unclean(struct xe_gt *gt, unsigned int vfid, + const char *what, u32 found, u32 expected) +{ + char name[8]; + + xe_sriov_dbg(gt_to_xe(gt), "%s on GT%u has %s=%u (expected %u)\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + gt->info.id, what, found, expected); + return -EUCLEAN; +} + +/** + * xe_sriov_pf_provision_query_vf_eq() - Query VF's execution quantum. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @eq: placeholder for the returned execution quantum in [ms] + * + * Query VF's execution quantum (EQ) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq) +{ + struct xe_gt *gt; + unsigned int id; + int count = 0; + u32 value; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + value = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid); + if (!count++) + *eq = value; + else if (value != *eq) + return pf_report_unclean(gt, vfid, "EQ", value, *eq); + } + + return !count ? -ENODATA : 0; +} + +/** + * xe_sriov_pf_provision_bulk_apply_pt() - Change preemption timeout for all VFs and PF. + * @xe: the PF &xe_device + * @pt: preemption timeout in [us] to set + * + * Change preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_pt() - Change VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: preemption timeout in [us] to set + * + * Change VF's preemption timeout (PT) provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_pt() - Query VF's preemption timeout. + * @xe: the PF &xe_device + * @vfid: the VF identifier + * @pt: placeholder for the returned preemption timeout in [us] + * + * Query VF's preemption timeout (PT) provisioning from all tiles/GTs. + * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */
+int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int count = 0;
+	u32 value;
+
+	guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+	for_each_gt(gt, xe, id) {
+		value = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid);
+		if (!count++)
+			*pt = value;
+		else if (value != *pt)
+			return pf_report_unclean(gt, vfid, "PT", value, *pt);
+	}
+
+	return !count ? -ENODATA : 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_priority() - Change scheduling priority of all VFs and PF.
+ * @xe: the PF &xe_device
+ * @prio: scheduling priority to set
+ *
+ * Change the scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio)
+{
+	bool sched_if_idle;
+	struct xe_gt *gt;
+	unsigned int id;
+	int result = 0;
+	int err;
+
+	/*
+	 * Currently, priority changes that involve VFs are only allowed using
+	 * the 'sched_if_idle' policy KLV, so only LOW and NORMAL are supported.
+	 */
+	xe_assert(xe, prio < GUC_SCHED_PRIORITY_HIGH);
+	sched_if_idle = prio == GUC_SCHED_PRIORITY_NORMAL;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_sriov_pf_policy_set_sched_if_idle(gt, sched_if_idle);
+		result = result ?: err;
+	}
+
+	return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_priority() - Change VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: scheduling priority to set
+ *
+ * Change VF's scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int result = 0;
+	int err;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_sriov_pf_config_set_sched_priority(gt, vfid, prio);
+		result = result ?: err;
+	}
+
+	return result;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_priority() - Query VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: placeholder for the returned scheduling priority
+ *
+ * Query VF's scheduling priority provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int count = 0;
+	u32 value;
+
+	for_each_gt(gt, xe, id) {
+		value = xe_gt_sriov_pf_config_get_sched_priority(gt, vfid);
+		if (!count++)
+			*prio = value;
+		else if (value != *prio)
+			return pf_report_unclean(gt, vfid, "priority", value, *prio);
+	}
+
+	return !count ?
-ENODATA : 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h index cf3657a32e90..bccf23d51396 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -6,10 +6,24 @@ #ifndef _XE_SRIOV_PF_PROVISION_H_ #define _XE_SRIOV_PF_PROVISION_H_ +#include <linux/types.h> + #include "xe_sriov_pf_provision_types.h" struct xe_device; +int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq); +int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq); +int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq); + +int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt); +int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt); +int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt); + +int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio); +int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio); +int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio); + int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c new file mode 100644 index 000000000000..c0b767ac735c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -0,0 +1,647 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_pci_sriov.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_pf_control.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_provision.h" +#include "xe_sriov_pf_sysfs.h" +#include "xe_sriov_printk.h" + +static int emit_choice(char *buf, int choice, const char * const *array, size_t size) +{ + int pos = 0; + int n; + + for (n = 0; n < size; n++) { + pos += sysfs_emit_at(buf, pos, "%s%s%s%s", + n ? " " : "", + n == choice ? "[" : "", + array[n], + n == choice ? "]" : ""); + } + pos += sysfs_emit_at(buf, pos, "\n"); + + return pos; +} + +/* + * /sys/bus/pci/drivers/xe/BDF/ + * : + * ├── sriov_admin/ + * ├── ... + * ├── .bulk_profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── pf/ + * │ ├── ... + * │ ├── device -> ../../../BDF + * │ └── profile + * │ ├── exec_quantum_ms + * │ ├── preempt_timeout_us + * │ └── sched_priority + * ├── vf1/ + * │ ├── ... 
 + *	│   ├── device -> ../../../BDF.1
+ *	│   ├── stop
+ *	│   └── profile
+ *	│       ├── exec_quantum_ms
+ *	│       ├── preempt_timeout_us
+ *	│       └── sched_priority
+ *	├── vf2/
+ *	:
+ *	└── vfN/
+ */
+
+struct xe_sriov_kobj {
+	struct kobject base;
+	struct xe_device *xe;
+	unsigned int vfid;
+};
+#define to_xe_sriov_kobj(p) container_of_const((p), struct xe_sriov_kobj, base)
+
+struct xe_sriov_dev_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct xe_device *xe, char *buf);
+	ssize_t (*store)(struct xe_device *xe, const char *buf, size_t count);
+};
+#define to_xe_sriov_dev_attr(p) container_of_const((p), struct xe_sriov_dev_attr, attr)
+
+#define XE_SRIOV_DEV_ATTR(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+	__ATTR(NAME, 0644, xe_sriov_dev_attr_##NAME##_show, xe_sriov_dev_attr_##NAME##_store)
+
+#define XE_SRIOV_DEV_ATTR_RO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+	__ATTR(NAME, 0444, xe_sriov_dev_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_DEV_ATTR_WO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+	__ATTR(NAME, 0200, NULL, xe_sriov_dev_attr_##NAME##_store)
+
+struct xe_sriov_vf_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct xe_device *xe, unsigned int vfid, char *buf);
+	ssize_t (*store)(struct xe_device *xe, unsigned int vfid, const char *buf, size_t count);
+};
+#define to_xe_sriov_vf_attr(p) container_of_const((p), struct xe_sriov_vf_attr, attr)
+
+#define XE_SRIOV_VF_ATTR(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+	__ATTR(NAME, 0644, xe_sriov_vf_attr_##NAME##_show, xe_sriov_vf_attr_##NAME##_store)
+
+#define XE_SRIOV_VF_ATTR_RO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+	__ATTR(NAME, 0444, xe_sriov_vf_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_VF_ATTR_WO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+	__ATTR(NAME, 0200, NULL, xe_sriov_vf_attr_##NAME##_store)
+
+/* device level attributes go here */
+
+#define DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(NAME, ITEM, TYPE)	\
+										\
+static ssize_t xe_sriov_dev_attr_##NAME##_store(struct xe_device *xe,		\
+						const char *buf, size_t count)	\
+{										\
+	TYPE value;								\
+	int err;								\
+										\
+	err = kstrto##TYPE(buf, 0, &value);					\
+	if (err)								\
+		return err;							\
+										\
+	err = xe_sriov_pf_provision_bulk_apply_##ITEM(xe, value);		\
+	return err ?: count;							\
+}										\
+										\
+static XE_SRIOV_DEV_ATTR_WO(NAME)
+
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32);
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32);
+
+static const char * const sched_priority_names[] = {
+	[GUC_SCHED_PRIORITY_LOW] = "low",
+	[GUC_SCHED_PRIORITY_NORMAL] = "normal",
+	[GUC_SCHED_PRIORITY_HIGH] = "high",
+};
+
+static bool sched_priority_change_allowed(unsigned int vfid)
+{
+	/* As of today GuC FW allows selectively changing only the PF priority. */
+	return vfid == PFID;
+}
+
+static bool sched_priority_high_allowed(unsigned int vfid)
+{
+	/* As of today GuC FW allows selecting 'high' priority only for the PF.
*/ + return vfid == PFID; +} + +static bool sched_priority_bulk_high_allowed(struct xe_device *xe) +{ + /* all VFs are equal - it's sufficient to check VF1 only */ + return sched_priority_high_allowed(VFID(1)); +} + +static ssize_t xe_sriov_dev_attr_sched_priority_store(struct xe_device *xe, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_bulk_high_allowed(xe)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_bulk_apply_priority(xe, match); + return err ?: count; +} + +static XE_SRIOV_DEV_ATTR_WO(sched_priority); + +static struct attribute *bulk_profile_dev_attrs[] = { + &xe_sriov_dev_attr_exec_quantum_ms.attr, + &xe_sriov_dev_attr_preempt_timeout_us.attr, + &xe_sriov_dev_attr_sched_priority.attr, + NULL +}; + +static const struct attribute_group bulk_profile_dev_attr_group = { + .name = ".bulk_profile", + .attrs = bulk_profile_dev_attrs, +}; + +static const struct attribute_group *xe_sriov_dev_attr_groups[] = { + &bulk_profile_dev_attr_group, + NULL +}; + +/* and VF-level attributes go here */ + +#define DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(NAME, ITEM, TYPE, FORMAT) \ +static ssize_t xe_sriov_vf_attr_##NAME##_show(struct xe_device *xe, unsigned int vfid, \ + char *buf) \ +{ \ + TYPE value = 0; \ + int err; \ + \ + err = xe_sriov_pf_provision_query_vf_##ITEM(xe, vfid, &value); \ + if (err) \ + return err; \ + \ + return sysfs_emit(buf, FORMAT, value); \ +} \ + \ +static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + TYPE value; \ + int err; \ + \ + err = kstrto##TYPE(buf, 0, &value); \ + if (err) \ + return err; \ + \ + err = xe_sriov_pf_provision_apply_vf_##ITEM(xe, vfid, value); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR(NAME) + +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n"); +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n"); + +static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid, + char *buf) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + u32 priority; + int err; + + err = xe_sriov_pf_provision_query_vf_priority(xe, vfid, &priority); + if (err) + return err; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + xe_assert(xe, priority < num_priorities); + return emit_choice(buf, priority, sched_priority_names, num_priorities); +} + +static ssize_t xe_sriov_vf_attr_sched_priority_store(struct xe_device *xe, unsigned int vfid, + const char *buf, size_t count) +{ + size_t num_priorities = ARRAY_SIZE(sched_priority_names); + int match; + int err; + + if (!sched_priority_change_allowed(vfid)) + return -EOPNOTSUPP; + + if (!sched_priority_high_allowed(vfid)) + num_priorities--; + + match = __sysfs_match_string(sched_priority_names, num_priorities, buf); + if (match < 0) + return -EINVAL; + + err = xe_sriov_pf_provision_apply_vf_priority(xe, vfid, match); + return err ?: count; +} + +static XE_SRIOV_VF_ATTR(sched_priority); + +static struct attribute *profile_vf_attrs[] = { + &xe_sriov_vf_attr_exec_quantum_ms.attr, + &xe_sriov_vf_attr_preempt_timeout_us.attr, + &xe_sriov_vf_attr_sched_priority.attr, + NULL +}; + +static umode_t profile_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = 
to_xe_sriov_kobj(kobj); + + if (attr == &xe_sriov_vf_attr_sched_priority.attr && + !sched_priority_change_allowed(vkobj->vfid)) + return attr->mode & 0444; + + return attr->mode; +} + +static const struct attribute_group profile_vf_attr_group = { + .name = "profile", + .attrs = profile_vf_attrs, + .is_visible = profile_vf_attr_is_visible, +}; + +#define DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(NAME) \ + \ +static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \ + const char *buf, size_t count) \ +{ \ + bool yes; \ + int err; \ + \ + if (!vfid) \ + return -EPERM; \ + \ + err = kstrtobool(buf, &yes); \ + if (err) \ + return err; \ + if (!yes) \ + return count; \ + \ + err = xe_sriov_pf_control_##NAME##_vf(xe, vfid); \ + return err ?: count; \ +} \ + \ +static XE_SRIOV_VF_ATTR_WO(NAME) + +DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(stop); + +static struct attribute *control_vf_attrs[] = { + &xe_sriov_vf_attr_stop.attr, + NULL +}; + +static umode_t control_vf_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + if (vkobj->vfid == PFID) + return 0; + + return attr->mode; +} + +static const struct attribute_group control_vf_attr_group = { + .attrs = control_vf_attrs, + .is_visible = control_vf_attr_is_visible, +}; + +static const struct attribute_group *xe_sriov_vf_attr_groups[] = { + &profile_vf_attr_group, + &control_vf_attr_group, + NULL +}; + +/* no user serviceable parts below */ + +static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid) +{ + struct xe_sriov_kobj *vkobj; + + xe_sriov_pf_assert_vfid(xe, vfid); + + vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL); + if (!vkobj) + return NULL; + + vkobj->xe = xe; + vkobj->vfid = vfid; + return &vkobj->base; +} + +static void release_xe_sriov_kobj(struct kobject *kobj) +{ + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + + kfree(vkobj); +} + +static ssize_t xe_sriov_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, buf); +} + +static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + ssize_t ret; + + if (!vattr->store) + return -EPERM; + + xe_pm_runtime_get(xe); + ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count); + xe_pm_runtime_put(xe); + + return ret; +} + +static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if (!vattr->show) + return -EPERM; + + return vattr->show(xe, vfid, buf); +} + +static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr); + struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj); + struct xe_device *xe = vkobj->xe; + unsigned int vfid = vkobj->vfid; + ssize_t ret; + + xe_sriov_pf_assert_vfid(xe, vfid); + + if 
(!vattr->store)
+		return -EPERM;
+
+	xe_pm_runtime_get(xe);
+	ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count);
+	xe_pm_runtime_put(xe);
+
+	return ret;
+}
+
+static const struct sysfs_ops xe_sriov_dev_sysfs_ops = {
+	.show = xe_sriov_dev_attr_show,
+	.store = xe_sriov_dev_attr_store,
+};
+
+static const struct sysfs_ops xe_sriov_vf_sysfs_ops = {
+	.show = xe_sriov_vf_attr_show,
+	.store = xe_sriov_vf_attr_store,
+};
+
+static const struct kobj_type xe_sriov_dev_ktype = {
+	.release = release_xe_sriov_kobj,
+	.sysfs_ops = &xe_sriov_dev_sysfs_ops,
+	.default_groups = xe_sriov_dev_attr_groups,
+};
+
+static const struct kobj_type xe_sriov_vf_ktype = {
+	.release = release_xe_sriov_kobj,
+	.sysfs_ops = &xe_sriov_vf_sysfs_ops,
+	.default_groups = xe_sriov_vf_attr_groups,
+};
+
+static int pf_sysfs_error(struct xe_device *xe, int err, const char *what)
+{
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+		xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+	return err;
+}
+
+static void pf_sysfs_note(struct xe_device *xe, int err, const char *what)
+{
+	xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+}
+
+static void action_put_kobject(void *arg)
+{
+	struct kobject *kobj = arg;
+
+	kobject_put(kobj);
+}
+
+static int pf_setup_root(struct xe_device *xe)
+{
+	struct kobject *parent = &xe->drm.dev->kobj;
+	struct kobject *root;
+	int err;
+
+	root = create_xe_sriov_kobj(xe, PFID);
+	if (!root)
+		return pf_sysfs_error(xe, -ENOMEM, "root obj");
+
+	err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root);
+	if (err)
+		return pf_sysfs_error(xe, err, "root action");
+
+	err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin");
+	if (err)
+		return pf_sysfs_error(xe, err, "root init");
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, !xe->sriov.pf.sysfs.root);
+	xe->sriov.pf.sysfs.root = root;
+	return 0;
+}
+
+static int pf_setup_tree(struct xe_device *xe)
+{
+	unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+	struct kobject *root, *kobj;
+	unsigned int n;
+	int err;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	root = xe->sriov.pf.sysfs.root;
+
+	for (n = 0; n <= totalvfs; n++) {
+		kobj = create_xe_sriov_kobj(xe, VFID(n));
+		if (!kobj)
+			return pf_sysfs_error(xe, -ENOMEM, "tree obj");
+
+		err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, kobj);
+		if (err)
+			return pf_sysfs_error(xe, err, "tree action");
+
+		if (n)
+			err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+						   root, "vf%u", n);
+		else
+			err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+						   root, "pf");
+		if (err)
+			return pf_sysfs_error(xe, err, "tree init");
+
+		xe_assert(xe, !xe->sriov.pf.vfs[n].kobj);
+		xe->sriov.pf.vfs[n].kobj = kobj;
+	}
+
+	return 0;
+}
+
+static void action_rm_device_link(void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_link(kobj, "device");
+}
+
+static int pf_link_pf_device(struct xe_device *xe)
+{
+	struct kobject *kobj = xe->sriov.pf.vfs[PFID].kobj;
+	int err;
+
+	err = sysfs_create_link(kobj, &xe->drm.dev->kobj, "device");
+	if (err)
+		return pf_sysfs_error(xe, err, "PF device link");
+
+	err = devm_add_action_or_reset(xe->drm.dev, action_rm_device_link, kobj);
+	if (err)
+		return pf_sysfs_error(xe, err, "PF unlink action");
+
+	return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_init() - Set up the PF's SR-IOV sysfs tree.
+ * @xe: the PF &xe_device whose sysfs tree to set up
+ *
+ * This function creates additional sysfs nodes representing the PF and the VF
+ * devices, each populated with Xe specific SR-IOV attributes.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_sysfs_init(struct xe_device *xe)
+{
+	int err;
+
+	err = pf_setup_root(xe);
+	if (err)
+		return err;
+
+	err = pf_setup_tree(xe);
+	if (err)
+		return err;
+
+	err = pf_link_pf_device(xe);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_link_vfs() - Add VF device links to the SR-IOV sysfs tree.
+ * @xe: the &xe_device whose sysfs tree to update
+ * @num_vfs: number of enabled VFs to link
+ *
+ * This function is specific to the PF driver.
+ *
+ * This function adds symbolic links between the VF entries in the SR-IOV
+ * sysfs tree maintained by the PF and the enabled VF PCI devices.
+ *
+ * Use xe_sriov_pf_sysfs_unlink_vfs() to remove those links.
+ */
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+	unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+	struct pci_dev *pf_pdev = to_pci_dev(xe->drm.dev);
+	struct pci_dev *vf_pdev = NULL;
+	unsigned int n;
+	int err;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, num_vfs <= totalvfs);
+
+	for (n = 1; n <= num_vfs; n++) {
+		vf_pdev = xe_pci_sriov_get_vf_pdev(pf_pdev, VFID(n));
+		if (!vf_pdev)
+			return pf_sysfs_note(xe, -ENOENT, "VF link");
+
+		err = sysfs_create_link(xe->sriov.pf.vfs[VFID(n)].kobj,
+					&vf_pdev->dev.kobj, "device");
+
+		/* must balance xe_pci_sriov_get_vf_pdev() */
+		pci_dev_put(vf_pdev);
+
+		if (err)
+			return pf_sysfs_note(xe, err, "VF link");
+	}
+}
+
+/**
+ * xe_sriov_pf_sysfs_unlink_vfs() - Remove VF device links from the SR-IOV sysfs tree.
+ * @xe: the &xe_device whose sysfs tree to update
+ * @num_vfs: number of VFs to unlink
+ *
+ * This function shall be called only on the PF.
+ * It removes the "device" links added by xe_sriov_pf_sysfs_link_vfs().
+ */
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+	unsigned int n;
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	xe_assert(xe, num_vfs <= xe_sriov_pf_get_totalvfs(xe));
+
+	for (n = 1; n <= num_vfs; n++)
+		sysfs_remove_link(xe->sriov.pf.vfs[VFID(n)].kobj, "device");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
new file mode 100644
index 000000000000..ae92ed1766e7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_SYSFS_H_
+#define _XE_SRIOV_PF_SYSFS_H_
+
+struct xe_device;
+
+int xe_sriov_pf_sysfs_init(struct xe_device *xe);
+
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs);
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
index c753cd59aed2..b3cd9797194b 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
@@ -12,10 +12,15 @@
 #include "xe_sriov_pf_provision_types.h"
 #include "xe_sriov_pf_service_types.h"
 
+struct kobject;
+
 /**
  * struct xe_sriov_metadata - per-VF device level metadata
  */
struct xe_sriov_metadata {
+	/** @kobj: kobject representing VF in PF's SR-IOV sysfs tree.
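+	 *
+	 * Set for the PF entry (index 0) and for every possible VF entry when
+	 * the PF sysfs tree is set up, see pf_setup_tree().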
*/ + struct kobject *kobj; + /** @version: negotiated VF/PF ABI version */ struct xe_sriov_pf_service_version version; }; @@ -42,6 +47,12 @@ struct xe_device_pf { /** @service: device level service data. */ struct xe_sriov_pf_service service; + /** @sysfs: device level sysfs data. */ + struct { + /** @sysfs.root: the root kobject for all SR-IOV entries in sysfs. */ + struct kobject *root; + } sysfs; + /** @vfs: metadata for all VFs. */ struct xe_sriov_metadata *vfs; }; diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 13af589715a7..55c5a0eb82e1 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -104,8 +104,7 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, &vm->svm.garbage_collector.range_list); spin_unlock(&vm->svm.garbage_collector.lock); - queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq, - &vm->svm.garbage_collector.work); + queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work); } static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 82872a51f098..ff74528ca0c6 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -14,7 +14,7 @@ #include <drm/drm_syncobj.h> #include <uapi/drm/xe_drm.h> -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_exec_queue.h" #include "xe_macros.h" #include "xe_sched_job_types.h" @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } @@ -284,51 +297,59 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct dma_fence *fence; - int i, num_in_fence = 0, current_fence = 0; + int i, num_fence = 0, current_fence = 0; lockdep_assert_held(&vm->lock); - /* Count in-fences */ - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - ++num_in_fence; - fence = sync[i].fence; + /* Reject in fences */ + for (i = 0; i 
< num_sync; ++i) + if (sync[i].fence) + return ERR_PTR(-EOPNOTSUPP); + + if (q->flags & EXEC_QUEUE_FLAG_VM) { + struct xe_exec_queue *__q; + struct xe_tile *tile; + u8 id; + + for_each_tile(tile, vm->xe, id) + num_fence += (1 + XE_MAX_GT_PER_TILE); + + fences = kmalloc_array(num_fence, sizeof(*fences), + GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + fences[current_fence++] = + xe_exec_queue_last_fence_get(q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + list_for_each_entry(__q, &q->multi_gt_list, + multi_gt_link) { + fences[current_fence++] = + xe_exec_queue_last_fence_get(__q, vm); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i); } - } - /* Easy case... */ - if (!num_in_fence) { - fence = xe_exec_queue_last_fence_get(q, vm); - return fence; - } + xe_assert(vm->xe, current_fence == num_fence); + cf = dma_fence_array_create(num_fence, fences, + dma_fence_context_alloc(1), + 1, false); + if (!cf) + goto err_out; - /* Create composite fence */ - fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - for (i = 0; i < num_sync; ++i) { - if (sync[i].fence) { - dma_fence_get(sync[i].fence); - fences[current_fence++] = sync[i].fence; - } - } - fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); - cf = dma_fence_array_create(num_in_fence, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - goto err_out; + return &cf->base; } - return &cf->base; + fence = xe_exec_queue_last_fence_get(q, vm); + return fence; err_out: while (current_fence) dma_fence_put(fences[--current_fence]); kfree(fences); - kfree(cf); return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c index 492def04a559..1ae0dec2cf31 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -12,6 +12,7 @@ #include "xe_tlb_inval_job.h" #include "xe_migrate.h" #include "xe_pm.h" +#include "xe_vm.h" /** struct xe_tlb_inval_job - TLB invalidation job */ struct xe_tlb_inval_job { @@ -21,6 +22,8 @@ struct xe_tlb_inval_job { struct xe_tlb_inval *tlb_inval; /** @q: exec queue issuing the invalidate */ struct xe_exec_queue *q; + 
/** @vm: VM which TLB invalidation is being issued for */ + struct xe_vm *vm; /** @refcount: ref count of this job */ struct kref refcount; /** @@ -32,8 +35,8 @@ struct xe_tlb_inval_job { u64 start; /** @end: End address to invalidate */ u64 end; - /** @asid: Address space ID to invalidate */ - u32 asid; + /** @type: GT type */ + int type; /** @fence_armed: Fence has been armed */ bool fence_armed; }; @@ -46,7 +49,7 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) container_of(job->fence, typeof(*ifence), base); xe_tlb_inval_range(job->tlb_inval, ifence, job->start, - job->end, job->asid); + job->end, job->vm->usm.asid); return job->fence; } @@ -70,9 +73,10 @@ static const struct xe_dep_job_ops dep_job_ops = { * @q: exec queue issuing the invalidate * @tlb_inval: TLB invalidation client * @dep_scheduler: Dependency scheduler for job + * @vm: VM which TLB invalidation is being issued for * @start: Start address to invalidate * @end: End address to invalidate - * @asid: Address space ID to invalidate + * @type: GT type * * Create a TLB invalidation job and initialize internal fields. The caller is * responsible for releasing the creation reference. @@ -81,8 +85,8 @@ static const struct xe_dep_job_ops dep_job_ops = { */ struct xe_tlb_inval_job * xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, - struct xe_dep_scheduler *dep_scheduler, u64 start, - u64 end, u32 asid) + struct xe_dep_scheduler *dep_scheduler, + struct xe_vm *vm, u64 start, u64 end, int type) { struct xe_tlb_inval_job *job; struct drm_sched_entity *entity = @@ -90,19 +94,24 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, struct xe_tlb_inval_fence *ifence; int err; + xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); + job = kmalloc(sizeof(*job), GFP_KERNEL); if (!job) return ERR_PTR(-ENOMEM); job->q = q; + job->vm = vm; job->tlb_inval = tlb_inval; job->start = start; job->end = end; - job->asid = asid; job->fence_armed = false; job->dep.ops = &dep_job_ops; + job->type = type; kref_init(&job->refcount); xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_vm_get(vm); /* Pairs with put in xe_tlb_inval_job_destroy */ ifence = kmalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) { @@ -124,6 +133,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, err_fence: kfree(ifence); err_job: + xe_vm_put(vm); xe_exec_queue_put(q); kfree(job); @@ -138,6 +148,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) container_of(job->fence, typeof(*ifence), base); struct xe_exec_queue *q = job->q; struct xe_device *xe = gt_to_xe(q->gt); + struct xe_vm *vm = job->vm; if (!job->fence_armed) kfree(ifence); @@ -147,6 +158,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) drm_sched_job_cleanup(&job->dep.drm); kfree(job); + xe_vm_put(vm); /* Pairs with get from xe_tlb_inval_job_create */ xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */ xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */ } @@ -231,6 +243,11 @@ struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, dma_fence_get(&job->dep.drm.s_fence->finished); drm_sched_entity_push_job(&job->dep.drm); + /* Let the upper layers fish this out */ + xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm, + &job->dep.drm.s_fence->finished, + job->type); + xe_migrate_job_unlock(m, job->q); /* diff --git 
a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h index e63edcb26b50..4d6df1a6c6ca 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -11,14 +11,15 @@ struct dma_fence; struct xe_dep_scheduler; struct xe_exec_queue; +struct xe_migrate; struct xe_tlb_inval; struct xe_tlb_inval_job; -struct xe_migrate; +struct xe_vm; struct xe_tlb_inval_job * xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, struct xe_dep_scheduler *dep_scheduler, - u64 start, u64 end, u32 asid); + struct xe_vm *vm, u64 start, u64 end, int type); int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 314f42fcbcbd..79a97b086cb2 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -441,6 +441,29 @@ TRACE_EVENT(xe_eu_stall_data_read, __entry->read_size, __entry->total_size) ); +TRACE_EVENT(xe_exec_queue_reach_max_job_count, + TP_PROTO(struct xe_exec_queue *q, int max_cnt), + TP_ARGS(q, max_cnt), + + TP_STRUCT__entry(__string(dev, __dev_name_eq(q)) + __field(enum xe_engine_class, class) + __field(u32, logical_mask) + __field(u16, guc_id) + __field(int, max_cnt) + ), + + TP_fast_assign(__assign_str(dev); + __entry->class = q->class; + __entry->logical_mask = q->logical_mask; + __entry->guc_id = q->guc->id; + __entry->max_cnt = max_cnt; + ), + + TP_printk("dev=%s, job count exceeded the maximum limit (%d) per exec queue. engine_class=0x%x, logical_mask=0x%x, guc_id=%d", + __get_str(dev), __entry->max_cnt, + __entry->class, __entry->logical_mask, __entry->guc_id) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index 1ef181c90434..a30e732c4d51 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val) */ DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, if (_T) xe_validation_ctx_fini(_T);, - ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); - _ret ? NULL : _ctx; }), + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, - struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) {return *_T; } #define class_xe_validation_is_conditional true @@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * exhaustive eviction. 
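 *
 * A minimal usage sketch (illustrative; all names are placeholders). With
 * this change _ret must be an int lvalue, since the guard now takes its
 * address:
 *
 *	int ret;
 *
 *	xe_validation_guard(&ctx, &xe->val, &exec, flags, ret) {
 *		...
 *	}
 *	return ret;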
*/ #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ - scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ drm_exec_until_all_locked(_exec) #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 00f3520dec38..8fb5cc6a69ec 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -27,7 +27,6 @@ #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" -#include "xe_gt_pagefault.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -755,6 +754,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = @@ -845,6 +845,7 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = @@ -1458,7 +1459,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vm *vm; - int err, number_tiles = 0; + int err; struct xe_tile *tile; u8 id; @@ -1619,13 +1620,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) goto err_close; } vm->q[id] = q; - number_tiles++; } } - if (number_tiles > 1) - vm->composite_fence_ctx = dma_fence_context_alloc(1); - if (xef && xe->info.has_asid) { u32 asid; @@ -1731,8 +1728,13 @@ void xe_vm_close_and_put(struct xe_vm *vm) down_write(&vm->lock); for_each_tile(tile, xe, id) { - if (vm->q[id]) + if (vm->q[id]) { + int i; + xe_exec_queue_last_fence_put(vm->q[id], vm); + for_each_tlb_inval(i) + xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i); + } } up_write(&vm->lock); @@ -3102,20 +3104,31 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, err; + int number_tiles = 0, current_fence = 0, n_fence = 0, err; u8 id; number_tiles = vm_ops_setup_tile_args(vm, vops); if (number_tiles == 0) return ERR_PTR(-ENODATA); - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) { - fence = ERR_PTR(-ENOMEM); - goto err_trace; - } + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { + for_each_tile(tile, vm->xe, id) + ++n_fence; + } else { + for_each_tile(tile, vm->xe, id) + n_fence += (1 + XE_MAX_GT_PER_TILE); + } + + fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); + if (!fences) { + fence = ERR_PTR(-ENOMEM); + goto err_trace; + } + + cf = dma_fence_array_alloc(n_fence); + if (!cf) { + fence = ERR_PTR(-ENOMEM); + goto err_out; } for_each_tile(tile, vm->xe, id) { @@ -3132,30 +3145,34 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, trace_xe_vm_ops_execute(vops); for_each_tile(tile, vm->xe, id) { + struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; + int i; + + fence = NULL; if (!vops->pt_update_ops[id].num_ops) - continue; + goto collect_fences; fence = xe_pt_update_ops_run(tile, vops); if (IS_ERR(fence)) goto err_out; - if (fences) - 
fences[current_fence++] = fence; - } +collect_fences: + fences[current_fence++] = fence ?: dma_fence_get_stub(); + if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) + continue; - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - fence = ERR_PTR(-ENOMEM); - goto err_out; - } - fence = &cf->base; + xe_migrate_job_lock(tile->migrate, q); + for_each_tlb_inval(i) + fences[current_fence++] = + xe_exec_queue_tlb_inval_last_fence_get(q, vm, i); + xe_migrate_job_unlock(tile->migrate, q); } + xe_assert(vm->xe, current_fence == n_fence); + dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), + 1, false); + fence = &cf->base; + for_each_tile(tile, vm->xe, id) { if (!vops->pt_update_ops[id].num_ops) continue; @@ -3215,7 +3232,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, struct dma_fence *fence) { - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); struct xe_user_fence *ufence; struct xe_vma_op *op; int i; @@ -3236,7 +3252,6 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, if (fence) { for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); } } @@ -3430,19 +3445,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, struct xe_sync_entry *syncs, int num_syncs) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; int i, err = 0; - fence = xe_sync_in_fence_get(syncs, num_syncs, - to_wait_exec_queue(vm, q), vm); - if (IS_ERR(fence)) - return PTR_ERR(fence); + if (num_syncs) { + fence = xe_sync_in_fence_get(syncs, num_syncs, + to_wait_exec_queue(vm, q), vm); + if (IS_ERR(fence)) + return PTR_ERR(fence); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); + for (i = 0; i < num_syncs; i++) + xe_sync_entry_signal(&syncs[i], fence); + } - xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, - fence); dma_fence_put(fence); return err; @@ -3633,8 +3648,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ? 
 					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 830ed7b05c27..ccd6cc090309 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -221,11 +221,6 @@ struct xe_vm { #define XE_VM_FLAG_GSC BIT(8) unsigned long flags; - /** @composite_fence_ctx: context composite fence */ - u64 composite_fence_ctx; - /** @composite_fence_seqno: seqno for composite fence */ - u32 composite_fence_seqno; - /** * @lock: outer most lock, protects objects of anything attached to this * VM @@ -471,6 +466,7 @@ struct xe_vma_ops { #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) #define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) +#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ec638b431131..81e62291af45 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -679,6 +679,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14023061436"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, |