diff options
| author | Dave Airlie <airlied@redhat.com> | 2025-12-02 18:09:01 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2025-12-02 18:09:08 +1000 |
| commit | b3239df349c2c2c94686674489c9629c89ca49a1 (patch) | |
| tree | 5e6652ebbc602866122c26058dfb523a6f446169 /drivers/gpu/drm/panthor | |
| parent | 62433efe0b06042d8016ba0713d801165a939229 (diff) | |
| parent | db2bad93fe206c95808b7a164a29424791728752 (diff) | |
Merge tag 'drm-misc-next-2025-12-01-1' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next
Extra drm-misc-next for v6.19-rc1:
UAPI Changes:
- Add support for drm colorop pipeline.
- Add COLOR PIPELINE plane property.
- Add DRM_CLIENT_CAP_PLANE_COLOR_PIPELINE.
Cross-subsystem Changes:
- Attempt to use higher order mappings in system heap allocator.
- Always taint kernel with sw-sync.
Core Changes:
- Small fixes to drm/gem.
- Support emergency restore to drm-client.
- Allocate and release fb_info in single place.
- Rework ttm pipelined eviction fence handling.
Driver Changes:
- Support the drm color pipeline in vkms, amdgfx.
- Add NVJPG driver for tegra.
- Assorted small fixes and updates to rockchip, bridge/dw-hdmi-qp,
panthor.
- Add ASL CS5263 DP-to-HDMI simple bridge.
- Add and improve support for G LD070WX3-SL01 MIPI DSI, Samsung LTL106AL0,
Samsung LTL106AL01, Raystar RFF500F-AWH-DNN, Winstar WF70A8SYJHLNGA,
Wanchanglong w552946aaa, Samsung SOFEF00, Lenovo X13s panel.
- Add support for it66122 to it66121.
- Support mali-G1 gpu in panthor.
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patch.msgid.link/aa5cbd50-7676-4a59-bbed-e8428af86804@linux.intel.com
Diffstat (limited to 'drivers/gpu/drm/panthor')
| -rw-r--r-- | drivers/gpu/drm/panthor/Makefile | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.c | 18 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_device.h | 8 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_fw.c | 131 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_fw.h | 32 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_gem.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_gpu.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_hw.c | 107 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_hw.h | 47 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.c | 19 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_pwr.c | 549 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_pwr.h | 23 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_regs.h | 79 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.c | 312 |
15 files changed, 1217 insertions, 128 deletions
diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile index 02db21748c12..753a32c446df 100644 --- a/drivers/gpu/drm/panthor/Makefile +++ b/drivers/gpu/drm/panthor/Makefile @@ -10,6 +10,7 @@ panthor-y := \ panthor_heap.o \ panthor_hw.o \ panthor_mmu.o \ + panthor_pwr.o \ panthor_sched.o obj-$(CONFIG_DRM_PANTHOR) += panthor.o diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 31fb8ed68199..e133b1e0ad6d 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -21,6 +21,7 @@ #include "panthor_gpu.h" #include "panthor_hw.h" #include "panthor_mmu.h" +#include "panthor_pwr.h" #include "panthor_regs.h" #include "panthor_sched.h" @@ -113,6 +114,7 @@ void panthor_device_unplug(struct panthor_device *ptdev) panthor_fw_unplug(ptdev); panthor_mmu_unplug(ptdev); panthor_gpu_unplug(ptdev); + panthor_pwr_unplug(ptdev); pm_runtime_dont_use_autosuspend(ptdev->base.dev); pm_runtime_put_sync_suspend(ptdev->base.dev); @@ -152,8 +154,8 @@ static void panthor_device_reset_work(struct work_struct *work) panthor_sched_pre_reset(ptdev); panthor_fw_pre_reset(ptdev, true); panthor_mmu_pre_reset(ptdev); - panthor_gpu_soft_reset(ptdev); - panthor_gpu_l2_power_on(ptdev); + panthor_hw_soft_reset(ptdev); + panthor_hw_l2_power_on(ptdev); panthor_mmu_post_reset(ptdev); ret = panthor_fw_post_reset(ptdev); atomic_set(&ptdev->reset.pending, 0); @@ -268,10 +270,14 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_rpm_put; - ret = panthor_gpu_init(ptdev); + ret = panthor_pwr_init(ptdev); if (ret) goto err_rpm_put; + ret = panthor_gpu_init(ptdev); + if (ret) + goto err_unplug_pwr; + ret = panthor_gpu_coherency_init(ptdev); if (ret) goto err_unplug_gpu; @@ -312,6 +318,9 @@ err_unplug_mmu: err_unplug_gpu: panthor_gpu_unplug(ptdev); +err_unplug_pwr: + panthor_pwr_unplug(ptdev); + err_rpm_put: pm_runtime_put_sync_suspend(ptdev->base.dev); return ret; @@ -465,6 +474,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev) { int ret; + panthor_pwr_resume(ptdev); panthor_gpu_resume(ptdev); panthor_mmu_resume(ptdev); @@ -474,6 +484,7 @@ static int panthor_device_resume_hw_components(struct panthor_device *ptdev) panthor_mmu_suspend(ptdev); panthor_gpu_suspend(ptdev); + panthor_pwr_suspend(ptdev); return ret; } @@ -587,6 +598,7 @@ int panthor_device_suspend(struct device *dev) panthor_fw_suspend(ptdev); panthor_mmu_suspend(ptdev); panthor_gpu_suspend(ptdev); + panthor_pwr_suspend(ptdev); drm_dev_exit(cookie); } diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index f32c1868bf6d..f35e52b9546a 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -24,10 +24,12 @@ struct panthor_device; struct panthor_gpu; struct panthor_group_pool; struct panthor_heap_pool; +struct panthor_hw; struct panthor_job; struct panthor_mmu; struct panthor_fw; struct panthor_perfcnt; +struct panthor_pwr; struct panthor_vm; struct panthor_vm_pool; @@ -134,6 +136,12 @@ struct panthor_device { /** @csif_info: Command stream interface information. */ struct drm_panthor_csif_info csif_info; + /** @hw: GPU-specific data. */ + struct panthor_hw *hw; + + /** @pwr: Power control management data. */ + struct panthor_pwr *pwr; + /** @gpu: GPU management data. */ struct panthor_gpu *gpu; diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 38d87ab92eda..1a5e3c1a27fb 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -22,6 +22,7 @@ #include "panthor_fw.h" #include "panthor_gem.h" #include "panthor_gpu.h" +#include "panthor_hw.h" #include "panthor_mmu.h" #include "panthor_regs.h" #include "panthor_sched.h" @@ -33,6 +34,7 @@ #define PROGRESS_TIMEOUT_SCALE_SHIFT 10 #define IDLE_HYSTERESIS_US 800 #define PWROFF_HYSTERESIS_US 10000 +#define MCU_HALT_TIMEOUT_US (1ULL * USEC_PER_SEC) /** * struct panthor_fw_binary_hdr - Firmware binary header. @@ -317,6 +319,49 @@ panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) return &ptdev->fw->iface.streams[csg_slot][cs_slot]; } +static bool panthor_fw_has_glb_state(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + return glb_iface->control->version >= CSF_IFACE_VERSION(4, 1, 0); +} + +static bool panthor_fw_has_64bit_ep_req(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + return glb_iface->control->version >= CSF_IFACE_VERSION(4, 0, 0); +} + +u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + return csg_iface->input->endpoint_req2; + else + return csg_iface->input->endpoint_req; +} + +void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + csg_iface->input->endpoint_req2 = value; + else + csg_iface->input->endpoint_req = lower_32_bits(value); +} + +void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value, + u64 mask) +{ + if (panthor_fw_has_64bit_ep_req(ptdev)) + panthor_fw_update_reqs64(csg_iface, endpoint_req2, value, mask); + else + panthor_fw_update_reqs(csg_iface, endpoint_req, lower_32_bits(value), + lower_32_bits(mask)); +} + /** * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count * @ptdev: Device. @@ -996,6 +1041,9 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev) GLB_IDLE_EN | GLB_IDLE; + if (panthor_fw_has_glb_state(ptdev)) + glb_iface->input->ack_irq_mask |= GLB_STATE_MASK; + panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_CFG_ALLOC_EN | @@ -1069,6 +1117,54 @@ static void panthor_fw_stop(struct panthor_device *ptdev) drm_err(&ptdev->base, "Failed to stop MCU"); } +static bool panthor_fw_mcu_halted(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + bool halted; + + halted = gpu_read(ptdev, MCU_STATUS) == MCU_STATUS_HALT; + + if (panthor_fw_has_glb_state(ptdev)) + halted &= (GLB_STATE_GET(glb_iface->output->ack) == GLB_STATE_HALT); + + return halted; +} + +static void panthor_fw_halt_mcu(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + if (panthor_fw_has_glb_state(ptdev)) + panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_HALT), GLB_STATE_MASK); + else + panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); + + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); +} + +static bool panthor_fw_wait_mcu_halted(struct panthor_device *ptdev) +{ + bool halted = false; + + if (read_poll_timeout_atomic(panthor_fw_mcu_halted, halted, halted, 10, + MCU_HALT_TIMEOUT_US, 0, ptdev)) { + drm_warn(&ptdev->base, "Timed out waiting for MCU to halt"); + return false; + } + + return true; +} + +static void panthor_fw_mcu_set_active(struct panthor_device *ptdev) +{ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); + + if (panthor_fw_has_glb_state(ptdev)) + panthor_fw_update_reqs(glb_iface, req, GLB_STATE(GLB_STATE_ACTIVE), GLB_STATE_MASK); + else + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); +} + /** * panthor_fw_pre_reset() - Call before a reset. * @ptdev: Device. @@ -1085,19 +1181,13 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) ptdev->reset.fast = false; if (!on_hang) { - struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - u32 status; - - panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); - gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); - if (!gpu_read_poll_timeout(ptdev, MCU_STATUS, status, - status == MCU_STATUS_HALT, 10, - 100000)) { - ptdev->reset.fast = true; - } else { + panthor_fw_halt_mcu(ptdev); + if (!panthor_fw_wait_mcu_halted(ptdev)) drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); - } + else + ptdev->reset.fast = true; } + panthor_fw_stop(ptdev); panthor_job_irq_suspend(&ptdev->fw->irq); panthor_fw_stop(ptdev); @@ -1126,14 +1216,14 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) */ panthor_reload_fw_sections(ptdev, true); } else { - /* The FW detects 0 -> 1 transitions. Make sure we reset - * the HALT bit before the FW is rebooted. + /* + * If the FW was previously successfully halted in the pre-reset + * operation, we need to transition it to active again before + * the FW is rebooted. * This is not needed on a slow reset because FW sections are * re-initialized. */ - struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - - panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); + panthor_fw_mcu_set_active(ptdev); } ret = panthor_fw_start(ptdev); @@ -1171,6 +1261,10 @@ void panthor_fw_unplug(struct panthor_device *ptdev) if (ptdev->fw->irq.irq) panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_halt_mcu(ptdev); + if (!panthor_fw_wait_mcu_halted(ptdev)) + drm_warn(&ptdev->base, "Failed to halt MCU on unplug"); + panthor_fw_stop(ptdev); } @@ -1186,7 +1280,7 @@ void panthor_fw_unplug(struct panthor_device *ptdev) ptdev->fw->vm = NULL; if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) - panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); + panthor_hw_l2_power_off(ptdev); } /** @@ -1365,7 +1459,7 @@ int panthor_fw_init(struct panthor_device *ptdev) return ret; } - ret = panthor_gpu_l2_power_on(ptdev); + ret = panthor_hw_l2_power_on(ptdev); if (ret) return ret; @@ -1409,3 +1503,4 @@ MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin"); MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch14.8/mali_csffw.bin"); diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h index 6598d96c6d2a..fbdc21469ba3 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.h +++ b/drivers/gpu/drm/panthor/panthor_fw.h @@ -167,10 +167,11 @@ struct panthor_fw_csg_input_iface { #define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) #define CSG_EP_REQ_EXCL_COMPUTE BIT(20) #define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) -#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) #define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) +#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & CSG_EP_REQ_PRIORITY_MASK) +#define CSG_EP_REQ_PRIORITY_GET(x) (((x) & CSG_EP_REQ_PRIORITY_MASK) >> 28) u32 endpoint_req; - u32 reserved2[2]; + u64 endpoint_req2; u64 suspend_buf; u64 protm_suspend_buf; u32 config; @@ -214,6 +215,13 @@ struct panthor_fw_global_input_iface { #define GLB_FWCFG_UPDATE BIT(9) #define GLB_IDLE_EN BIT(10) #define GLB_SLEEP BIT(12) +#define GLB_STATE_MASK GENMASK(14, 12) +#define GLB_STATE_ACTIVE 0 +#define GLB_STATE_HALT 1 +#define GLB_STATE_SLEEP 2 +#define GLB_STATE_SUSPEND 3 +#define GLB_STATE(x) (((x) << 12) & GLB_STATE_MASK) +#define GLB_STATE_GET(x) (((x) & GLB_STATE_MASK) >> 12) #define GLB_INACTIVE_COMPUTE BIT(20) #define GLB_INACTIVE_FRAGMENT BIT(21) #define GLB_INACTIVE_TILER BIT(22) @@ -457,6 +465,16 @@ struct panthor_fw_global_iface { spin_unlock(&(__iface)->lock); \ } while (0) +#define panthor_fw_update_reqs64(__iface, __in_reg, __val, __mask) \ + do { \ + u64 __cur_val, __new_val; \ + spin_lock(&(__iface)->lock); \ + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ + __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ + spin_unlock(&(__iface)->lock); \ + } while (0) + struct panthor_fw_global_iface * panthor_fw_get_glb_iface(struct panthor_device *ptdev); @@ -466,6 +484,16 @@ panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); struct panthor_fw_cs_iface * panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); +u64 panthor_fw_csg_endpoint_req_get(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface); + +void panthor_fw_csg_endpoint_req_set(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value); + +void panthor_fw_csg_endpoint_req_update(struct panthor_device *ptdev, + struct panthor_fw_csg_iface *csg_iface, u64 value, + u64 mask); + int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, u32 *acked, u32 timeout_ms); diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 2c12c1c58e2b..fbde78db270a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -145,6 +145,9 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, bo = to_panthor_bo(&obj->base); kbo->obj = &obj->base; bo->flags = bo_flags; + bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); + drm_gem_object_get(bo->exclusive_vm_root_gem); + bo->base.base.resv = bo->exclusive_vm_root_gem->resv; if (vm == panthor_fw_vm(ptdev)) debug_flags |= PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED; @@ -168,9 +171,6 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, goto err_free_va; kbo->vm = panthor_vm_get(vm); - bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); - drm_gem_object_get(bo->exclusive_vm_root_gem); - bo->base.base.resv = bo->exclusive_vm_root_gem->resv; return kbo; err_free_va: diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index eda670229184..06b231b2460a 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -19,6 +19,7 @@ #include "panthor_device.h" #include "panthor_gpu.h" +#include "panthor_hw.h" #include "panthor_regs.h" /** @@ -241,6 +242,11 @@ int panthor_gpu_block_power_on(struct panthor_device *ptdev, return 0; } +void panthor_gpu_l2_power_off(struct panthor_device *ptdev) +{ + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); +} + /** * panthor_gpu_l2_power_on() - Power-on the L2-cache * @ptdev: Device. @@ -368,9 +374,9 @@ void panthor_gpu_suspend(struct panthor_device *ptdev) { /* On a fast reset, simply power down the L2. */ if (!ptdev->reset.fast) - panthor_gpu_soft_reset(ptdev); + panthor_hw_soft_reset(ptdev); else - panthor_gpu_power_off(ptdev, L2, 1, 20000); + panthor_hw_l2_power_off(ptdev); panthor_gpu_irq_suspend(&ptdev->gpu->irq); } @@ -385,6 +391,6 @@ void panthor_gpu_suspend(struct panthor_device *ptdev) void panthor_gpu_resume(struct panthor_device *ptdev) { panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); - panthor_gpu_l2_power_on(ptdev); + panthor_hw_l2_power_on(ptdev); } diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h index 7c17a8c06858..12e66f48ced1 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.h +++ b/drivers/gpu/drm/panthor/panthor_gpu.h @@ -46,6 +46,7 @@ int panthor_gpu_block_power_off(struct panthor_device *ptdev, type ## _PWRTRANS, \ mask, timeout_us) +void panthor_gpu_l2_power_off(struct panthor_device *ptdev); int panthor_gpu_l2_power_on(struct panthor_device *ptdev); int panthor_gpu_flush_caches(struct panthor_device *ptdev, u32 l2, u32 lsc, u32 other); diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c index c44033a0bba8..87ebb7ae42c4 100644 --- a/drivers/gpu/drm/panthor/panthor_hw.c +++ b/drivers/gpu/drm/panthor/panthor_hw.c @@ -4,12 +4,55 @@ #include <drm/drm_print.h> #include "panthor_device.h" +#include "panthor_gpu.h" #include "panthor_hw.h" +#include "panthor_pwr.h" #include "panthor_regs.h" #define GPU_PROD_ID_MAKE(arch_major, prod_major) \ (((arch_major) << 24) | (prod_major)) +/** struct panthor_hw_entry - HW arch major to panthor_hw binding entry */ +struct panthor_hw_entry { + /** @arch_min: Minimum supported architecture major value (inclusive) */ + u8 arch_min; + + /** @arch_max: Maximum supported architecture major value (inclusive) */ + u8 arch_max; + + /** @hwdev: Pointer to panthor_hw structure */ + struct panthor_hw *hwdev; +}; + +static struct panthor_hw panthor_hw_arch_v10 = { + .ops = { + .soft_reset = panthor_gpu_soft_reset, + .l2_power_off = panthor_gpu_l2_power_off, + .l2_power_on = panthor_gpu_l2_power_on, + }, +}; + +static struct panthor_hw panthor_hw_arch_v14 = { + .ops = { + .soft_reset = panthor_pwr_reset_soft, + .l2_power_off = panthor_pwr_l2_power_off, + .l2_power_on = panthor_pwr_l2_power_on, + }, +}; + +static struct panthor_hw_entry panthor_hw_match[] = { + { + .arch_min = 10, + .arch_max = 13, + .hwdev = &panthor_hw_arch_v10, + }, + { + .arch_min = 14, + .arch_max = 14, + .hwdev = &panthor_hw_arch_v14, + }, +}; + static char *get_gpu_model_name(struct panthor_device *ptdev) { const u32 gpu_id = ptdev->gpu_info.gpu_id; @@ -55,6 +98,12 @@ static char *get_gpu_model_name(struct panthor_device *ptdev) fallthrough; case GPU_PROD_ID_MAKE(13, 1): return "Mali-G625"; + case GPU_PROD_ID_MAKE(14, 0): + return "Mali-G1-Ultra"; + case GPU_PROD_ID_MAKE(14, 1): + return "Mali-G1-Premium"; + case GPU_PROD_ID_MAKE(14, 3): + return "Mali-G1-Pro"; } return "(Unknown Mali GPU)"; @@ -64,7 +113,6 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev) { unsigned int i; - ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); @@ -82,12 +130,19 @@ static void panthor_gpu_info_init(struct panthor_device *ptdev) ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); - ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); - ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); - ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); - /* Introduced in arch 11.x */ ptdev->gpu_info.gpu_features = gpu_read64(ptdev, GPU_FEATURES); + + if (panthor_hw_has_pwr_ctrl(ptdev)) { + /* Introduced in arch 14.x */ + ptdev->gpu_info.l2_present = gpu_read64(ptdev, PWR_L2_PRESENT); + ptdev->gpu_info.tiler_present = gpu_read64(ptdev, PWR_TILER_PRESENT); + ptdev->gpu_info.shader_present = gpu_read64(ptdev, PWR_SHADER_PRESENT); + } else { + ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); + ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); + ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); + } } static void panthor_hw_info_init(struct panthor_device *ptdev) @@ -119,8 +174,50 @@ static void panthor_hw_info_init(struct panthor_device *ptdev) ptdev->gpu_info.tiler_present); } +static int panthor_hw_bind_device(struct panthor_device *ptdev) +{ + struct panthor_hw *hdev = NULL; + const u32 arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); + int i = 0; + + for (i = 0; i < ARRAY_SIZE(panthor_hw_match); i++) { + struct panthor_hw_entry *entry = &panthor_hw_match[i]; + + if (arch_major >= entry->arch_min && arch_major <= entry->arch_max) { + hdev = entry->hwdev; + break; + } + } + + if (!hdev) + return -EOPNOTSUPP; + + ptdev->hw = hdev; + + return 0; +} + +static int panthor_hw_gpu_id_init(struct panthor_device *ptdev) +{ + ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); + if (!ptdev->gpu_info.gpu_id) + return -ENXIO; + + return 0; +} + int panthor_hw_init(struct panthor_device *ptdev) { + int ret = 0; + + ret = panthor_hw_gpu_id_init(ptdev); + if (ret) + return ret; + + ret = panthor_hw_bind_device(ptdev); + if (ret) + return ret; + panthor_hw_info_init(ptdev); return 0; diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h index 0af6acc6aa6a..56c68c1e9c26 100644 --- a/drivers/gpu/drm/panthor/panthor_hw.h +++ b/drivers/gpu/drm/panthor/panthor_hw.h @@ -4,8 +4,53 @@ #ifndef __PANTHOR_HW_H__ #define __PANTHOR_HW_H__ -struct panthor_device; +#include "panthor_device.h" +#include "panthor_regs.h" + +/** + * struct panthor_hw_ops - HW operations that are specific to a GPU + */ +struct panthor_hw_ops { + /** @soft_reset: Soft reset function pointer */ + int (*soft_reset)(struct panthor_device *ptdev); + + /** @l2_power_off: L2 power off function pointer */ + void (*l2_power_off)(struct panthor_device *ptdev); + + /** @l2_power_on: L2 power on function pointer */ + int (*l2_power_on)(struct panthor_device *ptdev); +}; + +/** + * struct panthor_hw - GPU specific register mapping and functions + */ +struct panthor_hw { + /** @features: Bitmap containing panthor_hw_feature */ + + /** @ops: Panthor HW specific operations */ + struct panthor_hw_ops ops; +}; int panthor_hw_init(struct panthor_device *ptdev); +static inline int panthor_hw_soft_reset(struct panthor_device *ptdev) +{ + return ptdev->hw->ops.soft_reset(ptdev); +} + +static inline int panthor_hw_l2_power_on(struct panthor_device *ptdev) +{ + return ptdev->hw->ops.l2_power_on(ptdev); +} + +static inline void panthor_hw_l2_power_off(struct panthor_device *ptdev) +{ + ptdev->hw->ops.l2_power_off(ptdev); +} + +static inline bool panthor_hw_has_pwr_ctrl(struct panthor_device *ptdev) +{ + return GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) >= 14; +} + #endif /* __PANTHOR_HW_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 478ea98db95c..d4839d282689 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -904,10 +904,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; struct io_pgtable_ops *ops = vm->pgtbl_ops; + u64 start_iova = iova; u64 offset = 0; - drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); - while (offset < size) { size_t unmapped_sz = 0, pgcount; size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); @@ -922,6 +921,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) panthor_vm_flush_range(vm, iova, offset + unmapped_sz); return -EINVAL; } + + drm_dbg(&ptdev->base, + "unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu", + vm->as.id, start_iova, size, iova + offset, + unmapped_sz / pgsize, pgsize); + offset += unmapped_sz; } @@ -937,6 +942,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, struct scatterlist *sgl; struct io_pgtable_ops *ops = vm->pgtbl_ops; u64 start_iova = iova; + u64 start_size = size; int ret; if (!size) @@ -956,15 +962,18 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, len = min_t(size_t, len, size); size -= len; - drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", - vm->as.id, iova, &paddr, len); - while (len) { size_t pgcount, mapped = 0; size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_KERNEL, &mapped); + + drm_dbg(&ptdev->base, + "map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu", + vm->as.id, start_iova, start_size, iova, &paddr, + mapped / pgsize, pgsize); + iova += mapped; paddr += mapped; len -= mapped; diff --git a/drivers/gpu/drm/panthor/panthor_pwr.c b/drivers/gpu/drm/panthor/panthor_pwr.c new file mode 100644 index 000000000000..57cfc7ce715b --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_pwr.c @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2025 ARM Limited. All rights reserved. */ + +#include <linux/platform_device.h> +#include <linux/interrupt.h> +#include <linux/cleanup.h> +#include <linux/iopoll.h> +#include <linux/wait.h> + +#include <drm/drm_managed.h> +#include <drm/drm_print.h> + +#include "panthor_device.h" +#include "panthor_hw.h" +#include "panthor_pwr.h" +#include "panthor_regs.h" + +#define PWR_INTERRUPTS_MASK \ + (PWR_IRQ_POWER_CHANGED_SINGLE | \ + PWR_IRQ_POWER_CHANGED_ALL | \ + PWR_IRQ_DELEGATION_CHANGED | \ + PWR_IRQ_RESET_COMPLETED | \ + PWR_IRQ_RETRACT_COMPLETED | \ + PWR_IRQ_INSPECT_COMPLETED | \ + PWR_IRQ_COMMAND_NOT_ALLOWED | \ + PWR_IRQ_COMMAND_INVALID) + +#define PWR_ALL_CORES_MASK GENMASK_U64(63, 0) + +#define PWR_DOMAIN_MAX_BITS 16 + +#define PWR_TRANSITION_TIMEOUT_US (2ULL * USEC_PER_SEC) + +#define PWR_RETRACT_TIMEOUT_US (2ULL * USEC_PER_MSEC) + +#define PWR_RESET_TIMEOUT_MS 500 + +/** + * struct panthor_pwr - PWR_CONTROL block management data. + */ +struct panthor_pwr { + /** @irq: PWR irq. */ + struct panthor_irq irq; + + /** @reqs_lock: Lock protecting access to pending_reqs. */ + spinlock_t reqs_lock; + + /** @pending_reqs: Pending PWR requests. */ + u32 pending_reqs; + + /** @reqs_acked: PWR request wait queue. */ + wait_queue_head_t reqs_acked; +}; + +static void panthor_pwr_irq_handler(struct panthor_device *ptdev, u32 status) +{ + spin_lock(&ptdev->pwr->reqs_lock); + gpu_write(ptdev, PWR_INT_CLEAR, status); + + if (unlikely(status & PWR_IRQ_COMMAND_NOT_ALLOWED)) + drm_err(&ptdev->base, "PWR_IRQ: COMMAND_NOT_ALLOWED"); + + if (unlikely(status & PWR_IRQ_COMMAND_INVALID)) + drm_err(&ptdev->base, "PWR_IRQ: COMMAND_INVALID"); + + if (status & ptdev->pwr->pending_reqs) { + ptdev->pwr->pending_reqs &= ~status; + wake_up_all(&ptdev->pwr->reqs_acked); + } + spin_unlock(&ptdev->pwr->reqs_lock); +} +PANTHOR_IRQ_HANDLER(pwr, PWR, panthor_pwr_irq_handler); + +static void panthor_pwr_write_command(struct panthor_device *ptdev, u32 command, u64 args) +{ + if (args) + gpu_write64(ptdev, PWR_CMDARG, args); + + gpu_write(ptdev, PWR_COMMAND, command); +} + +static bool reset_irq_raised(struct panthor_device *ptdev) +{ + return gpu_read(ptdev, PWR_INT_RAWSTAT) & PWR_IRQ_RESET_COMPLETED; +} + +static bool reset_pending(struct panthor_device *ptdev) +{ + return (ptdev->pwr->pending_reqs & PWR_IRQ_RESET_COMPLETED); +} + +static int panthor_pwr_reset(struct panthor_device *ptdev, u32 reset_cmd) +{ + scoped_guard(spinlock_irqsave, &ptdev->pwr->reqs_lock) { + if (reset_pending(ptdev)) { + drm_WARN(&ptdev->base, 1, "Reset already pending"); + } else { + ptdev->pwr->pending_reqs |= PWR_IRQ_RESET_COMPLETED; + gpu_write(ptdev, PWR_INT_CLEAR, PWR_IRQ_RESET_COMPLETED); + panthor_pwr_write_command(ptdev, reset_cmd, 0); + } + } + + if (!wait_event_timeout(ptdev->pwr->reqs_acked, !reset_pending(ptdev), + msecs_to_jiffies(PWR_RESET_TIMEOUT_MS))) { + guard(spinlock_irqsave)(&ptdev->pwr->reqs_lock); + + if (reset_pending(ptdev) && !reset_irq_raised(ptdev)) { + drm_err(&ptdev->base, "RESET timed out (0x%x)", reset_cmd); + return -ETIMEDOUT; + } + + ptdev->pwr->pending_reqs &= ~PWR_IRQ_RESET_COMPLETED; + } + + return 0; +} + +static const char *get_domain_name(u8 domain) +{ + switch (domain) { + case PWR_COMMAND_DOMAIN_L2: + return "L2"; + case PWR_COMMAND_DOMAIN_TILER: + return "Tiler"; + case PWR_COMMAND_DOMAIN_SHADER: + return "Shader"; + case PWR_COMMAND_DOMAIN_BASE: + return "Base"; + case PWR_COMMAND_DOMAIN_STACK: + return "Stack"; + } + return "Unknown"; +} + +static u32 get_domain_base(u8 domain) +{ + switch (domain) { + case PWR_COMMAND_DOMAIN_L2: + return PWR_L2_PRESENT; + case PWR_COMMAND_DOMAIN_TILER: + return PWR_TILER_PRESENT; + case PWR_COMMAND_DOMAIN_SHADER: + return PWR_SHADER_PRESENT; + case PWR_COMMAND_DOMAIN_BASE: + return PWR_BASE_PRESENT; + case PWR_COMMAND_DOMAIN_STACK: + return PWR_STACK_PRESENT; + } + return 0; +} + +static u32 get_domain_ready_reg(u32 domain) +{ + return get_domain_base(domain) + (PWR_L2_READY - PWR_L2_PRESENT); +} + +static u32 get_domain_pwrtrans_reg(u32 domain) +{ + return get_domain_base(domain) + (PWR_L2_PWRTRANS - PWR_L2_PRESENT); +} + +static bool is_valid_domain(u32 domain) +{ + return get_domain_base(domain) != 0; +} + +static bool has_rtu(struct panthor_device *ptdev) +{ + return ptdev->gpu_info.gpu_features & GPU_FEATURES_RAY_TRAVERSAL; +} + +static u8 get_domain_subdomain(struct panthor_device *ptdev, u32 domain) +{ + if (domain == PWR_COMMAND_DOMAIN_SHADER && has_rtu(ptdev)) + return PWR_COMMAND_SUBDOMAIN_RTU; + + return 0; +} + +static int panthor_pwr_domain_wait_transition(struct panthor_device *ptdev, u32 domain, + u32 timeout_us) +{ + u32 pwrtrans_reg = get_domain_pwrtrans_reg(domain); + u64 val; + int ret = 0; + + ret = gpu_read64_poll_timeout(ptdev, pwrtrans_reg, val, !(PWR_ALL_CORES_MASK & val), 100, + timeout_us); + if (ret) { + drm_err(&ptdev->base, "%s domain power in transition, pwrtrans(0x%llx)", + get_domain_name(domain), val); + return ret; + } + + return 0; +} + +static void panthor_pwr_debug_info_show(struct panthor_device *ptdev) +{ + drm_info(&ptdev->base, "GPU_FEATURES: 0x%016llx", gpu_read64(ptdev, GPU_FEATURES)); + drm_info(&ptdev->base, "PWR_STATUS: 0x%016llx", gpu_read64(ptdev, PWR_STATUS)); + drm_info(&ptdev->base, "L2_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_L2_PRESENT)); + drm_info(&ptdev->base, "L2_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_L2_PWRTRANS)); + drm_info(&ptdev->base, "L2_READY: 0x%016llx", gpu_read64(ptdev, PWR_L2_READY)); + drm_info(&ptdev->base, "TILER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PRESENT)); + drm_info(&ptdev->base, "TILER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_TILER_PWRTRANS)); + drm_info(&ptdev->base, "TILER_READY: 0x%016llx", gpu_read64(ptdev, PWR_TILER_READY)); + drm_info(&ptdev->base, "SHADER_PRESENT: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PRESENT)); + drm_info(&ptdev->base, "SHADER_PWRTRANS: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_PWRTRANS)); + drm_info(&ptdev->base, "SHADER_READY: 0x%016llx", gpu_read64(ptdev, PWR_SHADER_READY)); +} + +static int panthor_pwr_domain_transition(struct panthor_device *ptdev, u32 cmd, u32 domain, + u64 mask, u32 timeout_us) +{ + u32 ready_reg = get_domain_ready_reg(domain); + u32 pwr_cmd = PWR_COMMAND_DEF(cmd, domain, get_domain_subdomain(ptdev, domain)); + u64 expected_val = 0; + u64 val; + int ret = 0; + + if (drm_WARN_ON(&ptdev->base, !is_valid_domain(domain))) + return -EINVAL; + + switch (cmd) { + case PWR_COMMAND_POWER_DOWN: + expected_val = 0; + break; + case PWR_COMMAND_POWER_UP: + expected_val = mask; + break; + default: + drm_err(&ptdev->base, "Invalid power domain transition command (0x%x)", cmd); + return -EINVAL; + } + + ret = panthor_pwr_domain_wait_transition(ptdev, domain, timeout_us); + if (ret) + return ret; + + /* domain already in target state, return early */ + if ((gpu_read64(ptdev, ready_reg) & mask) == expected_val) + return 0; + + panthor_pwr_write_command(ptdev, pwr_cmd, mask); + + ret = gpu_read64_poll_timeout(ptdev, ready_reg, val, (mask & val) == expected_val, 100, + timeout_us); + if (ret) { + drm_err(&ptdev->base, + "timeout waiting on %s power domain transition, cmd(0x%x), arg(0x%llx)", + get_domain_name(domain), pwr_cmd, mask); + panthor_pwr_debug_info_show(ptdev); + return ret; + } + + return 0; +} + +#define panthor_pwr_domain_power_off(__ptdev, __domain, __mask, __timeout_us) \ + panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_DOWN, __domain, __mask, \ + __timeout_us) + +#define panthor_pwr_domain_power_on(__ptdev, __domain, __mask, __timeout_us) \ + panthor_pwr_domain_transition(__ptdev, PWR_COMMAND_POWER_UP, __domain, __mask, __timeout_us) + +/** + * retract_domain() - Retract control of a domain from MCU + * @ptdev: Device. + * @domain: Domain to retract the control + * + * Retracting L2 domain is not expected since it won't be delegated. + * + * Return: 0 on success or retracted already. + * -EPERM if domain is L2. + * A negative error code otherwise. + */ +static int retract_domain(struct panthor_device *ptdev, u32 domain) +{ + const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_RETRACT, domain, 0); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain); + const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain); + u64 val; + int ret; + + if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2)) + return -EPERM; + + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, !(PWR_STATUS_RETRACT_PENDING & val), + 0, PWR_RETRACT_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "%s domain retract pending", get_domain_name(domain)); + return ret; + } + + if (!(pwr_status & delegated_mask)) { + drm_dbg(&ptdev->base, "%s domain already retracted", get_domain_name(domain)); + return 0; + } + + panthor_pwr_write_command(ptdev, pwr_cmd, 0); + + /* + * On successful retraction + * allow-flag will be set with delegated-flag being cleared. + */ + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, + ((delegated_mask | allow_mask) & val) == allow_mask, 10, + PWR_TRANSITION_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "Retracting %s domain timeout, cmd(0x%x)", + get_domain_name(domain), pwr_cmd); + return ret; + } + + return 0; +} + +/** + * delegate_domain() - Delegate control of a domain to MCU + * @ptdev: Device. + * @domain: Domain to delegate the control + * + * Delegating L2 domain is prohibited. + * + * Return: + * * 0 on success or delegated already. + * * -EPERM if domain is L2. + * * A negative error code otherwise. + */ +static int delegate_domain(struct panthor_device *ptdev, u32 domain) +{ + const u32 pwr_cmd = PWR_COMMAND_DEF(PWR_COMMAND_DELEGATE, domain, 0); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u64 allow_mask = PWR_STATUS_DOMAIN_ALLOWED(domain); + const u64 delegated_mask = PWR_STATUS_DOMAIN_DELEGATED(domain); + u64 val; + int ret; + + if (drm_WARN_ON(&ptdev->base, domain == PWR_COMMAND_DOMAIN_L2)) + return -EPERM; + + /* Already delegated, exit early */ + if (pwr_status & delegated_mask) + return 0; + + /* Check if the command is allowed before delegating. */ + if (!(pwr_status & allow_mask)) { + drm_warn(&ptdev->base, "Delegating %s domain not allowed", get_domain_name(domain)); + return -EPERM; + } + + ret = panthor_pwr_domain_wait_transition(ptdev, domain, PWR_TRANSITION_TIMEOUT_US); + if (ret) + return ret; + + panthor_pwr_write_command(ptdev, pwr_cmd, 0); + + /* + * On successful delegation + * allow-flag will be cleared with delegated-flag being set. + */ + ret = gpu_read64_poll_timeout(ptdev, PWR_STATUS, val, + ((delegated_mask | allow_mask) & val) == delegated_mask, + 10, PWR_TRANSITION_TIMEOUT_US); + if (ret) { + drm_err(&ptdev->base, "Delegating %s domain timeout, cmd(0x%x)", + get_domain_name(domain), pwr_cmd); + return ret; + } + + return 0; +} + +static int panthor_pwr_delegate_domains(struct panthor_device *ptdev) +{ + int ret; + + if (!ptdev->pwr) + return 0; + + ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER); + if (ret) + return ret; + + ret = delegate_domain(ptdev, PWR_COMMAND_DOMAIN_TILER); + if (ret) + goto err_retract_shader; + + return 0; + +err_retract_shader: + retract_domain(ptdev, PWR_COMMAND_DOMAIN_SHADER); + + return ret; +} + +/** + * panthor_pwr_domain_force_off - Forcefully power down a domain. + * @ptdev: Device. + * @domain: Domain to forcefully power down. + * + * This function will attempt to retract and power off the requested power + * domain. However, if retraction fails, the operation is aborted. If power off + * fails, the domain will remain retracted and under the host control. + * + * Return: 0 on success or a negative error code on failure. + */ +static int panthor_pwr_domain_force_off(struct panthor_device *ptdev, u32 domain) +{ + const u64 domain_ready = gpu_read64(ptdev, get_domain_ready_reg(domain)); + int ret; + + /* Domain already powered down, early exit. */ + if (!domain_ready) + return 0; + + /* Domain has to be in host control to issue power off command. */ + ret = retract_domain(ptdev, domain); + if (ret) + return ret; + + return panthor_pwr_domain_power_off(ptdev, domain, domain_ready, PWR_TRANSITION_TIMEOUT_US); +} + +void panthor_pwr_unplug(struct panthor_device *ptdev) +{ + unsigned long flags; + + if (!ptdev->pwr) + return; + + /* Make sure the IRQ handler is not running after that point. */ + panthor_pwr_irq_suspend(&ptdev->pwr->irq); + + /* Wake-up all waiters. */ + spin_lock_irqsave(&ptdev->pwr->reqs_lock, flags); + ptdev->pwr->pending_reqs = 0; + wake_up_all(&ptdev->pwr->reqs_acked); + spin_unlock_irqrestore(&ptdev->pwr->reqs_lock, flags); +} + +int panthor_pwr_init(struct panthor_device *ptdev) +{ + struct panthor_pwr *pwr; + int err, irq; + + if (!panthor_hw_has_pwr_ctrl(ptdev)) + return 0; + + pwr = drmm_kzalloc(&ptdev->base, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + spin_lock_init(&pwr->reqs_lock); + init_waitqueue_head(&pwr->reqs_acked); + ptdev->pwr = pwr; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); + if (irq < 0) + return irq; + + err = panthor_request_pwr_irq(ptdev, &pwr->irq, irq, PWR_INTERRUPTS_MASK); + if (err) + return err; + + return 0; +} + +int panthor_pwr_reset_soft(struct panthor_device *ptdev) +{ + if (!(gpu_read64(ptdev, PWR_STATUS) & PWR_STATUS_ALLOW_SOFT_RESET)) { + drm_err(&ptdev->base, "RESET_SOFT not allowed"); + return -EOPNOTSUPP; + } + + return panthor_pwr_reset(ptdev, PWR_COMMAND_RESET_SOFT); +} + +void panthor_pwr_l2_power_off(struct panthor_device *ptdev) +{ + const u64 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2); + const u64 pwr_status = gpu_read64(ptdev, PWR_STATUS); + + /* Abort if L2 power off constraints are not satisfied */ + if (!(pwr_status & l2_allow_mask)) { + drm_warn(&ptdev->base, "Power off L2 domain not allowed"); + return; + } + + /* It is expected that when halting the MCU, it would power down its + * delegated domains. However, an unresponsive or hung MCU may not do + * so, which is why we need to check and retract the domains back into + * host control to be powered down in the right order before powering + * down the L2. + */ + if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_TILER)) + return; + + if (panthor_pwr_domain_force_off(ptdev, PWR_COMMAND_DOMAIN_SHADER)) + return; + + panthor_pwr_domain_power_off(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present, + PWR_TRANSITION_TIMEOUT_US); +} + +int panthor_pwr_l2_power_on(struct panthor_device *ptdev) +{ + const u32 pwr_status = gpu_read64(ptdev, PWR_STATUS); + const u32 l2_allow_mask = PWR_STATUS_DOMAIN_ALLOWED(PWR_COMMAND_DOMAIN_L2); + int ret; + + if ((pwr_status & l2_allow_mask) == 0) { + drm_warn(&ptdev->base, "Power on L2 domain not allowed"); + return -EPERM; + } + + ret = panthor_pwr_domain_power_on(ptdev, PWR_COMMAND_DOMAIN_L2, ptdev->gpu_info.l2_present, + PWR_TRANSITION_TIMEOUT_US); + if (ret) + return ret; + + /* Delegate control of the shader and tiler power domains to the MCU as + * it can better manage which shader/tiler cores need to be powered up + * or can be powered down based on currently running jobs. + * + * If the shader and tiler domains are already delegated to the MCU, + * this call would just return early. + */ + return panthor_pwr_delegate_domains(ptdev); +} + +void panthor_pwr_suspend(struct panthor_device *ptdev) +{ + if (!ptdev->pwr) + return; + + panthor_pwr_irq_suspend(&ptdev->pwr->irq); +} + +void panthor_pwr_resume(struct panthor_device *ptdev) +{ + if (!ptdev->pwr) + return; + + panthor_pwr_irq_resume(&ptdev->pwr->irq, PWR_INTERRUPTS_MASK); +} diff --git a/drivers/gpu/drm/panthor/panthor_pwr.h b/drivers/gpu/drm/panthor/panthor_pwr.h new file mode 100644 index 000000000000..adf1f6136abc --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_pwr.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 ARM Limited. All rights reserved. */ + +#ifndef __PANTHOR_PWR_H__ +#define __PANTHOR_PWR_H__ + +struct panthor_device; + +void panthor_pwr_unplug(struct panthor_device *ptdev); + +int panthor_pwr_init(struct panthor_device *ptdev); + +int panthor_pwr_reset_soft(struct panthor_device *ptdev); + +void panthor_pwr_l2_power_off(struct panthor_device *ptdev); + +int panthor_pwr_l2_power_on(struct panthor_device *ptdev); + +void panthor_pwr_suspend(struct panthor_device *ptdev); + +void panthor_pwr_resume(struct panthor_device *ptdev); + +#endif /* __PANTHOR_PWR_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index 8fa69f33e911..08bf06c452d6 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -74,6 +74,7 @@ #define GPU_FEATURES 0x60 #define GPU_FEATURES_RAY_INTERSECTION BIT(2) +#define GPU_FEATURES_RAY_TRAVERSAL BIT(5) #define GPU_TIMESTAMP_OFFSET 0x88 #define GPU_CYCLE_COUNT 0x90 @@ -209,4 +210,82 @@ #define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) #define CSF_GLB_DOORBELL_ID 0 +/* PWR Control registers */ + +#define PWR_CONTROL_BASE 0x800 +#define PWR_CTRL_REG(x) (PWR_CONTROL_BASE + (x)) + +#define PWR_INT_RAWSTAT PWR_CTRL_REG(0x0) +#define PWR_INT_CLEAR PWR_CTRL_REG(0x4) +#define PWR_INT_MASK PWR_CTRL_REG(0x8) +#define PWR_INT_STAT PWR_CTRL_REG(0xc) +#define PWR_IRQ_POWER_CHANGED_SINGLE BIT(0) +#define PWR_IRQ_POWER_CHANGED_ALL BIT(1) +#define PWR_IRQ_DELEGATION_CHANGED BIT(2) +#define PWR_IRQ_RESET_COMPLETED BIT(3) +#define PWR_IRQ_RETRACT_COMPLETED BIT(4) +#define PWR_IRQ_INSPECT_COMPLETED BIT(5) +#define PWR_IRQ_COMMAND_NOT_ALLOWED BIT(30) +#define PWR_IRQ_COMMAND_INVALID BIT(31) + +#define PWR_STATUS PWR_CTRL_REG(0x20) +#define PWR_STATUS_ALLOW_L2 BIT_U64(0) +#define PWR_STATUS_ALLOW_TILER BIT_U64(1) +#define PWR_STATUS_ALLOW_SHADER BIT_U64(8) +#define PWR_STATUS_ALLOW_BASE BIT_U64(14) +#define PWR_STATUS_ALLOW_STACK BIT_U64(15) +#define PWR_STATUS_DOMAIN_ALLOWED(x) BIT_U64(x) +#define PWR_STATUS_DELEGATED_L2 BIT_U64(16) +#define PWR_STATUS_DELEGATED_TILER BIT_U64(17) +#define PWR_STATUS_DELEGATED_SHADER BIT_U64(24) +#define PWR_STATUS_DELEGATED_BASE BIT_U64(30) +#define PWR_STATUS_DELEGATED_STACK BIT_U64(31) +#define PWR_STATUS_DELEGATED_SHIFT 16 +#define PWR_STATUS_DOMAIN_DELEGATED(x) BIT_U64((x) + PWR_STATUS_DELEGATED_SHIFT) +#define PWR_STATUS_ALLOW_SOFT_RESET BIT_U64(33) +#define PWR_STATUS_ALLOW_FAST_RESET BIT_U64(34) +#define PWR_STATUS_POWER_PENDING BIT_U64(41) +#define PWR_STATUS_RESET_PENDING BIT_U64(42) +#define PWR_STATUS_RETRACT_PENDING BIT_U64(43) +#define PWR_STATUS_INSPECT_PENDING BIT_U64(44) + +#define PWR_COMMAND PWR_CTRL_REG(0x28) +#define PWR_COMMAND_POWER_UP 0x10 +#define PWR_COMMAND_POWER_DOWN 0x11 +#define PWR_COMMAND_DELEGATE 0x20 +#define PWR_COMMAND_RETRACT 0x21 +#define PWR_COMMAND_RESET_SOFT 0x31 +#define PWR_COMMAND_RESET_FAST 0x32 +#define PWR_COMMAND_INSPECT 0xF0 +#define PWR_COMMAND_DOMAIN_L2 0 +#define PWR_COMMAND_DOMAIN_TILER 1 +#define PWR_COMMAND_DOMAIN_SHADER 8 +#define PWR_COMMAND_DOMAIN_BASE 14 +#define PWR_COMMAND_DOMAIN_STACK 15 +#define PWR_COMMAND_SUBDOMAIN_RTU BIT(0) +#define PWR_COMMAND_DEF(cmd, domain, subdomain) \ + (((subdomain) << 16) | ((domain) << 8) | (cmd)) + +#define PWR_CMDARG PWR_CTRL_REG(0x30) + +#define PWR_L2_PRESENT PWR_CTRL_REG(0x100) +#define PWR_L2_READY PWR_CTRL_REG(0x108) +#define PWR_L2_PWRTRANS PWR_CTRL_REG(0x110) +#define PWR_L2_PWRACTIVE PWR_CTRL_REG(0x118) +#define PWR_TILER_PRESENT PWR_CTRL_REG(0x140) +#define PWR_TILER_READY PWR_CTRL_REG(0x148) +#define PWR_TILER_PWRTRANS PWR_CTRL_REG(0x150) +#define PWR_TILER_PWRACTIVE PWR_CTRL_REG(0x158) +#define PWR_SHADER_PRESENT PWR_CTRL_REG(0x200) +#define PWR_SHADER_READY PWR_CTRL_REG(0x208) +#define PWR_SHADER_PWRTRANS PWR_CTRL_REG(0x210) +#define PWR_SHADER_PWRACTIVE PWR_CTRL_REG(0x218) +#define PWR_BASE_PRESENT PWR_CTRL_REG(0x380) +#define PWR_BASE_READY PWR_CTRL_REG(0x388) +#define PWR_BASE_PWRTRANS PWR_CTRL_REG(0x390) +#define PWR_BASE_PWRACTIVE PWR_CTRL_REG(0x398) +#define PWR_STACK_PRESENT PWR_CTRL_REG(0x3c0) +#define PWR_STACK_READY PWR_CTRL_REG(0x3c8) +#define PWR_STACK_PWRTRANS PWR_CTRL_REG(0x3d0) + #endif diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index e74ca071159d..b834123a6560 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -364,17 +364,20 @@ struct panthor_queue { /** @name: DRM scheduler name for this queue. */ char *name; - /** - * @remaining_time: Time remaining before the job timeout expires. - * - * The job timeout is suspended when the queue is not scheduled by the - * FW. Every time we suspend the timer, we need to save the remaining - * time so we can restore it later on. - */ - unsigned long remaining_time; + /** @timeout: Queue timeout related fields. */ + struct { + /** @timeout.work: Work executed when a queue timeout occurs. */ + struct delayed_work work; - /** @timeout_suspended: True if the job timeout was suspended. */ - bool timeout_suspended; + /** + * @timeout.remaining: Time remaining before a queue timeout. + * + * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT. + * When the timer is suspended, it's set to the time remaining when the + * timer was suspended. + */ + unsigned long remaining; + } timeout; /** * @doorbell_id: Doorbell assigned to this queue. @@ -899,6 +902,10 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; + /* This should have been disabled before that point. */ + drm_WARN_ON(&group->ptdev->base, + disable_delayed_work_sync(&queue->timeout.work)); + if (queue->entity.fence_context) drm_sched_entity_destroy(&queue->entity); @@ -1046,6 +1053,115 @@ group_unbind_locked(struct panthor_group *group) return 0; } +static bool +group_is_idle(struct panthor_group *group) +{ + struct panthor_device *ptdev = group->ptdev; + u32 inactive_queues; + + if (group->csg_id >= 0) + return ptdev->scheduler->csg_slots[group->csg_id].idle; + + inactive_queues = group->idle_queues | group->blocked_queues; + return hweight32(inactive_queues) == group->queue_count; +} + +static void +queue_reset_timeout_locked(struct panthor_queue *queue) +{ + lockdep_assert_held(&queue->fence_ctx.lock); + + if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) { + mod_delayed_work(queue->scheduler.timeout_wq, + &queue->timeout.work, + msecs_to_jiffies(JOB_TIMEOUT_MS)); + } +} + +static bool +group_can_run(struct panthor_group *group) +{ + return group->state != PANTHOR_CS_GROUP_TERMINATED && + group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE && + !group->destroyed && group->fatal_queues == 0 && + !group->timedout; +} + +static bool +queue_timeout_is_suspended(struct panthor_queue *queue) +{ + /* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */ + return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT; +} + +static void +queue_suspend_timeout_locked(struct panthor_queue *queue) +{ + unsigned long qtimeout, now; + struct panthor_group *group; + struct panthor_job *job; + bool timer_was_active; + + lockdep_assert_held(&queue->fence_ctx.lock); + + /* Already suspended, nothing to do. */ + if (queue_timeout_is_suspended(queue)) + return; + + job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs, + struct panthor_job, node); + group = job ? job->group : NULL; + + /* If the queue is blocked and the group is idle, we want the timer to + * keep running because the group can't be unblocked by other queues, + * so it has to come from an external source, and we want to timebox + * this external signalling. + */ + if (group && group_can_run(group) && + (group->blocked_queues & BIT(job->queue_idx)) && + group_is_idle(group)) + return; + + now = jiffies; + qtimeout = queue->timeout.work.timer.expires; + + /* Cancel the timer. */ + timer_was_active = cancel_delayed_work(&queue->timeout.work); + if (!timer_was_active || !job) + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); + else if (time_after(qtimeout, now)) + queue->timeout.remaining = qtimeout - now; + else + queue->timeout.remaining = 0; + + if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS))) + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); +} + +static void +queue_suspend_timeout(struct panthor_queue *queue) +{ + spin_lock(&queue->fence_ctx.lock); + queue_suspend_timeout_locked(queue); + spin_unlock(&queue->fence_ctx.lock); +} + +static void +queue_resume_timeout(struct panthor_queue *queue) +{ + spin_lock(&queue->fence_ctx.lock); + + if (queue_timeout_is_suspended(queue)) { + mod_delayed_work(queue->scheduler.timeout_wq, + &queue->timeout.work, + queue->timeout.remaining); + + queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT; + } + + spin_unlock(&queue->fence_ctx.lock); +} + /** * cs_slot_prog_locked() - Program a queue slot * @ptdev: Device. @@ -1084,10 +1200,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) CS_IDLE_EMPTY | CS_STATE_MASK | CS_EXTRACT_EVENT); - if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) { - drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time); - queue->timeout_suspended = false; - } + if (queue->iface.input->insert != queue->iface.input->extract) + queue_resume_timeout(queue); } /** @@ -1114,14 +1228,7 @@ cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) CS_STATE_STOP, CS_STATE_MASK); - /* If the queue is blocked, we want to keep the timeout running, so - * we can detect unbounded waits and kill the group when that happens. - */ - if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) { - queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); - queue->timeout_suspended = true; - WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS)); - } + queue_suspend_timeout(queue); return 0; } @@ -1140,11 +1247,13 @@ csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) { struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; struct panthor_fw_csg_iface *csg_iface; + u64 endpoint_req; lockdep_assert_held(&ptdev->scheduler->lock); csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); - csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28; + endpoint_req = panthor_fw_csg_endpoint_req_get(ptdev, csg_iface); + csg_slot->priority = CSG_EP_REQ_PRIORITY_GET(endpoint_req); } /** @@ -1304,6 +1413,7 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) struct panthor_csg_slot *csg_slot; struct panthor_group *group; u32 queue_mask = 0, i; + u64 endpoint_req; lockdep_assert_held(&ptdev->scheduler->lock); @@ -1330,10 +1440,12 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) csg_iface->input->allow_compute = group->compute_core_mask; csg_iface->input->allow_fragment = group->fragment_core_mask; csg_iface->input->allow_other = group->tiler_core_mask; - csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | - CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | - CSG_EP_REQ_TILER(group->max_tiler_cores) | - CSG_EP_REQ_PRIORITY(priority); + endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | + CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | + CSG_EP_REQ_TILER(group->max_tiler_cores) | + CSG_EP_REQ_PRIORITY(priority); + panthor_fw_csg_endpoint_req_set(ptdev, csg_iface, endpoint_req); + csg_iface->input->config = panthor_vm_as(group->vm); if (group->suspend_buf) @@ -1916,28 +2028,6 @@ tick_ctx_is_full(const struct panthor_scheduler *sched, return ctx->group_count == sched->csg_slot_count; } -static bool -group_is_idle(struct panthor_group *group) -{ - struct panthor_device *ptdev = group->ptdev; - u32 inactive_queues; - - if (group->csg_id >= 0) - return ptdev->scheduler->csg_slots[group->csg_id].idle; - - inactive_queues = group->idle_queues | group->blocked_queues; - return hweight32(inactive_queues) == group->queue_count; -} - -static bool -group_can_run(struct panthor_group *group) -{ - return group->state != PANTHOR_CS_GROUP_TERMINATED && - group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE && - !group->destroyed && group->fatal_queues == 0 && - !group->timedout; -} - static void tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx, @@ -2231,9 +2321,9 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c continue; } - panthor_fw_update_reqs(csg_iface, endpoint_req, - CSG_EP_REQ_PRIORITY(new_csg_prio), - CSG_EP_REQ_PRIORITY_MASK); + panthor_fw_csg_endpoint_req_update(ptdev, csg_iface, + CSG_EP_REQ_PRIORITY(new_csg_prio), + CSG_EP_REQ_PRIORITY_MASK); csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, CSG_ENDPOINT_CONFIG); @@ -2619,6 +2709,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) static void queue_stop(struct panthor_queue *queue, struct panthor_job *bad_job) { + disable_delayed_work_sync(&queue->timeout.work); drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL); } @@ -2630,6 +2721,7 @@ static void queue_start(struct panthor_queue *queue) list_for_each_entry(job, &queue->scheduler.pending_list, base.list) job->base.s_fence->parent = dma_fence_get(job->done_fence); + enable_delayed_work(&queue->timeout.work); drm_sched_start(&queue->scheduler, 0); } @@ -2696,7 +2788,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev) { struct panthor_scheduler *sched = ptdev->scheduler; struct panthor_csg_slots_upd_ctx upd_ctx; - struct panthor_group *group; u32 suspended_slots; u32 i; @@ -2750,13 +2841,23 @@ void panthor_sched_suspend(struct panthor_device *ptdev) while (slot_mask) { u32 csg_id = ffs(slot_mask) - 1; struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + struct panthor_group *group = csg_slot->group; /* Terminate command timedout, but the soft-reset will * automatically terminate all active groups, so let's * force the state to halted here. */ - if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) - csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; + if (group->state != PANTHOR_CS_GROUP_TERMINATED) { + group->state = PANTHOR_CS_GROUP_TERMINATED; + + /* Reset the queue slots manually if the termination + * request failed. + */ + for (i = 0; i < group->queue_count; i++) { + if (group->queues[i]) + cs_slot_reset_locked(ptdev, csg_id, i); + } + } slot_mask &= ~BIT(csg_id); } } @@ -2786,8 +2887,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev) for (i = 0; i < sched->csg_slot_count; i++) { struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; + struct panthor_group *group = csg_slot->group; - group = csg_slot->group; if (!group) continue; @@ -2916,35 +3017,47 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile) xa_unlock(&gpool->xa); } -static void group_sync_upd_work(struct work_struct *work) +static bool queue_check_job_completion(struct panthor_queue *queue) { - struct panthor_group *group = - container_of(work, struct panthor_group, sync_upd_work); + struct panthor_syncobj_64b *syncobj = NULL; struct panthor_job *job, *job_tmp; + bool cookie, progress = false; LIST_HEAD(done_jobs); - u32 queue_idx; - bool cookie; cookie = dma_fence_begin_signalling(); - for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { - struct panthor_queue *queue = group->queues[queue_idx]; - struct panthor_syncobj_64b *syncobj; + spin_lock(&queue->fence_ctx.lock); + list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { + if (!syncobj) { + struct panthor_group *group = job->group; - if (!queue) - continue; + syncobj = group->syncobjs->kmap + + (job->queue_idx * sizeof(*syncobj)); + } - syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); + if (syncobj->seqno < job->done_fence->seqno) + break; - spin_lock(&queue->fence_ctx.lock); - list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (syncobj->seqno < job->done_fence->seqno) - break; + list_move_tail(&job->node, &done_jobs); + dma_fence_signal_locked(job->done_fence); + } - list_move_tail(&job->node, &done_jobs); - dma_fence_signal_locked(job->done_fence); - } - spin_unlock(&queue->fence_ctx.lock); + if (list_empty(&queue->fence_ctx.in_flight_jobs)) { + /* If we have no job left, we cancel the timer, and reset remaining + * time to its default so it can be restarted next time + * queue_resume_timeout() is called. + */ + queue_suspend_timeout_locked(queue); + + /* If there's no job pending, we consider it progress to avoid a + * spurious timeout if the timeout handler and the sync update + * handler raced. + */ + progress = true; + } else if (!list_empty(&done_jobs)) { + queue_reset_timeout_locked(queue); + progress = true; } + spin_unlock(&queue->fence_ctx.lock); dma_fence_end_signalling(cookie); list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { @@ -2954,6 +3067,27 @@ static void group_sync_upd_work(struct work_struct *work) panthor_job_put(&job->base); } + return progress; +} + +static void group_sync_upd_work(struct work_struct *work) +{ + struct panthor_group *group = + container_of(work, struct panthor_group, sync_upd_work); + u32 queue_idx; + bool cookie; + + cookie = dma_fence_begin_signalling(); + for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { + struct panthor_queue *queue = group->queues[queue_idx]; + + if (!queue) + continue; + + queue_check_job_completion(queue); + } + dma_fence_end_signalling(cookie); + group_put(group); } @@ -3201,17 +3335,6 @@ queue_run_job(struct drm_sched_job *sched_job) queue->iface.input->insert = job->ringbuf.end; if (group->csg_id < 0) { - /* If the queue is blocked, we want to keep the timeout running, so we - * can detect unbounded waits and kill the group when that happens. - * Otherwise, we suspend the timeout so the time we spend waiting for - * a CSG slot is not counted. - */ - if (!(group->blocked_queues & BIT(job->queue_idx)) && - !queue->timeout_suspended) { - queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); - queue->timeout_suspended = true; - } - group_schedule_locked(group, BIT(job->queue_idx)); } else { gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); @@ -3220,6 +3343,7 @@ queue_run_job(struct drm_sched_job *sched_job) pm_runtime_get(ptdev->base.dev); sched->pm.has_ref = true; } + queue_resume_timeout(queue); panthor_devfreq_record_busy(sched->ptdev); } @@ -3269,7 +3393,6 @@ queue_timedout_job(struct drm_sched_job *sched_job) mutex_unlock(&sched->lock); queue_start(queue); - return DRM_GPU_SCHED_STAT_RESET; } @@ -3312,6 +3435,17 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev, return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64)); } +static void queue_timeout_work(struct work_struct *work) +{ + struct panthor_queue *queue = container_of(work, struct panthor_queue, + timeout.work.work); + bool progress; + + progress = queue_check_job_completion(queue); + if (!progress) + drm_sched_fault(&queue->scheduler); +} + static struct panthor_queue * group_create_queue(struct panthor_group *group, const struct drm_panthor_queue_create *args, @@ -3328,7 +3462,7 @@ group_create_queue(struct panthor_group *group, * their profiling status. */ .credit_limit = args->ringbuf_size / sizeof(u64), - .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), + .timeout = MAX_SCHEDULE_TIMEOUT, .timeout_wq = group->ptdev->reset.wq, .dev = group->ptdev->base.dev, }; @@ -3350,6 +3484,8 @@ group_create_queue(struct panthor_group *group, if (!queue) return ERR_PTR(-ENOMEM); + queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); + INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work); queue->fence_ctx.id = dma_fence_context_alloc(1); spin_lock_init(&queue->fence_ctx.lock); INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); |