| field     | value                                    | details                   |
|-----------|------------------------------------------|---------------------------|
| author    | Thomas Zimmermann <tzimmermann@suse.de>  | 2025-10-13 09:19:19 +0200 |
| committer | Thomas Zimmermann <tzimmermann@suse.de>  | 2025-10-13 09:19:19 +0200 |
| commit    | 9b966ae42235a88eaea714be09ff3d698535bdfe | |
| tree      | 7470df78fb74fdfda1f773feb3822c1c9ab38616 | /drivers/gpu |
| parent    | 5385871282e5c2831c226d32cf2ce26b45a7b164 | |
| parent    | 3a8660878839faadb4f1a6dd72c3179c1df56787 | |
Merge drm/drm-next into drm-misc-next
Updating drm-misc-next to the state of v6.18-rc1.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu')
578 files changed, 14926 insertions, 10249 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f7ea8e895c0c..7e6bc0b3a589 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -396,9 +396,11 @@ source "drivers/gpu/drm/sprd/Kconfig" source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" - depends on DRM && PCI && HYPERV + depends on DRM && PCI && HYPERV_VMBUS select DRM_CLIENT_SELECTION select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5ba4ffdb8055..c2672f369aed 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -221,6 +221,7 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ obj-$(CONFIG_DRM_PANFROST) += panfrost/ obj-$(CONFIG_DRM_PANTHOR) += panthor/ +obj-$(CONFIG_DRM_TYR) += tyr/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ obj-$(CONFIG_DRM_MCDE) += mcde/ obj-$(CONFIG_DRM_TIDSS) += tidss/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2d0fea87af79..64e7acff8f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -138,7 +138,6 @@ amdgpu-y += \ # add DCE block amdgpu-y += \ dce_v10_0.o \ - dce_v11_0.o \ amdgpu_vkms.o # add GFX block diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 17848ce65d1f..2a0df4cabb99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -63,6 +63,7 @@ #include "kgd_pp_interface.h" #include "amd_shared.h" +#include "amdgpu_utils.h" #include "amdgpu_mode.h" #include "amdgpu_ih.h" #include "amdgpu_irq.h" @@ -434,7 +435,6 @@ struct amdgpu_clock { uint32_t default_mclk; uint32_t default_sclk; uint32_t default_dispclk; - uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; }; @@ -545,7 +545,7 @@ struct amdgpu_wb { * this value can be accessed directly by using the offset as an index. * For the GPU address, it is necessary to use gpu_addr and the offset. 
*/ - volatile uint32_t *wb; + uint32_t *wb; /** * @gpu_addr: @@ -721,7 +721,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, /* VRAM scratch page for HDP bug, default vram page */ struct amdgpu_mem_scratch { struct amdgpu_bo *robj; - volatile uint32_t *ptr; + uint32_t *ptr; u64 gpu_addr; }; @@ -752,6 +752,7 @@ typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, u struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; + struct amdgpu_bo *bo; }; /* Define the HW IP blocks will be used in driver , add more if necessary */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index fbe7616555c8..a2879d2b7c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) { - if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); + else + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + } } int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) { int r = 0; - if (adev->kfd.dev) - r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + if (adev->kfd.dev) { + if (adev->in_s0ix) + r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev); + else + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 127927b16ee2..9e120c934cc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -428,7 +428,9 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, bool retry_fault); @@ -518,11 +520,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return 0; } +static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } +static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 04ef0ca10541..0239114fb6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -352,7 +352,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -449,7 +449,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - 
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 6d08bc2781a3..f2278a0937ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -338,7 +338,7 @@ static int hqd_dump_v10_3(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -435,7 +435,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+12) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index e0e6a6a49d90..aaccf0b9947d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -323,7 +323,7 @@ static int hqd_dump_v11(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -420,7 +420,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (7+11+1+12+12) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c index 6f0dc23c901b..e0ceab400b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c @@ -115,7 +115,7 @@ static int hqd_dump_v12(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -146,7 +146,7 @@ static int hqd_sdma_dump_v12(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (last_reg - first_reg + 1) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c3b34a410375..83020963dfde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1089,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &range); if (ret) { if (ret == -EAGAIN) pr_debug("Failed to get user pages, try again\n"); @@ -1103,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, pr_err("%s: Failed to reserve BO\n", __func__); goto release_out; } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, mem->domain); ret 
= ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) @@ -2565,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &mem->range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2584,17 +2586,24 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * from the KFD, trigger a segmentation fault in VM debug mode. */ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { + struct kfd_process *p; + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", pid_nr(process_info->pid), mem->va); // Send GPU VM fault to user space - kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), - mem->va); + p = kfd_lookup_process_by_pid(process_info->pid); + if (p) { + kfd_signal_vm_fault_event_with_userptr(p, mem->va); + kfd_unref_process(p); + } } ret = 0; } + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range); + mutex_lock(&process_info->notifier_lock); /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9dfdc08cc887..763f2b8dcf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -706,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) } adev->clock.dp_extclk = le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); - adev->clock.current_dispclk = adev->clock.default_dispclk; adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock); if (adev->clock.max_pixel_clock == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 702f6610d024..66fb37b64388 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -184,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list) int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, struct drm_amdgpu_bo_list_entry **info_param) { - const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); + const uint32_t bo_info_size = in->bo_info_size; + const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; - int r; - - info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL); - if (!info) - return -ENOMEM; /* copy the handle array from userspace to a kernel buffer */ - r = -EFAULT; - if (likely(info_size == in->bo_info_size)) { - unsigned long bytes = in->bo_number * - in->bo_info_size; - - if (copy_from_user(info, uptr, bytes)) - goto error_free; - + if (likely(info_size == bo_info_size)) { + info = vmemdup_array_user(uptr, bo_number, info_size); + if (IS_ERR(info)) + return PTR_ERR(info); } else { - unsigned long bytes = min(in->bo_info_size, info_size); + const uint32_t bytes = min(bo_info_size, info_size); unsigned i; - memset(info, 0, in->bo_number * info_size); - for (i = 0; i < in->bo_number; ++i) { - if (copy_from_user(&info[i], uptr, bytes)) - goto error_free; + info = kvmalloc_array(bo_number, info_size, GFP_KERNEL); + if (!info) + return -ENOMEM; - uptr += in->bo_info_size; + memset(info, 0, bo_number * info_size); + for (i = 0; i < bo_number; ++i, uptr += bo_info_size) { + 
if (copy_from_user(&info[i], uptr, bytes)) { + kvfree(info); + return -EFAULT; + } } } *info_param = info; return 0; - -error_free: - kvfree(info); - return r; } int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 555cd6d877c3..a716c9886c74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo *bo; struct amdgpu_bo_va *bo_va; uint32_t priority; - struct page **user_pages; struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index bf38fc69c1cf..47e9bfba0642 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -398,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, struct drm_display_mode *mode = NULL; struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; int i; - static const struct mode_size { + int n; + struct mode_size { + char name[DRM_DISPLAY_MODE_LEN]; int w; int h; - } common_modes[17] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200} + } common_modes[] = { + { "640x480", 640, 480}, + { "800x600", 800, 600}, + { "1024x768", 1024, 768}, + { "1280x720", 1280, 720}, + { "1280x800", 1280, 800}, + {"1280x1024", 1280, 1024}, + { "1440x900", 1440, 900}, + {"1680x1050", 1680, 1050}, + {"1600x1200", 1600, 1200}, + {"1920x1080", 1920, 1080}, + {"1920x1200", 1920, 1200} }; - for (i = 0; i < 17; i++) { + n = ARRAY_SIZE(common_modes); + + for (i = 0; i < n; i++) { if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) { if (common_modes[i].w > 1024 || common_modes[i].h > 768) @@ -434,12 +432,11 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, common_modes[i].h == native_mode->vdisplay)) continue; } - if (common_modes[i].w < 320 || common_modes[i].h < 200) - continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); if (!mode) return; + strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN); drm_mode_probed_add(connector, mode); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ac9729e4c86..9cd7741d2254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -29,6 +29,7 @@ #include <linux/pagemap.h> #include <linux/sync_file.h> #include <linux/dma-buf.h> +#include <linux/hmm.h> #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> @@ -178,25 +179,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = &fpriv->vm; - uint64_t *chunk_array_user; uint64_t *chunk_array; uint32_t uf_offset = 0; size_t size; int ret; int i; - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), - GFP_KERNEL); - if (!chunk_array) - return -ENOMEM; - - /* get chunks */ - chunk_array_user = u64_to_user_ptr(cs->in.chunks); - if (copy_from_user(chunk_array, chunk_array_user, - sizeof(uint64_t)*cs->in.num_chunks)) { - ret = -EFAULT; - goto free_chunk; - 
} + chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks), + cs->in.num_chunks, + sizeof(uint64_t)); + if (IS_ERR(chunk_array)) + return PTR_ERR(chunk_array); p->nchunks = cs->in.num_chunks; p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), @@ -209,7 +202,6 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, for (i = 0; i < p->nchunks; i++) { struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; - uint32_t __user *cdata; chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, @@ -222,20 +214,16 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), - GFP_KERNEL); - if (p->chunks[i].kdata == NULL) { - ret = -ENOMEM; + p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data), + size, + sizeof(uint32_t)); + if (IS_ERR(p->chunks[i].kdata)) { + ret = PTR_ERR(p->chunks[i].kdata); i--; goto free_partial_kdata; } size *= sizeof(uint32_t); - if (copy_from_user(p->chunks[i].kdata, cdata, size)) { - ret = -EFAULT; - goto free_partial_kdata; - } /* Assume the worst on the following checks */ ret = -EINVAL; @@ -286,7 +274,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - if (!p->gang_size) { + if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) { ret = -EINVAL; goto free_all_kdata; } @@ -896,26 +884,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - int i; - - e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL); - if (!e->user_pages) { - drm_err(adev_to_drm(p->adev), "kvmalloc_array failure\n"); - r = -ENOMEM; - goto out_free_user_pages; - } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); - if (r) { - kvfree(e->user_pages); - e->user_pages = NULL; + r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); + if (r) goto out_free_user_pages; - } for (i = 0; i < bo->tbo.ttm->num_pages; i++) { - if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) { userpage_invalidated = true; break; } @@ -959,7 +934,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && - e->user_invalidated && e->user_pages) { + e->user_invalidated) { amdgpu_bo_placement_from_domain(e->bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, @@ -968,11 +943,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, - e->user_pages); + e->range); } - - kvfree(e->user_pages); - e->user_pages = NULL; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -1014,11 +986,7 @@ out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->bo; - if (!e->user_pages) - continue; amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); - kvfree(e->user_pages); - e->user_pages = NULL; e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); @@ -1767,30 +1735,21 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, { struct amdgpu_device *adev = drm_to_adev(dev); union drm_amdgpu_wait_fences *wait = data; - uint32_t 
fence_count = wait->in.fence_count; - struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; /* Get the fences from userspace */ - fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), - GFP_KERNEL); - if (fences == NULL) - return -ENOMEM; - - fences_user = u64_to_user_ptr(wait->in.fences); - if (copy_from_user(fences, fences_user, - sizeof(struct drm_amdgpu_fence) * fence_count)) { - r = -EFAULT; - goto err_free_fences; - } + fences = memdup_array_user(u64_to_user_ptr(wait->in.fences), + wait->in.fence_count, + sizeof(struct drm_amdgpu_fence)); + if (IS_ERR(fences)) + return PTR_ERR(fences); if (wait->in.wait_all) r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences); else r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences); -err_free_fences: kfree(fences); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bdfb80377e6a..7a899fb4de29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5072,6 +5072,10 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; + /* No need to evict when going to S5 through S4 callbacks */ + if (system_state == SYSTEM_POWER_OFF) + return 0; + ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); if (ret) { dev_warn(adev->dev, "evicting device resources failed\n"); @@ -5196,7 +5200,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) adev->in_suspend = true; if (amdgpu_sriov_vf(adev)) { - if (!adev->in_s0ix && !adev->in_runpm) + if (!adev->in_runpm) amdgpu_amdkfd_suspend_process(adev); amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); @@ -5216,10 +5220,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) { - amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - amdgpu_userq_suspend(adev); - } + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); r = amdgpu_device_evict_resources(adev); if (r) @@ -5314,15 +5316,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) goto exit; } - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); - if (r) - goto exit; + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; - r = amdgpu_userq_resume(adev); - if (r) - goto exit; - } + r = amdgpu_userq_resume(adev); + if (r) + goto exit; r = amdgpu_device_ip_late_init(adev); if (r) @@ -5335,7 +5335,7 @@ exit: amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); - if (!adev->in_s0ix && !r && !adev->in_runpm) + if (!r && !adev->in_runpm) r = amdgpu_amdkfd_resume_process(adev); } @@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list, if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); - if (tmp_adev->asic_reset_res) - r = tmp_adev->asic_reset_res; - - tmp_adev->asic_reset_res = 0; - - if (r) { + if (tmp_adev->asic_reset_res) { /* bad news, how to tell it to userspace ? 
* for ras error, we should report GPU bad status instead of * reset failure */ if (reset_context->src != AMDGPU_RESET_SRC_RAS || !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", - atomic_read(&tmp_adev->gpu_reset_counter)); - amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + dev_info( + tmp_adev->dev, + "GPU reset(%d) failed with error %d \n", + atomic_read( + &tmp_adev->gpu_reset_counter), + tmp_adev->asic_reset_res); + amdgpu_vf_error_put(tmp_adev, + AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, + tmp_adev->asic_reset_res); + if (!r) + r = tmp_adev->asic_reset_res; + tmp_adev->asic_reset_res = 0; } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", + atomic_read(&tmp_adev->gpu_reset_counter)); if (amdgpu_acpi_smart_shift_update(tmp_adev, AMDGPU_SS_DEV_D0)) dev_warn(tmp_adev->dev, @@ -6937,7 +6942,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_hive_info *hive __free(xgmi_put_hive) = + amdgpu_get_xgmi_hive(adev); struct amdgpu_reset_context reset_context; struct list_head device_list; @@ -6976,10 +6982,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta amdgpu_device_recovery_get_reset_lock(adev, &device_list); amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, hive, false); - if (hive) { + if (hive) mutex_unlock(&hive->hive_lock); - amdgpu_put_xgmi_hive(hive); - } return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ @@ -7158,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev) static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev) { - struct pci_dev *parent = pci_upstream_bridge(adev->pdev); + struct pci_dev *swus, *swds; int r; - if (parent->vendor != PCI_VENDOR_ID_ATI) + swds = pci_upstream_bridge(adev->pdev); + if (!swds || swds->vendor != PCI_VENDOR_ID_ATI || + pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM) + return; + swus = pci_upstream_bridge(swds); + if (!swus || + (swus->vendor != PCI_VENDOR_ID_ATI && + swus->vendor != PCI_VENDOR_ID_AMD) || + pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM) return; /* If already saved, return */ if (adev->pcie_reset_ctx.swus) return; /* Upstream bridge is ATI, assume it's SWUS/DS architecture */ - r = pci_save_state(parent); + r = pci_save_state(swds); if (r) return; - adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent); + adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds); - parent = pci_upstream_bridge(parent); - r = pci_save_state(parent); + r = pci_save_state(swus); if (r) return; - adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent); + adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus); - adev->pcie_reset_ctx.swus = parent; + adev->pcie_reset_ctx.swus = swus; } static void amdgpu_device_load_switch_state(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ece251cbe8c3..bff25ef3e2d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -960,7 +960,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); */ MODULE_PARM_DESC( freesync_video, - 
"Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); + "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)"); module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); /** @@ -2674,7 +2674,7 @@ static int amdgpu_pmops_thaw(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); /* do not resume device if it's normal hibernation */ - if (!pm_hibernate_is_recovering()) + if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend()) return 0; return amdgpu_device_resume(drm_dev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 91d638098889..b349bb3676d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -70,6 +70,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) [AMDGPU_PL_GWS] = "gws", [AMDGPU_PL_OA] = "oa", [AMDGPU_PL_DOORBELL] = "doorbell", + [AMDGPU_PL_MMIO_REMAP] = "mmioremap", }; unsigned int hw_ip, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 1c9b5009304e..094c508d3d44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -458,6 +458,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, /* always clear VRAM */ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP) + return -EINVAL; + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { @@ -569,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &range); + r = amdgpu_ttm_tt_get_user_pages(bo, &range); if (r) goto release_object; @@ -578,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto user_pages_done; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 98aa99b314c9..ebe2b4c68b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) + goto failed_kiq_read; + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } @@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) + goto failed_kiq_write; msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -2280,7 +2285,7 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) * Return: * return the latest index. */ -u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) +u32 amdgpu_gfx_csb_preamble_start(u32 *buffer) { u32 count = 0; @@ -2304,7 +2309,7 @@ u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) * Return: * return the latest index. 
*/ -u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count) +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count) { const struct cs_section_def *sect = NULL; const struct cs_extent_def *ext = NULL; @@ -2331,7 +2336,7 @@ u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, * @buffer: This is an output variable that gets the PACKET3 preamble end. * @count: Index to start set the preemble end. */ -void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count) +void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count) { buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 08f268dab8f5..fb5f7a0ee029 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -642,9 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); -u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer); -u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count); -void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count); +u32 amdgpu_gfx_csb_preamble_start(u32 *buffer); +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count); +void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count); void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index e36fede7f74c..2c6a6b858112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -167,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range) { struct hmm_range *hmm_range; unsigned long end; unsigned long timeout; - unsigned long i; unsigned long *pfns; int r = 0; @@ -222,14 +221,6 @@ retry: hmm_range->start = start; hmm_range->hmm_pfns = pfns; - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. 
- */ - for (i = 0; pages && i < npages; i++) - pages[i] = hmm_pfn_to_page(pfns[i]); - *phmm_range = hmm_range; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index e2edcd010ccc..953e1d06de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -33,7 +33,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range); bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 57101d24422f..9cb72f0c5277 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -184,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, snprintf(i2c->adapter.name, sizeof(i2c->adapter.name), "AMDGPU i2c hw bus %s", name); i2c->adapter.algo = &amdgpu_atombios_i2c_algo; - ret = i2c_add_adapter(&i2c->adapter); + ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter); if (ret) goto out_free; } else { @@ -215,15 +215,6 @@ out_free: } -void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) -{ - if (!i2c) - return; - WARN_ON(i2c->has_aux); - i2c_del_adapter(&i2c->adapter); - kfree(i2c); -} - void amdgpu_i2c_init(struct amdgpu_device *adev) { if (!adev->is_atom_fw) { @@ -248,12 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev) { int i; - for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) { - if (adev->i2c_bus[i]) { - amdgpu_i2c_destroy(adev->i2c_bus[i]); + for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) + if (adev->i2c_bus[i]) adev->i2c_bus[i] = NULL; - } - } } /* looks up bus based on id */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 5dd78a9cb12d..3ef5bc95642c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->vm_hub; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; - *id = id_mgr->reserved; + *id = vm->reserved_vmid[vmhub]; if ((*id)->owner != vm->immediate.fence_context || !amdgpu_vmid_compatible(*id, job) || (*id)->flushed_updates < updates || @@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) return vm->reserved_vmid[vmhub]; } -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm + * @adev: amdgpu device structure + * @vm: the VM to reserve an ID for + * @vmhub: the VMHUB which should be used + * + * Mostly used to have a reserved VMID for debugging and SPM. + * + * Returns: 0 for success, -ENOENT if an ID is already reserved. 
+ */ +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id; + int r = 0; mutex_lock(&id_mgr->lock); - - ++id_mgr->reserved_use_count; - if (!id_mgr->reserved) { - struct amdgpu_vmid *id; - - id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, - list); - /* Remove from normal round robin handling */ - list_del_init(&id->list); - id_mgr->reserved = id; + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (id_mgr->reserved_vmid) { + r = -ENOENT; + goto unlock; } - + /* Remove from normal round robin handling */ + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); + list_del_init(&id->list); + vm->reserved_vmid[vmhub] = id; + id_mgr->reserved_vmid = true; mutex_unlock(&id_mgr->lock); + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; } -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_free_reserved - free up a reserved VMID again + * @adev: amdgpu device structure + * @vm: the VM with the reserved ID + * @vmhub: the VMHUB which should be used + */ +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (!--id_mgr->reserved_use_count) { - /* give the reserved ID back to normal round robin */ - list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); - id_mgr->reserved = NULL; + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + id_mgr->reserved_vmid = false; } - mutex_unlock(&id_mgr->lock); } @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); - id_mgr->reserved_use_count = 0; /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) @@ -594,11 +613,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } - /* alloc a default reserved vmid to enforce isolation */ - for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..b3649cd3af56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; - struct amdgpu_vmid *reserved; - unsigned int reserved_use_count; + bool reserved_vmid; }; int amdgpu_pasid_alloc(unsigned int bits); @@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - unsigned vmhub); -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - unsigned vmhub); +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 7f7ea046e209..f58b6be7fccc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -56,14 +56,14 @@ struct amdgpu_ih_ring { bool use_bus_addr; struct amdgpu_bo *ring_obj; - volatile uint32_t *ring; + uint32_t *ring; uint64_t gpu_addr; uint64_t wptr_addr; - volatile uint32_t *wptr_cpu; + uint32_t *wptr_cpu; uint64_t rptr_addr; - volatile uint32_t *rptr_cpu; + uint32_t *rptr_cpu; bool enabled; unsigned rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 22da65f45226..6b7d66b6d4cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -540,3 +540,68 @@ void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_pri drm_printf(p, "\nInactive Instance:JPEG%d\n", i); } } + +static inline bool amdgpu_jpeg_reg_valid(u32 reg) +{ + if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END || + (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END)) + return false; + else + return true; +} + +/** + * amdgpu_jpeg_dec_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ + +int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || + !amdgpu_jpeg_reg_valid(reg)) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case 
PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || + !amdgpu_jpeg_reg_valid(reg)) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 4f0775e39b54..346ae0ab09d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -25,11 +25,18 @@ #define __AMDGPU_JPEG_H__ #include "amdgpu_ras.h" +#include "amdgpu_cs.h" #define AMDGPU_MAX_JPEG_INSTANCES 4 #define AMDGPU_MAX_JPEG_RINGS 10 #define AMDGPU_MAX_JPEG_RINGS_4_0_3 8 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 +#define JPEG_ATOMIC_RANGE_START 0x4120 +#define JPEG_ATOMIC_RANGE_END 0x412A + + #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) @@ -170,5 +177,8 @@ int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, const struct amdgpu_hwip_reg_entry *reg, u32 count); void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block); void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p); +int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 8a76960803c6..a9327472c651 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -939,6 +939,10 @@ out: if (adev->gfx.config.ta_cntl2_truncate_coord_mode) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; + /* Gang submit is not supported under SRIOV currently */ + if (!amdgpu_sriov_vf(adev)) + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT; + if (amdgpu_passthrough(adev)) dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT << AMDGPU_IDS_FLAGS_MODE_SHIFT) & @@ -1417,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_debugfs_vm_init(file_priv); - r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid); if (r) goto error_pasid; - r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); - if (r) - goto error_vm; - fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { r = -ENOMEM; @@ -1464,10 +1464,8 @@ error_vm: amdgpu_vm_fini(adev, &fpriv->vm); error_pasid: - if (pasid) { + if (pasid) amdgpu_pasid_free(pasid); - amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); - } kfree(fpriv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d18bade9c98f..e08f58de4b17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -153,6 +153,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_MMIO_REMAP; + places[c].flags = 0; + c++; + } + if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -1546,6 +1554,8 @@ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) return AMDGPU_PL_OA; case AMDGPU_GEM_DOMAIN_DOORBELL: 
return AMDGPU_PL_DOORBELL; + case AMDGPU_GEM_DOMAIN_MMIO_REMAP: + return AMDGPU_PL_MMIO_REMAP; default: return TTM_PL_SYSTEM; } @@ -1629,6 +1639,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) case AMDGPU_PL_DOORBELL: placement = "DOORBELL"; break; + case AMDGPU_PL_MMIO_REMAP: + placement = "MMIO REMAP"; + break; case TTM_PL_SYSTEM: default: placement = "CPU"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 87523fcd4386..656b8a931dae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -167,6 +167,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) return AMDGPU_GEM_DOMAIN_OA; case AMDGPU_PL_DOORBELL: return AMDGPU_GEM_DOMAIN_DOORBELL; + case AMDGPU_PL_MMIO_REMAP: + return AMDGPU_GEM_DOMAIN_MMIO_REMAP; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3696f48c233b..8c0e5d03de50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -506,7 +506,8 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block) } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_VRAM, + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); @@ -2351,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp) } ret = psp_ta_load(psp, &psp->securedisplay_context.context); - if (!ret) { + if (!ret && !psp->securedisplay_context.context.resp_status) { psp->securedisplay_context.context.initialized = true; mutex_init(&psp->securedisplay_context.mutex); } else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 38face981c3e..6e8aad91bcd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -171,13 +171,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t copy_pos += sizeof(uint32_t); - ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); - if (!ta_bin) - return -ENOMEM; - if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) { - ret = -EFAULT; - goto err_free_bin; - } + ta_bin = memdup_user(&buf[copy_pos], ta_bin_len); + if (IS_ERR(ta_bin)) + return PTR_ERR(ta_bin); /* Set TA context and functions */ set_ta_context_funcs(psp, ta_type, &context); @@ -327,13 +323,9 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size return -EFAULT; copy_pos += sizeof(uint32_t); - shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); - if (!shared_buf) - return -ENOMEM; - if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) { - ret = -EFAULT; - goto err_free_shared_buf; - } + shared_buf = memdup_user(&buf[copy_pos], shared_buf_len); + if (IS_ERR(shared_buf)) + return PTR_ERR(shared_buf); set_ta_context_funcs(psp, ta_type, &context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7fe5b1940df8..e0ee21150860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -219,10 +219,17 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev, struct amdgpu_vram_block_info blk_info; uint64_t page_pfns[32] = {0}; int i, ret, count; + bool hit = false; if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) return 0; + 
if (amdgpu_sriov_vf(adev)) { + if (amdgpu_virt_check_vf_critical_region(adev, address, &hit)) + return -EPERM; + return hit ? -EACCES : 0; + } + if ((address >= adev->gmc.mc_vram_size) || (address >= RAS_UMC_INJECT_ADDR_LIMIT)) return -EFAULT; @@ -2702,6 +2709,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + unsigned int error_query_mode; enum ras_event_type type; if (hive) { @@ -2730,6 +2738,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) device_list_handle = &device_list; } + if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) { + if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) { + /* wait 500ms to ensure pmfw polling mca bank info done */ + msleep(500); + } + } + type = amdgpu_ras_get_fatal_error_event(adev); list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 50fcd86e1033..be2e56ce1355 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -91,6 +91,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, break; case TTM_PL_TT: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: node = to_ttm_range_mgr_node(res)->mm_nodes; while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; @@ -153,6 +154,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) break; case TTM_PL_TT: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: node = cur->node; cur->node = ++node; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 486c3646710c..8f6ce948c684 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -364,7 +364,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* Allocate ring buffer */ if (ring->ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7670f5d82b9e..b6b649179776 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -114,7 +114,7 @@ struct amdgpu_sched { */ struct amdgpu_fence_driver { uint64_t gpu_addr; - volatile uint32_t *cpu_addr; + uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; @@ -211,7 +211,18 @@ struct amdgpu_ring_funcs { bool support_64bit_ptrs; bool no_user_fence; bool secure_submission_supported; - unsigned extra_dw; + + /** + * @extra_bytes: + * + * Optional extra space in bytes that is added to the ring size + * when allocating the BO that holds the contents of the ring. + * This space isn't used for command submission to the ring, + * but is just there to satisfy some hardware requirements or + * implement workarounds. It's up to the implementation of each + * specific ring to initialize this space. 
+ */ + unsigned extra_bytes; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); @@ -298,7 +309,7 @@ struct amdgpu_ring { unsigned int ring_backup_entries_to_copy; unsigned rptr_offs; u64 rptr_gpu_addr; - volatile u32 *rptr_cpu_addr; + u32 *rptr_cpu_addr; /** * @wptr: @@ -378,19 +389,19 @@ struct amdgpu_ring { * This is the CPU address pointer in the writeback slot. This is used * to commit changes to the GPU. */ - volatile u32 *wptr_cpu_addr; + u32 *wptr_cpu_addr; unsigned fence_offs; u64 fence_gpu_addr; - volatile u32 *fence_cpu_addr; + u32 *fence_cpu_addr; uint64_t current_ctx; char name[16]; u32 trail_seq; unsigned trail_fence_offs; u64 trail_fence_gpu_addr; - volatile u32 *trail_fence_cpu_addr; + u32 *trail_fence_cpu_addr; unsigned cond_exe_offs; u64 cond_exe_gpu_addr; - volatile u32 *cond_exe_cpu_addr; + u32 *cond_exe_cpu_addr; unsigned int set_q_mode_offs; u32 *set_q_mode_ptr; u64 set_q_mode_token; @@ -470,10 +481,7 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { - int i = 0; - while (i <= ring->buf_mask) - ring->ring[i++] = ring->funcs->nop; - + memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1); } static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index db5791e1a7ce..5aa830a02d80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -89,7 +89,7 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id) int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) { const u32 *src_ptr; - volatile u32 *dst_ptr; + u32 *dst_ptr; u32 i; int r; @@ -189,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev) void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) { const __le32 *fw_data; - volatile u32 *dst_ptr; + u32 *dst_ptr; int me, i, max_me; u32 bo_offset = 0; u32 table_offset, table_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index c210625be220..2ce310b31942 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -251,7 +251,7 @@ struct amdgpu_rlc_funcs { * and it also provides a pointer to it which is used by the firmware * to load the clear state in some cases. 
*/ - void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); + void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer); int (*get_cp_table_num)(struct amdgpu_device *adev); int (*resume)(struct amdgpu_device *adev); void (*stop)(struct amdgpu_device *adev); @@ -275,19 +275,19 @@ struct amdgpu_rlc { /* for power gating */ struct amdgpu_bo *save_restore_obj; uint64_t save_restore_gpu_addr; - volatile uint32_t *sr_ptr; + uint32_t *sr_ptr; const u32 *reg_list; u32 reg_list_size; /* for clear state */ struct amdgpu_bo *clear_state_obj; uint64_t clear_state_gpu_addr; - volatile uint32_t *cs_ptr; + uint32_t *cs_ptr; const struct cs_section_def *cs_data; u32 clear_state_size; /* for cp tables */ struct amdgpu_bo *cp_table_obj; uint64_t cp_table_gpu_addr; - volatile uint32_t *cp_table_ptr; + uint32_t *cp_table_ptr; u32 cp_table_size; /* safe mode for updating CG/PG state */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 428265046815..aa9ee5dffa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -123,6 +123,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GWS: case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: placement->num_placement = 0; return; @@ -448,7 +449,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, return false; if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || - res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL || + res->mem_type == AMDGPU_PL_MMIO_REMAP) return true; if (res->mem_type != TTM_PL_VRAM) @@ -539,10 +541,12 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || old_mem->mem_type == AMDGPU_PL_DOORBELL || + old_mem->mem_type == AMDGPU_PL_MMIO_REMAP || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || new_mem->mem_type == AMDGPU_PL_OA || - new_mem->mem_type == AMDGPU_PL_DOORBELL) { + new_mem->mem_type == AMDGPU_PL_DOORBELL || + new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) { /* Nothing to save here */ amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); @@ -630,6 +634,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.is_iomem = true; mem->bus.caching = ttm_uncached; break; + case AMDGPU_PL_MMIO_REMAP: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->rmmio_remap.bus_addr; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_uncached; + break; default: return -EINVAL; } @@ -647,6 +657,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, if (bo->resource->mem_type == AMDGPU_PL_DOORBELL) return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT; + else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP) + return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT; return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; } @@ -696,7 +708,7 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; @@ -733,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct 
amdgpu_bo *bo, struct page **pages, readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, - readonly, NULL, pages, range); + readonly, NULL, range); out_unlock: mmap_read_unlock(mm); if (r) @@ -785,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, * that backs user memory and will ultimately be mapped into the device * address space. */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range) { unsigned long i; for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i] = pages ? pages[i] : NULL; + ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL; } /* @@ -1356,7 +1368,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && (mem->mem_type == TTM_PL_TT || mem->mem_type == AMDGPU_PL_DOORBELL || - mem->mem_type == AMDGPU_PL_PREEMPT)) { + mem->mem_type == AMDGPU_PL_PREEMPT || + mem->mem_type == AMDGPU_PL_MMIO_REMAP)) { flags |= AMDGPU_PTE_SYSTEM; if (ttm->caching == ttm_cached) @@ -1843,6 +1856,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) adev->mman.ttm_pools = NULL; } +/** + * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO + * @adev: amdgpu device + * + * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the + * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host + * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular + * GEM object (amdgpu_bo_create). + * + * Return: + * * 0 on success or intentional skip (feature not present/unsupported) + * * negative errno on allocation failure + */ +static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) +{ + struct amdgpu_bo_param bp; + int r; + + /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */ + if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE) + return 0; + + memset(&bp, 0, sizeof(bp)); + + /* Create exactly one GEM BO in the MMIO_REMAP domain. */ + bp.type = ttm_bo_type_device; /* userspace-mappable GEM */ + bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */ + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP; + bp.flags = 0; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo); + if (r) + return r; + + return 0; +} + +/** + * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO + * @adev: amdgpu device + * + * Frees the kernel-owned MMIO_REMAP BO if it was allocated by + * amdgpu_ttm_mmio_remap_bo_init(). + */ +static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_unref(&adev->rmmio_remap.bo); + adev->rmmio_remap.bo = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. 
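The two MMIO_REMAP helpers above are wired into TTM setup and teardown by the hunks below. Condensed into a small sketch (the calls are the real functions from this patch; the wrapper functions, error handling and surrounding init steps are illustrative only), the ordering is:

static int mmio_remap_setup_sketch(struct amdgpu_device *adev)
{
	int r;

	/* Register a one-page TTM range manager for the new placement. */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
	if (r)
		return r;

	/* Allocate the singleton 4K BO; a no-op when no remap base is exposed. */
	return amdgpu_ttm_mmio_remap_bo_init(adev);
}

static void mmio_remap_teardown_sketch(struct amdgpu_device *adev)
{
	/* Reverse order on fini: drop the BO first, then the range manager. */
	amdgpu_ttm_mmio_remap_bo_fini(adev);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
}

Only the calls inside the sketch appear in the patch itself; the two wrapper functions do not.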
@@ -1879,11 +1945,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } adev->mman.initialized = true; - /* Initialize VRAM pool with all of VRAM divided into pages */ - r = amdgpu_vram_mgr_init(adev); - if (r) { - dev_err(adev->dev, "Failed initializing VRAM heap.\n"); - return r; + if (!adev->gmc.is_app_apu) { + /* Initialize VRAM pool with all of VRAM divided into pages */ + r = amdgpu_vram_mgr_init(adev); + if (r) { + dev_err(adev->dev, "Failed initializing VRAM heap.\n"); + return r; + } } /* Change the size here instead of the init above so only lpfn is affected */ @@ -2010,6 +2078,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + /* Initialize MMIO-remap pool (single page 4K) */ + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1); + if (r) { + dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n"); + return r; + } + + /* Allocate the singleton MMIO_REMAP BO (4K) if supported */ + r = amdgpu_ttm_mmio_remap_bo_init(adev); + if (r) + return r; + /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); if (r) { @@ -2072,6 +2152,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + + amdgpu_ttm_mmio_remap_bo_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); @@ -2084,7 +2166,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) drm_dev_exit(idx); } - amdgpu_vram_mgr_fini(adev); + if (!adev->gmc.is_app_apu) + amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); amdgpu_doorbell_fini(adev); @@ -2093,6 +2176,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; dev_info(adev->dev, "amdgpu: ttm finalized\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index d82d107fdcc6..0be2728aa872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -34,7 +34,8 @@ #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) #define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) -#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5) +#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5) +#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 @@ -190,7 +191,7 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range); void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, struct hmm_range *range); @@ -198,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages, struct hmm_range **range) { return -EPERM; @@ -214,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, } #endif -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, 
struct hmm_range *range); int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, uint64_t *user_addr); int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 467e8fa6cb8b..1add21160d21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -44,8 +44,41 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) return userq_ip_mask; } +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *va_map; + u64 user_addr; + u64 size; + int r = 0; + + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + r = amdgpu_bo_reserve(vm->root.bo, false); + if (r) + return r; + + va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!va_map) { + r = -EINVAL; + goto out_err; + } + /* Only validate the userq whether resident in the VM mapping range */ + if (user_addr >= va_map->start && + va_map->last - user_addr + 1 >= size) { + amdgpu_bo_unreserve(vm->root.bo); + return 0; + } + + r = -EINVAL; +out_err: + amdgpu_bo_unreserve(vm->root.bo); + return r; +} + static int -amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, +amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; @@ -54,6 +87,49 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, int r = 0; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { + r = userq_funcs->preempt(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_PREEMPTED; + } + } + + return r; +} + +static int +amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) { + r = userq_funcs->restore(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_MAPPED; + } + } + + return r; +} + +static int +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { r = userq_funcs->unmap(uq_mgr, queue); if (r) queue->state = AMDGPU_USERQ_STATE_HUNG; @@ -112,22 +188,6 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } -int -amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) -{ - struct amdgpu_usermode_queue *queue; - int queue_id; - int ret = 0; - - mutex_lock(&uq_mgr->userq_mutex); - /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) - ret += queue->state == AMDGPU_USERQ_STATE_MAPPED; - - mutex_unlock(&uq_mgr->userq_mutex); - return ret; -} - static struct amdgpu_usermode_queue * amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -323,6 +383,11 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) debugfs_remove_recursive(queue->debugfs_queue); #endif r = amdgpu_userq_unmap_helper(uq_mgr, queue); + /*TODO: It requires a reset for 
userq hw unmap error*/ + if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) { + drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); + queue->state = AMDGPU_USERQ_STATE_HUNG; + } amdgpu_userq_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); @@ -404,27 +469,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; - /* Usermode queues are only supported for GFX IP as of now */ - if (args->in.ip_type != AMDGPU_HW_IP_GFX && - args->in.ip_type != AMDGPU_HW_IP_DMA && - args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { - drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n", - args->in.ip_type); - return -EINVAL; - } - r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; - if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && - (args->in.ip_type != AMDGPU_HW_IP_GFX) && - (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && - !amdgpu_is_tmz(adev)) { - drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n"); - return -EINVAL; - } - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); @@ -456,6 +504,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = -ENOMEM; goto unlock; } + + /* Validate the userq virtual address.*/ + if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) || + amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + r = -EINVAL; + kfree(queue); + goto unlock; + } queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; @@ -543,22 +600,45 @@ unlock: return r; } -int amdgpu_userq_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) +static int amdgpu_userq_input_args_validate(struct drm_device *dev, + union drm_amdgpu_userq *args, + struct drm_file *filp) { - union drm_amdgpu_userq *args = data; - int r; + struct amdgpu_device *adev = drm_to_adev(dev); switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) return -EINVAL; - r = amdgpu_userq_create(filp, args); - if (r) - drm_file_err(filp, "Failed to create usermode queue\n"); - break; + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { + drm_file_err(filp, "Usermode queue doesn't support IP type %u\n", + args->in.ip_type); + return -EINVAL; + } + + if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && + (args->in.ip_type != AMDGPU_HW_IP_GFX) && + (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && + !amdgpu_is_tmz(adev)) { + drm_file_err(filp, "Secure only supported on GFX/Compute queues\n"); + return -EINVAL; + } + if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET || + args->in.queue_va == 0 || + args->in.queue_size == 0) { + drm_file_err(filp, "invalidate userq queue va or size\n"); + return -EINVAL; + } + if (!args->in.wptr_va || !args->in.rptr_va) { + drm_file_err(filp, "invalidate userq queue rptr or wptr\n"); + return -EINVAL; + } + break; case AMDGPU_USERQ_OP_FREE: if (args->in.ip_type || 
args->in.doorbell_handle || @@ -571,6 +651,31 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, args->in.mqd || args->in.mqd_size) return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + if (amdgpu_userq_input_args_validate(dev, args, filp) < 0) + return -EINVAL; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + r = amdgpu_userq_create(filp, args); + if (r) + drm_file_err(filp, "Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: r = amdgpu_userq_destroy(filp, args->in.queue_id); if (r) drm_file_err(filp, "Failed to destroy usermode queue\n"); @@ -593,7 +698,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { - r = amdgpu_userq_map_helper(uq_mgr, queue); + r = amdgpu_userq_restore_helper(uq_mgr, queue); if (r) ret = r; } @@ -603,108 +708,106 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) return ret; } +static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + +/* Handle all BOs on the invalidated list, validate them and update the PTs */ static int -amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec, + struct amdgpu_vm *vm) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; int ret; - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + spin_lock(&vm->status_lock); + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, + struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (ret) - DRM_ERROR("Fail to validate\n"); + bo = bo_va->base.bo; + ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); + if (unlikely(ret)) + return ret; - return ret; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + return ret; + + /* This moves the bo_va to the done list */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + return ret; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + return 0; } +/* Make sure the whole VM is ready to be used */ static int -amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); - struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; - struct ww_acquire_ctx *ticket; struct drm_exec exec; - struct amdgpu_bo *bo; - struct dma_resv *resv; - bool clear, unlock; - int ret = 0; + int ret; drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { - ret = amdgpu_vm_lock_pd(vm, &exec, 2); + ret = amdgpu_vm_lock_pd(vm, &exec, 1); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) { - drm_file_err(uq_mgr->file, "Failed to lock PD\n"); + if (unlikely(ret)) goto unlock_all; - } - /* Lock the done list */ - list_for_each_entry(bo_va, &vm->done, base.vm_status) { - bo = 
bo_va->base.bo; - if (!bo) - continue; - - ret = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_all; - } - } - - spin_lock(&vm->status_lock); - while (!list_empty(&vm->moved)) { - bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, - base.vm_status); - spin_unlock(&vm->status_lock); - - /* Per VM BOs never need to bo cleared in the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false); - if (ret) + ret = amdgpu_vm_lock_done_list(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) goto unlock_all; - spin_lock(&vm->status_lock); - } - - ticket = &exec.ticket; - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, - base.vm_status); - resv = bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->status_lock); - bo = bo_va->base.bo; - ret = amdgpu_userq_validate_vm_bo(NULL, bo); - if (ret) { - drm_file_err(uq_mgr->file, "Failed to validate BO\n"); + /* This validates PDs, PTs and per VM BOs */ + ret = amdgpu_vm_validate(adev, vm, NULL, + amdgpu_userq_validate_vm, + NULL); + if (unlikely(ret)) goto unlock_all; - } - /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) { - clear = false; - unlock = true; - /* The caller is already holding the reservation lock */ - } else if (dma_resv_locking_ctx(resv) == ticket) { - clear = false; - unlock = false; - /* Somebody else is using the BO right now */ - } else { - clear = true; - unlock = false; - } + /* This locks and validates the remaining evicted BOs */ + ret = amdgpu_userq_bo_validate(adev, &exec, vm); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } - ret = amdgpu_vm_bo_update(adev, bo_va, clear); + ret = amdgpu_vm_handle_moved(adev, vm, NULL); + if (ret) + goto unlock_all; - if (unlock) - dma_resv_unlock(resv); - if (ret) - goto unlock_all; + ret = amdgpu_vm_update_pdes(adev, vm, false); + if (ret) + goto unlock_all; - spin_lock(&vm->status_lock); - } - spin_unlock(&vm->status_lock); + /* + * We need to wait for all VM updates to finish before restarting the + * queues. Using the done list like that is now ok since everything is + * locked in place. 
+ */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) + dma_fence_wait(bo_va->last_pt_update, false); + dma_fence_wait(vm->last_update, false); ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); if (ret) @@ -725,7 +828,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) mutex_lock(&uq_mgr->userq_mutex); - ret = amdgpu_userq_validate_bos(uq_mgr); + ret = amdgpu_userq_vm_validate(uq_mgr); if (ret) { drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); goto unlock; @@ -750,7 +853,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) /* Try to unmap all the queues in this process ctx */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { - r = amdgpu_userq_unmap_helper(uq_mgr, queue); + r = amdgpu_userq_preempt_helper(uq_mgr, queue); if (r) ret = r; } @@ -876,7 +979,10 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) cancel_delayed_work_sync(&uqm->resume_work); mutex_lock(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { - r = amdgpu_userq_unmap_helper(uqm, queue); + if (adev->in_s0ix) + r = amdgpu_userq_preempt_helper(uqm, queue); + else + r = amdgpu_userq_unmap_helper(uqm, queue); if (r) ret = r; } @@ -901,7 +1007,10 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { mutex_lock(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { - r = amdgpu_userq_map_helper(uqm, queue); + if (adev->in_s0ix) + r = amdgpu_userq_restore_helper(uqm, queue); + else + r = amdgpu_userq_map_helper(uqm, queue); if (r) ret = r; } @@ -935,7 +1044,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - r = amdgpu_userq_unmap_helper(uqm, queue); + r = amdgpu_userq_preempt_helper(uqm, queue); if (r) ret = r; } @@ -969,7 +1078,7 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - r = amdgpu_userq_map_helper(uqm, queue); + r = amdgpu_userq_restore_helper(uqm, queue); if (r) ret = r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 1bd84f4cce78..c027dd916672 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -120,8 +120,6 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence *ev_fence); -int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr); - void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); @@ -139,4 +137,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx); +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, + u64 expected_size); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 95e91d1dc58a..761bad98da3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -284,7 +284,7 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, /* Check if hardware has already processed 
the job */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled_locked(fence)) + if (!dma_fence_is_signaled(fence)) list_add_tail(&userq_fence->link, &fence_drv->fences); else dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h new file mode 100644 index 000000000000..1e40ca3b1584 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_UTILS_H_ +#define AMDGPU_UTILS_H_ + +/* ---------- Generic 2‑bit capability attribute encoding ---------- + * 00 INVALID, 01 RO, 10 WO, 11 RW + */ +enum amdgpu_cap_attr { + AMDGPU_CAP_ATTR_INVALID = 0, + AMDGPU_CAP_ATTR_RO = 1 << 0, + AMDGPU_CAP_ATTR_WO = 1 << 1, + AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO), +}; + +#define AMDGPU_CAP_ATTR_BITS 2 +#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1) + +/* Internal helper to build helpers for a given enum NAME */ +#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \ +enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \ +struct NAME##_caps { \ + DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \ +}; \ +static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \ +{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \ +static inline void NAME##_attr_init(struct NAME##_caps *c) \ +{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \ +static inline int NAME##_attr_set(struct NAME##_caps *c, \ + enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \ +{ \ + if (!c) \ + return -EINVAL; \ + if (cap >= NAME##_COUNT) \ + return -EINVAL; \ + if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \ + return -EINVAL; \ + bitmap_write(c->bmap, (unsigned long)attr, \ + NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \ + return 0; \ +} \ +static inline int NAME##_attr_get(const struct NAME##_caps *c, \ + enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \ +{ \ + unsigned long v; \ + if (!c || !out) \ + return -EINVAL; \ + if (cap >= NAME##_COUNT) \ + return -EINVAL; \ + v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \ + *out = (enum amdgpu_cap_attr)v; \ + return 0; \ +} \ +static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \ 
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \ +static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; } + +/* Element expander for enum creation */ +#define _CAP_ENUM_ELEM(x) x, + +/* Public macro: declare enum + helpers from an X‑macro list */ +#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \ + enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \ + DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) + +#endif /* AMDGPU_UTILS_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 595f0df17bcc..5e0786ea911b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -257,12 +257,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) return 0; } -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) +void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) { int j; if (adev->vcn.harvest_config & (1 << i)) - return 0; + return; amdgpu_bo_free_kernel( &adev->vcn.inst[i].dpg_sram_bo, @@ -292,8 +292,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); - - return 0; } bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) @@ -1159,7 +1157,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, { struct amdgpu_vcn_inst *vcn; void *log_buf; - volatile struct amdgpu_vcn_fwlog *plog; + struct amdgpu_vcn_fwlog *plog; unsigned int read_pos, write_pos, available, i, read_bytes = 0; unsigned int read_num[2] = {0}; @@ -1172,7 +1170,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; - plog = (volatile struct amdgpu_vcn_fwlog *)log_buf; + plog = (struct amdgpu_vcn_fwlog *)log_buf; read_pos = plog->rptr; write_pos = plog->wptr; @@ -1239,11 +1237,11 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) { #if defined(CONFIG_DEBUG_FS) - volatile uint32_t *flag = vcn->fw_shared.cpu_addr; + uint32_t *flag = vcn->fw_shared.cpu_addr; void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; - volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; - volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; + struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + vcn->fw_shared.log_offset; *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); fw_log->is_enabled = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6d9acd36041d..dc8a17bcc3c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -501,7 +501,7 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; - uint8_t pad3[9]; + uint8_t pad3[404]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 @@ -516,7 +516,7 @@ enum 
vcn_ring_type { int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i); int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i); -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i); +void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i); int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i); int amdgpu_vcn_resume(struct amdgpu_device *adev, int i); void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 13f0cdeb59c4..3328ab63376b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -828,11 +828,14 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev) { ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1); ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1); + ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1); ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs, RATELIMIT_MSG_ON_RELEASE); ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs, RATELIMIT_MSG_ON_RELEASE); + ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs, + RATELIMIT_MSG_ON_RELEASE); mutex_init(&adev->virt.ras.ras_telemetry_mutex); @@ -1501,3 +1504,55 @@ void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev) if (virt->ops && virt->ops->req_bad_pages) virt->ops->req_bad_pages(adev); } + +static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev, + struct amdsriov_ras_telemetry *host_telemetry, + bool *hit) +{ + struct amd_sriov_ras_chk_criti *tmp = NULL; + uint32_t checksum, used_size; + + checksum = host_telemetry->header.checksum; + used_size = host_telemetry->header.used_size; + + if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + return 0; + + tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) + goto out; + + if (hit) + *hit = tmp->hit ? true : false; + +out: + kfree(tmp); + + return 0; +} + +int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit) +{ + struct amdgpu_virt *virt = &adev->virt; + int r = -EPERM; + + if (!virt->ops || !virt->ops->req_ras_chk_criti) + return -EOPNOTSUPP; + + /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host + * will ignore incoming guest messages. Ratelimit the guest messages to + * prevent guest self DOS. 
+ */ + if (__ratelimit(&virt->ras.ras_chk_criti_rs)) { + mutex_lock(&virt->ras.ras_telemetry_mutex); + if (!virt->ops->req_ras_chk_criti(adev, addr)) + r = amdgpu_virt_cache_chk_criti_hit( + adev, virt->fw_reserve.ras_telemetry, hit); + mutex_unlock(&virt->ras.ras_telemetry_mutex); + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 58accf2259b3..d1172c8e58c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -98,6 +98,7 @@ struct amdgpu_virt_ops { int (*req_ras_err_count)(struct amdgpu_device *adev); int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); int (*req_bad_pages)(struct amdgpu_device *adev); + int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr); }; /* @@ -252,10 +253,15 @@ struct amdgpu_virt_ras_err_handler_data { struct amdgpu_virt_ras { struct ratelimit_state ras_error_cnt_rs; struct ratelimit_state ras_cper_dump_rs; + struct ratelimit_state ras_chk_criti_rs; struct mutex ras_telemetry_mutex; uint64_t cper_rptr; }; +#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT) + +DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST); + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -274,6 +280,7 @@ struct amdgpu_virt { const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; + struct amdgpu_virt_caps virt_caps; uint32_t gim_feature; uint32_t reg_access_mode; int req_init_data_ver; @@ -448,4 +455,5 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); +int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 155bb9891a17..79bad9cbe2ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -14,7 +14,6 @@ #include "dce_v8_0.h" #endif #include "dce_v10_0.h" -#include "dce_v11_0.h" #include "ivsrcid/ivsrcid_vislands30.h" #include "amdgpu_vkms.h" #include "amdgpu_display.h" @@ -581,13 +580,6 @@ static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block) case CHIP_TONGA: dce_v10_0_disable_dce(adev); break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_VEGAM: - dce_v11_0_disable_dce(adev); - break; case CHIP_TOPAZ: #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_HAINAN: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dbda3a38a2b0..c1a801203949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -128,43 +128,14 @@ struct amdgpu_vm_tlb_seq_struct { }; /** - * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm pointer - * @pasid: the pasid the VM is using on this GPU - * - * Set the pasid this VM is using on this GPU, can also be used to remove the - * pasid by passing in zero. + * amdgpu_vm_assert_locked - check if VM is correctly locked + * @vm: the VM which schould be tested * + * Asserts that the VM root PD is locked. 
*/ -int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid) +static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) { - int r; - - if (vm->pasid == pasid) - return 0; - - if (vm->pasid) { - r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); - if (r < 0) - return r; - - vm->pasid = 0; - } - - if (pasid) { - r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, - GFP_KERNEL)); - if (r < 0) - return r; - - vm->pasid = pasid; - } - - - return 0; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); } /** @@ -181,6 +152,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_bo *bo = vm_bo->bo; vm_bo->moved = true; + amdgpu_vm_assert_locked(vm); spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); @@ -198,6 +170,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); spin_unlock(&vm_bo->vm->status_lock); @@ -213,6 +186,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); spin_unlock(&vm_bo->vm->status_lock); @@ -260,6 +234,7 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); if (vm_bo->bo->parent) { spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); @@ -279,6 +254,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); spin_unlock(&vm_bo->vm->status_lock); @@ -295,10 +271,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) { struct amdgpu_vm_bo_base *vm_bo, *tmp; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); list_splice_init(&vm->done, &vm->invalidated); list_for_each_entry(vm_bo, &vm->invalidated, vm_status) vm_bo->moved = true; + list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; @@ -327,6 +306,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); bool shared; + dma_resv_assert_held(bo->tbo.base.resv); spin_lock(&vm->status_lock); shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); if (base->shared != shared) { @@ -485,6 +465,42 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, } /** + * amdgpu_vm_lock_done_list - lock all BOs on the done list + * @vm: vm providing the BOs + * @exec: drm execution context + * @num_fences: number of extra fences to reserve + * + * Lock the BOs on the done list in the DRM execution context. 
+ */ +int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct list_head *prev = &vm->done; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + int ret; + + /* We can only trust prev->next while holding the lock */ + spin_lock(&vm->status_lock); + while (!list_is_head(prev->next, &vm->done)) { + bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + if (bo) { + ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); + if (unlikely(ret)) + return ret; + } + spin_lock(&vm->status_lock); + prev = prev->next; + } + spin_unlock(&vm->status_lock); + + return 0; +} + +/** * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU * * @adev: amdgpu device pointer @@ -616,18 +632,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - - if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { - struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); - - pr_warn_ratelimited("Evicted user BO is not reserved\n"); - if (ti) { - pr_warn_ratelimited("pid %d\n", ti->task.pid); - amdgpu_vm_put_task_info(ti); - } - - return -EINVAL; - } + dma_resv_assert_held(bo->tbo.base.resv); r = validate(param, bo); if (r) @@ -660,6 +665,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { bool ret; + amdgpu_vm_assert_locked(vm); + amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); @@ -962,6 +969,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, LIST_HEAD(relocated); int r, idx; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); list_splice_init(&vm->relocated, &relocated); spin_unlock(&vm->status_lock); @@ -2540,6 +2549,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: requested vm * @xcp_id: GPU partition selection id + * @pasid: the pasid the VM is using on this GPU * * Init @vm fields. * @@ -2547,7 +2557,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * 0 for success, error for failure. 
*/ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int32_t xcp_id) + int32_t xcp_id, uint32_t pasid) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2623,12 +2633,26 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) dev_dbg(adev->dev, "Failed to create task info for VM\n"); + /* Store new PASID in XArray (if non-zero) */ + if (pasid != 0) { + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL)); + if (r < 0) + goto error_free_root; + + vm->pasid = pasid; + } + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); return 0; error_free_root: + /* If PASID was partially set, erase it from XArray before failing */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2734,7 +2758,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - amdgpu_vm_set_pasid(adev, vm, 0); + /* Remove PASID mapping before destroying VM */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); dma_fence_wait(vm->last_tlb_flush, false); @@ -2775,10 +2803,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { - if (vm->reserved_vmid[i]) { - amdgpu_vmid_free_reserved(adev, i); - vm->reserved_vmid[i] = false; - } + amdgpu_vmid_free_reserved(adev, vm, i); } ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); @@ -2874,6 +2899,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; /* No valid flags defined yet */ if (args->in.flags) @@ -2882,17 +2908,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; - } - + amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; case AMDGPU_VM_OP_UNRESERVE_VMID: - if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; - } + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; @@ -3030,6 +3049,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_done_objs = 0; unsigned int id = 0; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 67eaf5402e7e..cf0ec94e8a07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -349,12 +349,16 @@ struct amdgpu_vm { /* Memory statistics for this vm, protected by status_lock */ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; + /* + * The following lists contain amdgpu_vm_bo_base objects for either + * 
PDs, PTs or per VM BOs. The state transits are: + * + * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle + */ + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; - /* BOs for user mode queues that need a validation */ - struct list_head evicted_user; - /* PT BOs which relocated and their parent need an update */ struct list_head relocated; @@ -364,15 +368,29 @@ struct amdgpu_vm { /* All BOs of this VM not currently in the state machine */ struct list_head idle; + /* + * The following lists contain amdgpu_vm_bo_base objects for BOs which + * have their own dma_resv object and not depend on the root PD. Their + * state transits are: + * + * evicted_user or invalidated -> done + */ + + /* BOs for user mode queues that need a validation */ + struct list_head evicted_user; + /* regular invalidated BOs, but not yet updated in the PT */ struct list_head invalidated; - /* BO mappings freed, but not yet updated in the PT */ - struct list_head freed; - /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; + /* + * This list contains amdgpu_bo_va_mapping objects which have been freed + * but not updated in the PTs + */ + struct list_head freed; + /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; @@ -394,7 +412,7 @@ struct amdgpu_vm { struct dma_fence *last_unlocked; unsigned int pasid; - bool reserved_vmid[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -482,15 +500,14 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); -int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid); - long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); +int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index e69db0a93378..a5adb2ed9b3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -425,45 +425,6 @@ out: return ret; } -static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man, - struct drm_printer *printer) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr debug\n"); -} - -static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man, - struct ttm_resource *res, - const struct ttm_place *place, - size_t size) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n"); - return false; -} - -static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man, - struct ttm_resource *res, - const struct ttm_place *place, - size_t size) -{ - DRM_DEBUG_DRIVER("Dummy vram 
mgr intersects\n"); - return true; -} - -static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *res) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n"); -} - -static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man, - struct ttm_buffer_object *tbo, - const struct ttm_place *place, - struct ttm_resource **res) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr new\n"); - return -ENOSPC; -} - /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -932,14 +893,6 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); } -static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = { - .alloc = amdgpu_dummy_vram_mgr_new, - .free = amdgpu_dummy_vram_mgr_del, - .intersects = amdgpu_dummy_vram_mgr_intersects, - .compatible = amdgpu_dummy_vram_mgr_compatible, - .debug = amdgpu_dummy_vram_mgr_debug -}; - static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .alloc = amdgpu_vram_mgr_new, .free = amdgpu_vram_mgr_del, @@ -973,16 +926,10 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&mgr->allocated_vres_list); mgr->default_page_size = PAGE_SIZE; - if (!adev->gmc.is_app_apu) { - man->func = &amdgpu_vram_mgr_func; - - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - } else { - man->func = &amdgpu_dummy_vram_mgr_func; - DRM_INFO("Setup dummy vram mgr\n"); - } + man->func = &amdgpu_vram_mgr_func; + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index bba0b26fee8f..5f36aff17e79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -126,4 +126,8 @@ uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev, uint16_t max_speed, uint8_t max_width); + +/* Cleanup macro for use with __free(xgmi_put_hive) */ +DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T)) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 33edad1f9dcd..3a79ed7d8031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -405,12 +405,17 @@ struct amd_sriov_ras_cper_dump { uint32_t buf[]; }; +struct amd_sriov_ras_chk_criti { + uint32_t hit; +}; + struct amdsriov_ras_telemetry { struct amd_sriov_ras_telemetry_header header; union { struct amd_sriov_ras_telemetry_error_count error_count; struct amd_sriov_ras_cper_dump cper_dump; + struct amd_sriov_ras_chk_criti chk_criti; } body; }; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 1c994d0cc50b..7a063e44d429 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1246,6 +1246,10 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.last_jump_jiffies = 0; if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); + if (!ectx.ws) { + ret = -ENOMEM; + goto free; + } ectx.ws_size = ws; } else { ectx.ws = NULL; @@ -1498,7 +1502,7 @@ static void atom_get_vbios_build(struct atom_context *ctx) { unsigned char *atom_rom_hdr; unsigned char *str; - uint16_t base; + uint16_t base, len; base = CU16(ATOM_ROM_TABLE_PTR); atom_rom_hdr = 
CSTR(base); @@ -1511,8 +1515,9 @@ static void atom_get_vbios_build(struct atom_context *ctx) while (str < atom_rom_hdr && *str++) ; - if ((str + STRLEN_NORMAL) < atom_rom_hdr) - strscpy(ctx->build_num, str, STRLEN_NORMAL); + len = min(atom_rom_hdr - str, STRLEN_NORMAL); + if (len) + strscpy(ctx->build_num, str, len); } struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c deleted file mode 100644 index e84608891300..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ /dev/null @@ -1,3817 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include <drm/drm_edid.h> -#include <drm/drm_fourcc.h> -#include <drm/drm_modeset_helper.h> -#include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_vblank.h> - -#include "amdgpu.h" -#include "amdgpu_pm.h" -#include "amdgpu_i2c.h" -#include "vid.h" -#include "atom.h" -#include "amdgpu_atombios.h" -#include "atombios_crtc.h" -#include "atombios_encoders.h" -#include "amdgpu_pll.h" -#include "amdgpu_connectors.h" -#include "amdgpu_display.h" -#include "dce_v11_0.h" - -#include "dce/dce_11_0_d.h" -#include "dce/dce_11_0_sh_mask.h" -#include "dce/dce_11_0_enum.h" -#include "oss/oss_3_0_d.h" -#include "oss/oss_3_0_sh_mask.h" -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" - -#include "ivsrcid/ivsrcid_vislands30.h" - -static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); -static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); -static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); - -static const u32 crtc_offsets[] = -{ - CRTC0_REGISTER_OFFSET, - CRTC1_REGISTER_OFFSET, - CRTC2_REGISTER_OFFSET, - CRTC3_REGISTER_OFFSET, - CRTC4_REGISTER_OFFSET, - CRTC5_REGISTER_OFFSET, - CRTC6_REGISTER_OFFSET -}; - -static const u32 hpd_offsets[] = -{ - HPD0_REGISTER_OFFSET, - HPD1_REGISTER_OFFSET, - HPD2_REGISTER_OFFSET, - HPD3_REGISTER_OFFSET, - HPD4_REGISTER_OFFSET, - HPD5_REGISTER_OFFSET -}; - -static const uint32_t dig_offsets[] = { - DIG0_REGISTER_OFFSET, - DIG1_REGISTER_OFFSET, - DIG2_REGISTER_OFFSET, - DIG3_REGISTER_OFFSET, - DIG4_REGISTER_OFFSET, - DIG5_REGISTER_OFFSET, - DIG6_REGISTER_OFFSET, - DIG7_REGISTER_OFFSET, - DIG8_REGISTER_OFFSET -}; - -static const struct { - uint32_t reg; - uint32_t vblank; - uint32_t vline; - uint32_t hpd; - -} interrupt_status_offsets[] = { { - .reg = 
mmDISP_INTERRUPT_STATUS, - .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK -} }; - -static const u32 cz_golden_settings_a11[] = -{ - mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000, - mmFBC_MISC, 0x1f311fff, 0x14300000, -}; - -static const u32 cz_mgcg_cgcg_init[] = -{ - mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100, - mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000, -}; - -static const u32 stoney_golden_settings_a11[] = -{ - mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000, - mmFBC_MISC, 0x1f311fff, 0x14302000, -}; - -static const u32 polaris11_golden_settings_a11[] = -{ - mmDCI_CLK_CNTL, 0x00000080, 0x00000000, - mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, - mmFBC_DEBUG1, 0xffffffff, 0x00000008, - mmFBC_MISC, 0x9f313fff, 0x14302008, - mmHDMI_CONTROL, 0x313f031f, 0x00000011, -}; - -static const u32 polaris10_golden_settings_a11[] = -{ - mmDCI_CLK_CNTL, 0x00000080, 0x00000000, - mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, - mmFBC_MISC, 0x9f313fff, 0x14302008, - mmHDMI_CONTROL, 0x313f031f, 0x00000011, -}; - -static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) -{ - switch (adev->asic_type) { - case CHIP_CARRIZO: - amdgpu_device_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_device_program_register_sequence(adev, - cz_golden_settings_a11, - ARRAY_SIZE(cz_golden_settings_a11)); - break; - case CHIP_STONEY: - amdgpu_device_program_register_sequence(adev, - stoney_golden_settings_a11, - ARRAY_SIZE(stoney_golden_settings_a11)); - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - amdgpu_device_program_register_sequence(adev, - polaris11_golden_settings_a11, - ARRAY_SIZE(polaris11_golden_settings_a11)); - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - amdgpu_device_program_register_sequence(adev, - polaris10_golden_settings_a11, - ARRAY_SIZE(polaris10_golden_settings_a11)); - break; - default: - break; - } -} - -static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev, - u32 block_offset, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); - 
WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); - r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); - - return r; -} - -static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev, - u32 block_offset, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); - WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); - WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); -} - -static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) -{ - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) - return 0; - else - return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); -} - -static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev) -{ - unsigned i; - - /* Enable pflip interrupts */ - for (i = 0; i < adev->mode_info.num_crtc; i++) - amdgpu_irq_get(adev, &adev->pageflip_irq, i); -} - -static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) -{ - unsigned i; - - /* Disable pflip interrupts */ - for (i = 0; i < adev->mode_info.num_crtc; i++) - amdgpu_irq_put(adev, &adev->pageflip_irq, i); -} - -/** - * dce_v11_0_page_flip - pageflip callback. - * - * @adev: amdgpu_device pointer - * @crtc_id: crtc to cleanup pageflip on - * @crtc_base: new address of the crtc (GPU MC address) - * @async: asynchronous flip - * - * Triggers the actual pageflip by updating the primary - * surface base address. - */ -static void dce_v11_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base, bool async) -{ - struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; - u32 tmp; - - /* flip immediate for async, default is vsync */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* update pitch */ - WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, - fb->pitches[0] / fb->format->cpp[0]); - /* update the scanout addresses */ - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(crtc_base)); - /* writing to the low address triggers the update */ - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - lower_32_bits(crtc_base)); - /* post the write */ - RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset); -} - -static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, - u32 *vbl, u32 *position) -{ - if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) - return -EINVAL; - - *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]); - *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - return 0; -} - -/** - * dce_v11_0_hpd_sense - hpd sense callback. - * - * @adev: amdgpu_device pointer - * @hpd: hpd (hotplug detect) pin - * - * Checks if a digital monitor is connected (evergreen+). - * Returns true if connected, false if not connected. 
- */ -static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - bool connected = false; - - if (hpd >= adev->mode_info.num_hpd) - return connected; - - if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) & - DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK) - connected = true; - - return connected; -} - -/** - * dce_v11_0_hpd_set_polarity - hpd set polarity callback. - * - * @adev: amdgpu_device pointer - * @hpd: hpd (hotplug detect) pin - * - * Set the polarity of the hpd pin (evergreen+). - */ -static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - u32 tmp; - bool connected = dce_v11_0_hpd_sense(adev, hpd); - - if (hpd >= adev->mode_info.num_hpd) - return; - - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - if (connected) - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0); - else - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); -} - -/** - * dce_v11_0_hpd_init - hpd setup callback. - * - * @adev: amdgpu_device pointer - * - * Setup the hpd pins used by the card (evergreen+). - * Enable the pin, set the polarity, and enable the hpd interrupts. - */ -static void dce_v11_0_hpd_init(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_connector *connector; - struct drm_connector_list_iter iter; - u32 tmp; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) - continue; - - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP || - connector->connector_type == DRM_MODE_CONNECTOR_LVDS) { - /* don't try to enable hpd on eDP or LVDS avoid breaking the - * aux dp channel on imac and help (but not completely fix) - * https://bugzilla.redhat.com/show_bug.cgi?id=726143 - * also avoid interrupt storms during dpms. - */ - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - continue; - } - - tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1); - WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL, - DC_HPD_CONNECT_INT_DELAY, - AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS); - tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL, - DC_HPD_DISCONNECT_INT_DELAY, - AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); - WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); - dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); - amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); - } - drm_connector_list_iter_end(&iter); -} - -/** - * dce_v11_0_hpd_fini - hpd tear down callback. - * - * @adev: amdgpu_device pointer - * - * Tear down the hpd pins used by the card (evergreen+). - * Disable the hpd interrupts. 
- */ -static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_connector *connector; - struct drm_connector_list_iter iter; - u32 tmp; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) - continue; - - tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0); - WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); - } - drm_connector_list_iter_end(&iter); -} - -static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) -{ - return mmDC_GPIO_HPD_A; -} - -static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev) -{ - u32 crtc_hung = 0; - u32 crtc_status[6]; - u32 i, j, tmp; - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) { - crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); - crtc_hung |= (1 << i); - } - } - - for (j = 0; j < 10; j++) { - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (crtc_hung & (1 << i)) { - tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); - if (tmp != crtc_status[i]) - crtc_hung &= ~(1 << i); - } - } - if (crtc_hung == 0) - return false; - udelay(100); - } - - return true; -} - -static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev, - bool render) -{ - u32 tmp; - - /* Lockout access through VGA aperture*/ - tmp = RREG32(mmVGA_HDP_CONTROL); - if (render) - tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0); - else - tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - WREG32(mmVGA_HDP_CONTROL, tmp); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - if (render) - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1); - else - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); -} - -static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) -{ - int num_crtc = 0; - - switch (adev->asic_type) { - case CHIP_CARRIZO: - num_crtc = 3; - break; - case CHIP_STONEY: - num_crtc = 2; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - num_crtc = 6; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - num_crtc = 5; - break; - default: - num_crtc = 0; - } - return num_crtc; -} - -void dce_v11_0_disable_dce(struct amdgpu_device *adev) -{ - /*Disable VGA render and enabled crtc, if has DCE engine*/ - if (amdgpu_atombios_has_dce_engine_info(adev)) { - u32 tmp; - int crtc_enabled, i; - - dce_v11_0_set_vga_render_state(adev, false); - - /*Disable crtc*/ - for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - } - } -} - -static void dce_v11_0_program_fmt(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder 
= to_amdgpu_encoder(encoder); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - int bpc = 0; - u32 tmp = 0; - enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE; - - if (connector) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - bpc = amdgpu_connector_get_monitor_bpc(connector); - dither = amdgpu_connector->dither; - } - - /* LVDS/eDP FMT is set up by atom */ - if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT) - return; - - /* not needed for analog */ - if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) || - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2)) - return; - - if (bpc == 0) - return; - - switch (bpc) { - case 6: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0); - } - break; - case 8: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1); - } - break; - case 10: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2); - } - break; - default: - /* not needed */ - break; - } - - WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - - -/* display watermark setup */ -/** - * dce_v11_0_line_buffer_adjust - Set up the line buffer - * - * @adev: amdgpu_device pointer - * @amdgpu_crtc: the selected display controller - * @mode: the current display mode on the selected display - * controller - * - * Setup up the line buffer allocation for - * the selected display controller (CIK). - * Returns the line buffer size in pixels. - */ -static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev, - struct amdgpu_crtc *amdgpu_crtc, - struct drm_display_mode *mode) -{ - u32 tmp, buffer_alloc, i, mem_cfg; - u32 pipe_offset = amdgpu_crtc->crtc_id; - /* - * Line Buffer Setup - * There are 6 line buffers, one for each display controllers. 
- * There are 3 partitions per LB. Select the number of partitions - * to enable based on the display width. For display widths larger - * than 4096, you need use to use 2 display controllers and combine - * them using the stereo blender. - */ - if (amdgpu_crtc->base.enabled && mode) { - if (mode->crtc_hdisplay < 1920) { - mem_cfg = 1; - buffer_alloc = 2; - } else if (mode->crtc_hdisplay < 2560) { - mem_cfg = 2; - buffer_alloc = 2; - } else if (mode->crtc_hdisplay < 4096) { - mem_cfg = 0; - buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4; - } else { - DRM_DEBUG_KMS("Mode too big for LB!\n"); - mem_cfg = 0; - buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4; - } - } else { - mem_cfg = 1; - buffer_alloc = 0; - } - - tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg); - WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset); - tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc); - WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset); - if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED)) - break; - udelay(1); - } - - if (amdgpu_crtc->base.enabled && mode) { - switch (mem_cfg) { - case 0: - default: - return 4096 * 2; - case 1: - return 1920 * 2; - case 2: - return 2560 * 2; - } - } - - /* controller not enabled, so no lb used */ - return 0; -} - -/** - * cik_get_number_of_dram_channels - get the number of dram channels - * - * @adev: amdgpu_device pointer - * - * Look up the number of video ram channels (CIK). - * Used for display watermark bandwidth calculations - * Returns the number of dram channels - */ -static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev) -{ - u32 tmp = RREG32(mmMC_SHARED_CHMAP); - - switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { - case 0: - default: - return 1; - case 1: - return 2; - case 2: - return 4; - case 3: - return 8; - case 4: - return 3; - case 5: - return 6; - case 6: - return 10; - case 7: - return 12; - case 8: - return 16; - } -} - -struct dce10_wm_params { - u32 dram_channels; /* number of dram channels */ - u32 yclk; /* bandwidth per dram data pin in kHz */ - u32 sclk; /* engine clock in kHz */ - u32 disp_clk; /* display clock in kHz */ - u32 src_width; /* viewport width */ - u32 active_time; /* active display time in ns */ - u32 blank_time; /* blank time in ns */ - bool interlaced; /* mode is interlaced */ - fixed20_12 vsc; /* vertical scale ratio */ - u32 num_heads; /* number of active crtcs */ - u32 bytes_per_pixel; /* bytes per pixel display + overlay */ - u32 lb_size; /* line buffer allocated to pipe */ - u32 vtaps; /* vertical scaler taps */ -}; - -/** - * dce_v11_0_dram_bandwidth - get the dram bandwidth - * - * @wm: watermark calculation data - * - * Calculate the raw dram bandwidth (CIK). 
- * Used for display watermark bandwidth calculations - * Returns the dram bandwidth in MBytes/s - */ -static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate raw DRAM Bandwidth */ - fixed20_12 dram_efficiency; /* 0.7 */ - fixed20_12 yclk, dram_channels, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - yclk.full = dfixed_const(wm->yclk); - yclk.full = dfixed_div(yclk, a); - dram_channels.full = dfixed_const(wm->dram_channels * 4); - a.full = dfixed_const(10); - dram_efficiency.full = dfixed_const(7); - dram_efficiency.full = dfixed_div(dram_efficiency, a); - bandwidth.full = dfixed_mul(dram_channels, yclk); - bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display - * - * @wm: watermark calculation data - * - * Calculate the dram bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the dram bandwidth for display in MBytes/s - */ -static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm) -{ - /* Calculate DRAM Bandwidth and the part allocated to display. */ - fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ - fixed20_12 yclk, dram_channels, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - yclk.full = dfixed_const(wm->yclk); - yclk.full = dfixed_div(yclk, a); - dram_channels.full = dfixed_const(wm->dram_channels * 4); - a.full = dfixed_const(10); - disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */ - disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); - bandwidth.full = dfixed_mul(dram_channels, yclk); - bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_data_return_bandwidth - get the data return bandwidth - * - * @wm: watermark calculation data - * - * Calculate the data return bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the data return bandwidth in MBytes/s - */ -static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the display Data return Bandwidth */ - fixed20_12 return_efficiency; /* 0.8 */ - fixed20_12 sclk, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - sclk.full = dfixed_const(wm->sclk); - sclk.full = dfixed_div(sclk, a); - a.full = dfixed_const(10); - return_efficiency.full = dfixed_const(8); - return_efficiency.full = dfixed_div(return_efficiency, a); - a.full = dfixed_const(32); - bandwidth.full = dfixed_mul(a, sclk); - bandwidth.full = dfixed_mul(bandwidth, return_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth - * - * @wm: watermark calculation data - * - * Calculate the dmif bandwidth used for display (CIK). 
- * Used for display watermark bandwidth calculations - * Returns the dmif bandwidth in MBytes/s - */ -static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the DMIF Request Bandwidth */ - fixed20_12 disp_clk_request_efficiency; /* 0.8 */ - fixed20_12 disp_clk, bandwidth; - fixed20_12 a, b; - - a.full = dfixed_const(1000); - disp_clk.full = dfixed_const(wm->disp_clk); - disp_clk.full = dfixed_div(disp_clk, a); - a.full = dfixed_const(32); - b.full = dfixed_mul(a, disp_clk); - - a.full = dfixed_const(10); - disp_clk_request_efficiency.full = dfixed_const(8); - disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); - - bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_available_bandwidth - get the min available bandwidth - * - * @wm: watermark calculation data - * - * Calculate the min available bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the min available bandwidth in MBytes/s - */ -static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ - u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm); - u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm); - u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm); - - return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); -} - -/** - * dce_v11_0_average_bandwidth - get the average available bandwidth - * - * @wm: watermark calculation data - * - * Calculate the average available bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the average available bandwidth in MBytes/s - */ -static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the display mode Average Bandwidth - * DisplayMode should contain the source and destination dimensions, - * timing, etc. - */ - fixed20_12 bpp; - fixed20_12 line_time; - fixed20_12 src_width; - fixed20_12 bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - line_time.full = dfixed_const(wm->active_time + wm->blank_time); - line_time.full = dfixed_div(line_time, a); - bpp.full = dfixed_const(wm->bytes_per_pixel); - src_width.full = dfixed_const(wm->src_width); - bandwidth.full = dfixed_mul(src_width, bpp); - bandwidth.full = dfixed_mul(bandwidth, wm->vsc); - bandwidth.full = dfixed_div(bandwidth, line_time); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_latency_watermark - get the latency watermark - * - * @wm: watermark calculation data - * - * Calculate the latency watermark (CIK). - * Used for display watermark bandwidth calculations - * Returns the latency watermark in ns - */ -static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) -{ - /* First calculate the latency in ns */ - u32 mc_latency = 2000; /* 2000 ns. 
*/ - u32 available_bandwidth = dce_v11_0_available_bandwidth(wm); - u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth; - u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth; - u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */ - u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) + - (wm->num_heads * cursor_line_pair_return_time); - u32 latency = mc_latency + other_heads_data_return_time + dc_latency; - u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time; - u32 tmp, dmif_size = 12288; - fixed20_12 a, b, c; - - if (wm->num_heads == 0) - return 0; - - a.full = dfixed_const(2); - b.full = dfixed_const(1); - if ((wm->vsc.full > a.full) || - ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) || - (wm->vtaps >= 5) || - ((wm->vsc.full >= a.full) && wm->interlaced)) - max_src_lines_per_dst_line = 4; - else - max_src_lines_per_dst_line = 2; - - a.full = dfixed_const(available_bandwidth); - b.full = dfixed_const(wm->num_heads); - a.full = dfixed_div(a, b); - tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); - tmp = min(dfixed_trunc(a), tmp); - - lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); - - a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); - b.full = dfixed_const(1000); - c.full = dfixed_const(lb_fill_bw); - b.full = dfixed_div(c, b); - a.full = dfixed_div(a, b); - line_fill_time = dfixed_trunc(a); - - if (line_fill_time < wm->active_time) - return latency; - else - return latency + (line_fill_time - wm->active_time); - -} - -/** - * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check - * average and available dram bandwidth - * - * @wm: watermark calculation data - * - * Check if the display average bandwidth fits in the display - * dram bandwidth (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. - */ -static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm) -{ - if (dce_v11_0_average_bandwidth(wm) <= - (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads)) - return true; - else - return false; -} - -/** - * dce_v11_0_average_bandwidth_vs_available_bandwidth - check - * average and available bandwidth - * - * @wm: watermark calculation data - * - * Check if the display average bandwidth fits in the display - * available bandwidth (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. - */ -static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm) -{ - if (dce_v11_0_average_bandwidth(wm) <= - (dce_v11_0_available_bandwidth(wm) / wm->num_heads)) - return true; - else - return false; -} - -/** - * dce_v11_0_check_latency_hiding - check latency hiding - * - * @wm: watermark calculation data - * - * Check latency hiding (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. 
- */ -static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm) -{ - u32 lb_partitions = wm->lb_size / wm->src_width; - u32 line_time = wm->active_time + wm->blank_time; - u32 latency_tolerant_lines; - u32 latency_hiding; - fixed20_12 a; - - a.full = dfixed_const(1); - if (wm->vsc.full > a.full) - latency_tolerant_lines = 1; - else { - if (lb_partitions <= (wm->vtaps + 1)) - latency_tolerant_lines = 1; - else - latency_tolerant_lines = 2; - } - - latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time); - - if (dce_v11_0_latency_watermark(wm) <= latency_hiding) - return true; - else - return false; -} - -/** - * dce_v11_0_program_watermarks - program display watermarks - * - * @adev: amdgpu_device pointer - * @amdgpu_crtc: the selected display controller - * @lb_size: line buffer size - * @num_heads: number of display controllers in use - * - * Calculate and program the display watermarks for the - * selected display controller (CIK). - */ -static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, - struct amdgpu_crtc *amdgpu_crtc, - u32 lb_size, u32 num_heads) -{ - struct drm_display_mode *mode = &amdgpu_crtc->base.mode; - struct dce10_wm_params wm_low, wm_high; - u32 active_time; - u32 line_time = 0; - u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask, lb_vblank_lead_lines = 0; - - if (amdgpu_crtc->base.enabled && num_heads && mode) { - active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, - (u32)mode->clock); - line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, - (u32)mode->clock); - line_time = min_t(u32, line_time, 65535); - - /* watermark for high clocks */ - if (adev->pm.dpm_enabled) { - wm_high.yclk = - amdgpu_dpm_get_mclk(adev, false) * 10; - wm_high.sclk = - amdgpu_dpm_get_sclk(adev, false) * 10; - } else { - wm_high.yclk = adev->pm.current_mclk * 10; - wm_high.sclk = adev->pm.current_sclk * 10; - } - - wm_high.disp_clk = mode->clock; - wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = active_time; - wm_high.blank_time = line_time - wm_high.active_time; - wm_high.interlaced = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm_high.interlaced = true; - wm_high.vsc = amdgpu_crtc->vsc; - wm_high.vtaps = 1; - if (amdgpu_crtc->rmx_type != RMX_OFF) - wm_high.vtaps = 2; - wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm_high.lb_size = lb_size; - wm_high.dram_channels = cik_get_number_of_dram_channels(adev); - wm_high.num_heads = num_heads; - - /* set for high clocks */ - latency_watermark_a = min_t(u32, dce_v11_0_latency_watermark(&wm_high), 65535); - - /* possibly force display priority to high */ - /* should really do this at mode validation time... 
*/ - if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || - !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) || - !dce_v11_0_check_latency_hiding(&wm_high) || - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } - - /* watermark for low clocks */ - if (adev->pm.dpm_enabled) { - wm_low.yclk = - amdgpu_dpm_get_mclk(adev, true) * 10; - wm_low.sclk = - amdgpu_dpm_get_sclk(adev, true) * 10; - } else { - wm_low.yclk = adev->pm.current_mclk * 10; - wm_low.sclk = adev->pm.current_sclk * 10; - } - - wm_low.disp_clk = mode->clock; - wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = active_time; - wm_low.blank_time = line_time - wm_low.active_time; - wm_low.interlaced = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm_low.interlaced = true; - wm_low.vsc = amdgpu_crtc->vsc; - wm_low.vtaps = 1; - if (amdgpu_crtc->rmx_type != RMX_OFF) - wm_low.vtaps = 2; - wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm_low.lb_size = lb_size; - wm_low.dram_channels = cik_get_number_of_dram_channels(adev); - wm_low.num_heads = num_heads; - - /* set for low clocks */ - latency_watermark_b = min_t(u32, dce_v11_0_latency_watermark(&wm_low), 65535); - - /* possibly force display priority to high */ - /* should really do this at mode validation time... */ - if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || - !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) || - !dce_v11_0_check_latency_hiding(&wm_low) || - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } - lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); - } - - /* select wm A */ - wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1); - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp); - tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time); - WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* select wm B */ - tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2); - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp); - tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time); - WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* restore original selection */ - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask); - - /* save values for DPM */ - amdgpu_crtc->line_time = line_time; - - /* Save number of lines the linebuffer leads before the scanout */ - amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; -} - -/** - * dce_v11_0_bandwidth_update - program display watermarks - * - * @adev: amdgpu_device pointer - * - * Calculate and program the display watermarks and line - * buffer allocation (CIK). 
- */ -static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev) -{ - struct drm_display_mode *mode = NULL; - u32 num_heads = 0, lb_size; - int i; - - amdgpu_display_update_priority(adev); - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->mode_info.crtcs[i]->base.enabled) - num_heads++; - } - for (i = 0; i < adev->mode_info.num_crtc; i++) { - mode = &adev->mode_info.crtcs[i]->base.mode; - lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode); - dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i], - lb_size, num_heads); - } -} - -static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev) -{ - int i; - u32 offset, tmp; - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - offset = adev->mode_info.audio.pin[i].offset; - tmp = RREG32_AUDIO_ENDPT(offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); - if (((tmp & - AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >> - AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1) - adev->mode_info.audio.pin[i].connected = false; - else - adev->mode_info.audio.pin[i].connected = true; - } -} - -static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev) -{ - int i; - - dce_v11_0_audio_get_connected_pins(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - if (adev->mode_info.audio.pin[i].connected) - return &adev->mode_info.audio.pin[i]; - } - DRM_ERROR("No connected audio pins found!\n"); - return NULL; -} - -static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) -{ - struct amdgpu_device *adev = drm_to_adev(encoder->dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 tmp; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id); - WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp); -} - -static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, - struct drm_display_mode *mode) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - u32 tmp; - int interlace = 0; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - interlace = 1; - if (connector->latency_present[interlace]) { - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - VIDEO_LIPSYNC, connector->video_latency[interlace]); - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - AUDIO_LIPSYNC, connector->audio_latency[interlace]); - } else { - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - VIDEO_LIPSYNC, 0); - tmp = REG_SET_FIELD(0, 
AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - AUDIO_LIPSYNC, 0); - } - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); -} - -static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - u32 tmp; - u8 *sadb = NULL; - int sad_count; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); - if (sad_count < 0) { - DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); - sad_count = 0; - } - - /* program the speaker allocation */ - tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - DP_CONNECTION, 0); - /* set HDMI mode */ - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - HDMI_CONNECTION, 1); - if (sad_count) - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - SPEAKER_ALLOCATION, sadb[0]); - else - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - SPEAKER_ALLOCATION, 5); /* stereo */ - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); - - kfree(sadb); -} - -static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - struct cea_sad *sads; - int i, sad_count; - - static const u16 eld_reg_to_type[][2] = { - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, - }; - - if (!dig || !dig->afmt || 
!dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); - if (sad_count < 0) - DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) - return; - BUG_ON(!sads); - - for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; - u8 stereo_freqs = 0; - int max_channels = -1; - int j; - - for (j = 0; j < sad_count; j++) { - struct cea_sad *sad = &sads[j]; - - if (sad->format == eld_reg_to_type[i][1]) { - if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); - max_channels = sad->channels; - } - - if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) - stereo_freqs |= sad->freq; - else - break; - } - } - - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); - } - - kfree(sads); -} - -static void dce_v11_0_audio_enable(struct amdgpu_device *adev, - struct amdgpu_audio_pin *pin, - bool enable) -{ - if (!pin) - return; - - WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); -} - -static const u32 pin_offsets[] = -{ - AUD0_REGISTER_OFFSET, - AUD1_REGISTER_OFFSET, - AUD2_REGISTER_OFFSET, - AUD3_REGISTER_OFFSET, - AUD4_REGISTER_OFFSET, - AUD5_REGISTER_OFFSET, - AUD6_REGISTER_OFFSET, - AUD7_REGISTER_OFFSET, -}; - -static int dce_v11_0_audio_init(struct amdgpu_device *adev) -{ - int i; - - if (!amdgpu_audio) - return 0; - - adev->mode_info.audio.enabled = true; - - switch (adev->asic_type) { - case CHIP_CARRIZO: - case CHIP_STONEY: - adev->mode_info.audio.num_pins = 7; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - adev->mode_info.audio.num_pins = 8; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - adev->mode_info.audio.num_pins = 6; - break; - default: - return -EINVAL; - } - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - adev->mode_info.audio.pin[i].channels = -1; - adev->mode_info.audio.pin[i].rate = -1; - adev->mode_info.audio.pin[i].bits_per_sample = -1; - adev->mode_info.audio.pin[i].status_bits = 0; - adev->mode_info.audio.pin[i].category_code = 0; - adev->mode_info.audio.pin[i].connected = false; - adev->mode_info.audio.pin[i].offset = pin_offsets[i]; - adev->mode_info.audio.pin[i].id = i; - /* disable audio. 
it will be set up later */ - /* XXX remove once we switch to ip funcs */ - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - return 0; -} - -static void dce_v11_0_audio_fini(struct amdgpu_device *adev) -{ - if (!amdgpu_audio) - return; - - if (!adev->mode_info.audio.enabled) - return; - - adev->mode_info.audio.enabled = false; -} - -/* - * update the N and CTS parameters for a given pixel clock rate - */ -static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 tmp; - - tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); - WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); - WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); - WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); - WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); - WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); - WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); - -} - -/* - * build a HDMI Video Info Frame - */ -static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, - void *buffer, size_t size) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - uint8_t *frame = buffer + 3; - uint8_t *header = buffer; - - WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, - frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); - WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, - frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24)); - WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, - frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); - WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, - frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); -} - -static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - u32 dto_phase = 24 * 1000; - u32 dto_modulo = clock; - u32 tmp; - - if (!dig || !dig->afmt) - return; - - /* XXX two dtos; generally use dto0 for hdmi */ - /* Express [24MHz / target pixel clock] as an exact rational - * number (coefficient of two integer numbers. 
DCCG_AUDIO_DTOx_PHASE - * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator - */ - tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); - tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, - amdgpu_crtc->crtc_id); - WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); - WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase); - WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo); -} - -/* - * update the info frames with the data from the current display mode - */ -static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder, - struct drm_display_mode *mode) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; - struct hdmi_avi_infoframe frame; - ssize_t err; - u32 tmp; - int bpc = 8; - - if (!dig || !dig->afmt) - return; - - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (!dig->afmt->enabled) - return; - - /* hdmi deep color mode general control packets setup, if bpc > 8 */ - if (encoder->crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - bpc = amdgpu_crtc->bpc; - } - - /* disable audio prior to setting up hw */ - dig->afmt->pin = dce_v11_0_audio_get_pin(adev); - dce_v11_0_audio_enable(adev, dig->afmt->pin, false); - - dce_v11_0_audio_set_dto(encoder, mode->clock); - - tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); - WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */ - - WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000); - - tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset); - switch (bpc) { - case 0: - case 6: - case 8: - case 16: - default: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); - DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n", - connector->name, bpc); - break; - case 10: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1); - DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n", - connector->name); - break; - case 12: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2); - DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n", - connector->name); - break; - } - WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */ - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */ - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */ - WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); - /* enable audio info frames (frames won't be set until audio is enabled) */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1); - 
WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); - WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); - /* anything other than 0 */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2); - WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); - - WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */ - - tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* set the default audio delay */ - tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); - /* should be suffient for all audio modes and small enough for all hblanks */ - tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); - WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* allow 60958 channel status fields to be updated */ - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); - WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); - if (bpc > 8) - /* clear SW CTS value */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0); - else - /* select SW CTS value */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1); - /* allow hw to sent ACR packets when required */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); - WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); - - dce_v11_0_afmt_update_ACR(encoder, mode->clock); - - tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); - WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); - WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); - WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); - - dce_v11_0_audio_write_speaker_allocation(encoder); - - WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, - (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT)); - - dce_v11_0_afmt_audio_select_pin(encoder); - dce_v11_0_audio_write_sad_regs(encoder); - dce_v11_0_audio_write_latency_fields(encoder, mode); - - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); - if (err < 0) { - DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); - return; - } - - err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); - if (err < 0) { - DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); - return; - } - - dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer)); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + 
dig->afmt->offset); - /* enable AVI info frames */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); - WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); - WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* send audio packets */ - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); - WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF); - WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF); - WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001); - WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001); - - /* enable audio after to setting up hw */ - dce_v11_0_audio_enable(adev, dig->afmt->pin, true); -} - -static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - if (!dig || !dig->afmt) - return; - - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (enable && dig->afmt->enabled) - return; - if (!enable && !dig->afmt->enabled) - return; - - if (!enable && dig->afmt->pin) { - dce_v11_0_audio_enable(adev, dig->afmt->pin, false); - dig->afmt->pin = NULL; - } - - dig->afmt->enabled = enable; - - DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n", - enable ? 
"En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id); -} - -static int dce_v11_0_afmt_init(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->mode_info.num_dig; i++) - adev->mode_info.afmt[i] = NULL; - - /* DCE11 has audio blocks tied to DIG encoders */ - for (i = 0; i < adev->mode_info.num_dig; i++) { - adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL); - if (adev->mode_info.afmt[i]) { - adev->mode_info.afmt[i]->offset = dig_offsets[i]; - adev->mode_info.afmt[i]->id = i; - } else { - int j; - for (j = 0; j < i; j++) { - kfree(adev->mode_info.afmt[j]); - adev->mode_info.afmt[j] = NULL; - } - return -ENOMEM; - } - } - return 0; -} - -static void dce_v11_0_afmt_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->mode_info.num_dig; i++) { - kfree(adev->mode_info.afmt[i]); - adev->mode_info.afmt[i] = NULL; - } -} - -static const u32 vga_control_regs[6] = -{ - mmD1VGA_CONTROL, - mmD2VGA_CONTROL, - mmD3VGA_CONTROL, - mmD4VGA_CONTROL, - mmD5VGA_CONTROL, - mmD6VGA_CONTROL, -}; - -static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u32 vga_control; - - vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1; - if (enable) - WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1); - else - WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control); -} - -static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - if (enable) - WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1); - else - WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0); -} - -static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, int atomic) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct drm_framebuffer *target_fb; - struct drm_gem_object *obj; - struct amdgpu_bo *abo; - uint64_t fb_location, tiling_flags; - uint32_t fb_format, fb_pitch_pixels; - u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE); - u32 pipe_config; - u32 tmp, viewport_w, viewport_h; - int r; - bool bypass_lut = false; - - /* no fb bound */ - if (!atomic && !crtc->primary->fb) { - DRM_DEBUG_KMS("No FB bound\n"); - return 0; - } - - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; - - /* If atomic, assume fb object is pinned & idle & fenced and - * just update base pointers - */ - obj = target_fb->obj[0]; - abo = gem_to_amdgpu_bo(obj); - r = amdgpu_bo_reserve(abo, false); - if (unlikely(r != 0)) - return r; - - if (!atomic) { - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(abo); - return -EINVAL; - } - } - fb_location = amdgpu_bo_gpu_offset(abo); - - amdgpu_bo_get_tiling_flags(abo, &tiling_flags); - amdgpu_bo_unreserve(abo); - - pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - - switch (target_fb->format->format) { - case DRM_FORMAT_C8: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); - break; - case DRM_FORMAT_XRGB4444: - case DRM_FORMAT_ARGB4444: - fb_format 
= REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_ARGB1555: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_BGRX5551: - case DRM_FORMAT_BGRA5551: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_RGB565: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - break; - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ - bypass_lut = true; - break; - case DRM_FORMAT_BGRX1010102: - case DRM_FORMAT_BGRA1010102: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ - bypass_lut = true; - break; - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2); - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - break; - default: - DRM_ERROR("Unsupported screen format %p4cc\n", - &target_fb->format->format); - return -EINVAL; - } - - if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) { - unsigned bankw, bankh, mtaspect, tile_split, num_banks; - - bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); - bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); - mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); - tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); - num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE, - ARRAY_2D_TILED_THIN1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT, - tile_split); - fb_format = 
REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT, - mtaspect); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE, - ADDR_SURF_MICRO_TILING_DISPLAY); - } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE, - ARRAY_1D_TILED_THIN1); - } - - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG, - pipe_config); - - dce_v11_0_vga_enable(crtc, false); - - /* Make sure surface address is updated at vertical blank rather than - * horizontal blank - */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(fb_location)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(fb_location)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); - WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); - WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); - - /* - * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT - * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to - * retain the full precision throughout the pipeline. 
- */ - tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset); - if (bypass_lut) - tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1); - else - tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0); - WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp); - - if (bypass_lut) - DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n"); - - WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width); - WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height); - - fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0]; - WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels); - - dce_v11_0_grph_enable(crtc, true); - - WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset, - target_fb->height); - - x &= ~3; - y &= ~1; - WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset, - (x << 16) | y); - viewport_w = crtc->mode.hdisplay; - viewport_h = (crtc->mode.vdisplay + 1) & ~1; - WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, - (viewport_w << 16) | viewport_h); - - /* set pageflip to happen anywhere in vblank interval */ - WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); - - if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r != 0)) - return r; - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - - /* Bytes per pixel may have changed */ - dce_v11_0_bandwidth_update(adev); - - return 0; -} - -static void dce_v11_0_set_interleave(struct drm_crtc *crtc, - struct drm_display_mode *mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - u32 tmp; - - tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1); - else - tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0); - WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp); -} - -static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u16 *r, *g, *b; - int i; - u32 tmp; - - DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); - - tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0); - WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1); - WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0); - WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); - - WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0); - - WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + 
amdgpu_crtc->crtc_offset, 0xffff); - WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff); - WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff); - - WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); - - WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); - r = crtc->gamma_store; - g = r + crtc->gamma_size; - b = g + crtc->gamma_size; - for (i = 0; i < 256; i++) { - WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - ((*r++ & 0xffc0) << 14) | - ((*g++ & 0xffc0) << 4) | - (*b++ >> 6)); - } - - tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0); - WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0); - WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0); - WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0); - WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - /* XXX match this to the depth of the crtc fmt block, move to modeset? */ - WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0); - /* XXX this only needs to be programmed once per crtc at startup, - * not sure where the best place for it is - */ - tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1); - WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - if (dig->linkb) - return 1; - else - return 0; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - if (dig->linkb) - return 3; - else - return 2; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - if (dig->linkb) - return 5; - else - return 4; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: - return 6; - default: - DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); - return 0; - } -} - -/** - * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc. - * - * @crtc: drm crtc - * - * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors - * a single PPLL can be used for all DP crtcs/encoders. For non-DP - * monitors a dedicated PPLL must be used. If a particular board has - * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming - * as there is no need to program the PLL itself. If we are not able to - * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to - * avoid messing up an existing monitor. 
- * - * Asic specific PLL information - * - * DCE 10.x - * Tonga - * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) - * CI - * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC - * - */ -static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u32 pll_in_use; - int pll; - - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - struct amdgpu_encoder *amdgpu_encoder = - to_amdgpu_encoder(amdgpu_crtc->encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) - return ATOM_DP_DTO; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - if (dig->linkb) - return ATOM_COMBOPHY_PLL1; - else - return ATOM_COMBOPHY_PLL0; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - if (dig->linkb) - return ATOM_COMBOPHY_PLL3; - else - return ATOM_COMBOPHY_PLL2; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - if (dig->linkb) - return ATOM_COMBOPHY_PLL5; - else - return ATOM_COMBOPHY_PLL4; - default: - DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); - return ATOM_PPLL_INVALID; - } - } - - if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) { - if (adev->clock.dp_extclk) - /* skip PPLL programming if using ext clock */ - return ATOM_PPLL_INVALID; - else { - /* use the same PPLL for all DP monitors */ - pll = amdgpu_pll_get_shared_dp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - } else { - /* use the same PPLL for all monitors with the same clock */ - pll = amdgpu_pll_get_shared_nondp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - - /* XXX need to determine what plls are available on each DCE11 part */ - pll_in_use = amdgpu_pll_get_use_mask(crtc); - if (adev->flags & AMD_IS_APU) { - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - if (!(pll_in_use & (1 << ATOM_PPLL0))) - return ATOM_PPLL0; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; - } else { - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - if (!(pll_in_use & (1 << ATOM_PPLL0))) - return ATOM_PPLL0; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; - } - return ATOM_PPLL_INVALID; -} - -static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock) -{ - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - uint32_t cur_lock; - - cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset); - if (lock) - cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1); - else - cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0); - WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock); -} - -static void dce_v11_0_hide_cursor(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - u32 tmp; - - tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0); - WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static void dce_v11_0_show_cursor(struct drm_crtc *crtc) -{ - struct 
amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - u32 tmp; - - WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(amdgpu_crtc->cursor_addr)); - WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - lower_32_bits(amdgpu_crtc->cursor_addr)); - - tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2); - WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, - int x, int y) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - int xorigin = 0, yorigin = 0; - - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - - /* avivo cursor are offset into the total surface */ - x += crtc->x; - y += crtc->y; - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - - if (x < 0) { - xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { - yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); - y = 0; - } - - WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); - WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - - return 0; -} - -static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc, - int x, int y) -{ - int ret; - - dce_v11_0_lock_cursor(crtc, true); - ret = dce_v11_0_cursor_move_locked(crtc, x, y); - dce_v11_0_lock_cursor(crtc, false); - - return ret; -} - -static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, - struct drm_file *file_priv, - uint32_t handle, - uint32_t width, - uint32_t height, - int32_t hot_x, - int32_t hot_y) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_gem_object *obj; - struct amdgpu_bo *aobj; - int ret; - - if (!handle) { - /* turn off cursor */ - dce_v11_0_hide_cursor(crtc); - obj = NULL; - goto unpin; - } - - if ((width > amdgpu_crtc->max_cursor_width) || - (height > amdgpu_crtc->max_cursor_height)) { - DRM_ERROR("bad cursor width or height %d x %d\n", width, height); - return -EINVAL; - } - - obj = drm_gem_object_lookup(file_priv, handle); - if (!obj) { - DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id); - return -ENOENT; - } - - aobj = gem_to_amdgpu_bo(obj); - ret = amdgpu_bo_reserve(aobj, false); - if (ret != 0) { - drm_gem_object_put(obj); - return ret; - } - - aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); - amdgpu_bo_unreserve(aobj); - if (ret) { - DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_put(obj); - return ret; - } - amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); - - dce_v11_0_lock_cursor(crtc, true); - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height || - hot_x != amdgpu_crtc->cursor_hot_x || - hot_y != amdgpu_crtc->cursor_hot_y) { - int x, y; - - x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x; - y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y; - - dce_v11_0_cursor_move_locked(crtc, x, y); - - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - dce_v11_0_show_cursor(crtc); - dce_v11_0_lock_cursor(crtc, 
false); - -unpin: - if (amdgpu_crtc->cursor_bo) { - struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, true); - if (likely(ret == 0)) { - amdgpu_bo_unpin(aobj); - amdgpu_bo_unreserve(aobj); - } - drm_gem_object_put(amdgpu_crtc->cursor_bo); - } - - amdgpu_crtc->cursor_bo = obj; - return 0; -} - -static void dce_v11_0_cursor_reset(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - if (amdgpu_crtc->cursor_bo) { - dce_v11_0_lock_cursor(crtc, true); - - dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, - amdgpu_crtc->cursor_y); - - dce_v11_0_show_cursor(crtc); - - dce_v11_0_lock_cursor(crtc, false); - } -} - -static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) -{ - dce_v11_0_crtc_load_lut(crtc); - - return 0; -} - -static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - drm_crtc_cleanup(crtc); - kfree(amdgpu_crtc); -} - -static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { - .cursor_set2 = dce_v11_0_crtc_cursor_set2, - .cursor_move = dce_v11_0_crtc_cursor_move, - .gamma_set = dce_v11_0_crtc_gamma_set, - .set_config = amdgpu_display_crtc_set_config, - .destroy = dce_v11_0_crtc_destroy, - .page_flip_target = amdgpu_display_crtc_page_flip_target, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = amdgpu_enable_vblank_kms, - .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -}; - -static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - unsigned type; - - switch (mode) { - case DRM_MODE_DPMS_ON: - amdgpu_crtc->enabled = true; - amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE); - dce_v11_0_vga_enable(crtc, true); - amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); - dce_v11_0_vga_enable(crtc, false); - /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - amdgpu_irq_update(adev, &adev->crtc_irq, type); - amdgpu_irq_update(adev, &adev->pageflip_irq, type); - drm_crtc_vblank_on(crtc); - dce_v11_0_crtc_load_lut(crtc); - break; - case DRM_MODE_DPMS_STANDBY: - case DRM_MODE_DPMS_SUSPEND: - case DRM_MODE_DPMS_OFF: - drm_crtc_vblank_off(crtc); - if (amdgpu_crtc->enabled) { - dce_v11_0_vga_enable(crtc, true); - amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE); - dce_v11_0_vga_enable(crtc, false); - } - amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE); - amdgpu_crtc->enabled = false; - break; - } - /* adjust pm to dpms */ - amdgpu_dpm_compute_clocks(adev); -} - -static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc) -{ - /* disable crtc pair power gating before programming */ - amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE); - amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE); - dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} - -static void dce_v11_0_crtc_commit(struct drm_crtc *crtc) -{ - dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON); - amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE); -} - -static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_atom_ss ss; - int i; - 
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - if (crtc->primary->fb) { - int r; - struct amdgpu_bo *abo; - - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r)) - DRM_ERROR("failed to reserve abo before unpin\n"); - else { - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - } - /* disable the GRPH */ - dce_v11_0_grph_enable(crtc, false); - - amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE); - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->mode_info.crtcs[i] && - adev->mode_info.crtcs[i]->enabled && - i != amdgpu_crtc->crtc_id && - amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) { - /* one other crtc is using this pll don't turn - * off the pll - */ - goto done; - } - } - - switch (amdgpu_crtc->pll_id) { - case ATOM_PPLL0: - case ATOM_PPLL1: - case ATOM_PPLL2: - /* disable the ppll */ - amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); - break; - case ATOM_COMBOPHY_PLL0: - case ATOM_COMBOPHY_PLL1: - case ATOM_COMBOPHY_PLL2: - case ATOM_COMBOPHY_PLL3: - case ATOM_COMBOPHY_PLL4: - case ATOM_COMBOPHY_PLL5: - /* disable the ppll */ - amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); - break; - default: - break; - } -done: - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->adjusted_clock = 0; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; -} - -static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, struct drm_framebuffer *old_fb) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - if (!amdgpu_crtc->adjusted_clock) - return -EINVAL; - - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - struct amdgpu_encoder *amdgpu_encoder = - to_amdgpu_encoder(amdgpu_crtc->encoder); - int encoder_mode = - amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder); - - /* SetPixelClock calculates the plls and ss values now */ - amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, - amdgpu_crtc->pll_id, - encoder_mode, amdgpu_encoder->encoder_id, - adjusted_mode->clock, 0, 0, 0, 0, - amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss); - } else { - amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); - } - amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); - dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); - amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); - amdgpu_atombios_crtc_scaler_setup(crtc); - dce_v11_0_cursor_reset(crtc); - /* update the hw version fpr dpm */ - amdgpu_crtc->hw_mode = *adjusted_mode; - - return 0; -} - -static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_encoder *encoder; - - /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - amdgpu_crtc->encoder = encoder; - amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder); - break; - } - } - if 
((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) { - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - return false; - } - if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) - return false; - if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) - return false; - /* pick pll */ - amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc); - /* if we can't get a PPLL for a non-DP encoder, fail */ - if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) && - !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) - return false; - - return true; -} - -static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1); -} - -static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { - .dpms = dce_v11_0_crtc_dpms, - .mode_fixup = dce_v11_0_crtc_mode_fixup, - .mode_set = dce_v11_0_crtc_mode_set, - .mode_set_base = dce_v11_0_crtc_set_base, - .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic, - .prepare = dce_v11_0_crtc_prepare, - .commit = dce_v11_0_crtc_commit, - .disable = dce_v11_0_crtc_disable, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - -static void dce_v11_0_panic_flush(struct drm_plane *plane) -{ - struct drm_framebuffer *fb; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_device *adev; - uint32_t fb_format; - - if (!plane->fb) - return; - - fb = plane->fb; - amdgpu_crtc = to_amdgpu_crtc(plane->crtc); - adev = drm_to_adev(fb->dev); - - /* Disable DC tiling */ - fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; - WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); - -} - -static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = { - .get_scanout_buffer = amdgpu_display_get_scanout_buffer, - .panic_flush = dce_v11_0_panic_flush, -}; - -static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) -{ - struct amdgpu_crtc *amdgpu_crtc; - - amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + - (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); - if (amdgpu_crtc == NULL) - return -ENOMEM; - - drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs); - - drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); - amdgpu_crtc->crtc_id = index; - adev->mode_info.crtcs[index] = amdgpu_crtc; - - amdgpu_crtc->max_cursor_width = 128; - amdgpu_crtc->max_cursor_height = 128; - adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; - adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - - switch (amdgpu_crtc->crtc_id) { - case 0: - default: - amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET; - break; - case 1: - amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET; - break; - case 2: - amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET; - break; - case 3: - amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET; - break; - case 4: - amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET; - break; - case 5: - amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET; - break; - } - - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->adjusted_clock = 0; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - 
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs); - drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs); - - return 0; -} - -static int dce_v11_0_early_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg; - adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg; - - dce_v11_0_set_display_funcs(adev); - - adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev); - - switch (adev->asic_type) { - case CHIP_CARRIZO: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 9; - break; - case CHIP_STONEY: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 9; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 6; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - adev->mode_info.num_hpd = 5; - adev->mode_info.num_dig = 5; - break; - default: - /* FIXME: not supported yet */ - return -EINVAL; - } - - dce_v11_0_set_irq_funcs(adev); - - return 0; -} - -static int dce_v11_0_sw_init(struct amdgpu_ip_block *ip_block) -{ - int r, i; - struct amdgpu_device *adev = ip_block->adev; - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); - if (r) - return r; - } - - for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); - if (r) - return r; - } - - /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); - if (r) - return r; - - adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs; - - adev_to_drm(adev)->mode_config.async_page_flip = true; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; - - adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - - r = amdgpu_display_modeset_create_props(adev); - if (r) - return r; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - - /* allocate crtcs */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = dce_v11_0_crtc_init(adev, i); - if (r) - return r; - } - - if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_display_print_display_setup(adev_to_drm(adev)); - else - return -EINVAL; - - /* setup afmt */ - r = dce_v11_0_afmt_init(adev); - if (r) - return r; - - r = dce_v11_0_audio_init(adev); - if (r) - return r; - - /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? 
*/ - adev_to_drm(adev)->vblank_disable_immediate = true; - - r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); - if (r) - return r; - - INIT_DELAYED_WORK(&adev->hotplug_work, - amdgpu_display_hotplug_work_func); - - drm_kms_helper_poll_init(adev_to_drm(adev)); - - adev->mode_info.mode_config_initialized = true; - return 0; -} - -static int dce_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - drm_edid_free(adev->mode_info.bios_hardcoded_edid); - - drm_kms_helper_poll_fini(adev_to_drm(adev)); - - dce_v11_0_audio_fini(adev); - - dce_v11_0_afmt_fini(adev); - - drm_mode_config_cleanup(adev_to_drm(adev)); - adev->mode_info.mode_config_initialized = false; - - return 0; -} - -static int dce_v11_0_hw_init(struct amdgpu_ip_block *ip_block) -{ - int i; - struct amdgpu_device *adev = ip_block->adev; - - dce_v11_0_init_golden_registers(adev); - - /* disable vga render */ - dce_v11_0_set_vga_render_state(adev, false); - /* init dig PHYs, disp eng pll */ - amdgpu_atombios_crtc_powergate_init(adev); - amdgpu_atombios_encoder_init_dig(adev); - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, - DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); - amdgpu_atombios_crtc_set_dce_clock(adev, 0, - DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS); - } else { - amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); - } - - /* initialize hpd */ - dce_v11_0_hpd_init(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - dce_v11_0_pageflip_interrupt_init(adev); - - return 0; -} - -static int dce_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) -{ - int i; - struct amdgpu_device *adev = ip_block->adev; - - dce_v11_0_hpd_fini(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - dce_v11_0_pageflip_interrupt_fini(adev); - - flush_delayed_work(&adev->hotplug_work); - - return 0; -} - -static int dce_v11_0_suspend(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int r; - - r = amdgpu_display_suspend_helper(adev); - if (r) - return r; - - adev->mode_info.bl_level = - amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - - return dce_v11_0_hw_fini(ip_block); -} - -static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int ret; - - amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, - adev->mode_info.bl_level); - - ret = dce_v11_0_hw_init(ip_block); - - /* turn on the BL */ - if (adev->mode_info.bl_encoder) { - u8 bl_level = amdgpu_display_backlight_get_level(adev, - adev->mode_info.bl_encoder); - amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder, - bl_level); - } - if (ret) - return ret; - - return amdgpu_display_resume_helper(adev); -} - -static bool dce_v11_0_is_idle(struct amdgpu_ip_block *ip_block) -{ - return true; -} - -static int dce_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) -{ - u32 srbm_soft_reset = 0, tmp; - struct amdgpu_device *adev = ip_block->adev; - - if (dce_v11_0_is_display_hung(adev)) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, 
"SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - /* Wait a little for things to settle down */ - udelay(50); - } - return 0; -} - -static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - u32 lb_interrupt_mask; - - if (crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VBLANK_INTERRUPT_MASK, 0); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - case AMDGPU_IRQ_STATE_ENABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VBLANK_INTERRUPT_MASK, 1); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - default: - break; - } -} - -static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - u32 lb_interrupt_mask; - - if (crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VLINE_INTERRUPT_MASK, 0); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - case AMDGPU_IRQ_STATE_ENABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VLINE_INTERRUPT_MASK, 1); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - default: - break; - } -} - -static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned hpd, - enum amdgpu_interrupt_state state) -{ - u32 tmp; - - if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hpd %d\n", hpd); - return 0; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); - break; - case AMDGPU_IRQ_STATE_ENABLE: - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); - break; - default: - break; - } - - return 0; -} - -static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - switch (type) { - case AMDGPU_CRTC_IRQ_VBLANK1: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK2: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK3: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK4: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK5: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK6: - 
dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state); - break; - case AMDGPU_CRTC_IRQ_VLINE1: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state); - break; - case AMDGPU_CRTC_IRQ_VLINE2: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state); - break; - case AMDGPU_CRTC_IRQ_VLINE3: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state); - break; - case AMDGPU_CRTC_IRQ_VLINE4: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state); - break; - case AMDGPU_CRTC_IRQ_VLINE5: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state); - break; - case AMDGPU_CRTC_IRQ_VLINE6: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state); - break; - default: - break; - } - return 0; -} - -static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned type, - enum amdgpu_interrupt_state state) -{ - u32 reg; - - if (type >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", type); - return -EINVAL; - } - - reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]); - if (state == AMDGPU_IRQ_STATE_DISABLE) - WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type], - reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); - else - WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type], - reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); - - return 0; -} - -static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - unsigned long flags; - unsigned crtc_id; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_flip_work *works; - - crtc_id = (entry->src_id - 8) >> 1; - amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - - if (crtc_id >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", crtc_id); - return -EINVAL; - } - - if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) & - GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK) - WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id], - GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK); - - /* IRQ could occur when in initial stage */ - if(amdgpu_crtc == NULL) - return 0; - - spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ - DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " - "AMDGPU_FLIP_SUBMITTED(%d)\n", - amdgpu_crtc->pflip_status, - AMDGPU_FLIP_SUBMITTED); - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - return 0; - } - - /* page flip completed. 
clean up */ - amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; - amdgpu_crtc->pflip_works = NULL; - - /* wakeup usersapce */ - if(works->event) - drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); - - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - - drm_crtc_vblank_put(&amdgpu_crtc->base); - schedule_work(&works->unpin_work); - - return 0; -} - -static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, - int hpd) -{ - u32 tmp; - - if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hpd %d\n", hpd); - return; - } - - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); -} - -static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev, - int crtc) -{ - u32 tmp; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]); - tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1); - WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp); -} - -static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev, - int crtc) -{ - u32 tmp; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]); - tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1); - WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp); -} - -static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - unsigned crtc = entry->src_id - 1; - uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, - crtc); - - switch (entry->src_data[0]) { - case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) - dce_v11_0_crtc_vblank_int_ack(adev, crtc); - else - DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); - - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev_to_drm(adev), crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); - - break; - case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) - dce_v11_0_crtc_vline_int_ack(adev, crtc); - else - DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); - - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - - break; - default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); - break; - } - - return 0; -} - -static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - uint32_t disp_int, mask; - unsigned hpd; - - if (entry->src_data[0] >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); - return 0; - } - - hpd = entry->src_data[0]; - disp_int = RREG32(interrupt_status_offsets[hpd].reg); - mask = interrupt_status_offsets[hpd].hpd; - - if (disp_int & mask) { - dce_v11_0_hpd_int_ack(adev, hpd); - schedule_delayed_work(&adev->hotplug_work, 0); - DRM_DEBUG("IH: HPD%d\n", hpd + 1); - } - - return 0; -} - -static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, - enum amd_clockgating_state state) -{ - return 0; -} - -static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, - enum amd_powergating_state state) -{ - return 0; -} - -static const struct amd_ip_funcs dce_v11_0_ip_funcs = { - .name = "dce_v11_0", - .early_init = 
dce_v11_0_early_init, - .sw_init = dce_v11_0_sw_init, - .sw_fini = dce_v11_0_sw_fini, - .hw_init = dce_v11_0_hw_init, - .hw_fini = dce_v11_0_hw_fini, - .suspend = dce_v11_0_suspend, - .resume = dce_v11_0_resume, - .is_idle = dce_v11_0_is_idle, - .soft_reset = dce_v11_0_soft_reset, - .set_clockgating_state = dce_v11_0_set_clockgating_state, - .set_powergating_state = dce_v11_0_set_powergating_state, -}; - -static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - - amdgpu_encoder->pixel_clock = adjusted_mode->clock; - - /* need to call this here rather than in prepare() since we need some crtc info */ - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); - - /* set scaler clears this on some chips */ - dce_v11_0_set_interleave(encoder->crtc, mode); - - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { - dce_v11_0_afmt_enable(encoder, true); - dce_v11_0_afmt_setmode(encoder, adjusted_mode); - } -} - -static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder) -{ - struct amdgpu_device *adev = drm_to_adev(encoder->dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - - if ((amdgpu_encoder->active_device & - (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) || - (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != - ENCODER_OBJECT_ID_NONE)) { - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - if (dig) { - dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder); - if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT) - dig->afmt = adev->mode_info.afmt[dig->dig_encoder]; - } - } - - amdgpu_atombios_scratch_regs_lock(adev, true); - - if (connector) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - /* select the clock/data port if it uses a router */ - if (amdgpu_connector->router.cd_valid) - amdgpu_i2c_router_select_cd_port(amdgpu_connector); - - /* turn eDP panel on for mode set */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - amdgpu_atombios_encoder_set_edp_panel_power(connector, - ATOM_TRANSMITTER_ACTION_POWER_ON); - } - - /* this is needed for the pll/ss setup to work correctly in some cases */ - amdgpu_atombios_encoder_set_crtc_source(encoder); - /* set up the FMT blocks */ - dce_v11_0_program_fmt(encoder); -} - -static void dce_v11_0_encoder_commit(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - /* need to call this here as we need the crtc set up */ - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON); - amdgpu_atombios_scratch_regs_lock(adev, false); -} - -static void dce_v11_0_encoder_disable(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig; - - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); - - if (amdgpu_atombios_encoder_is_digital(encoder)) { - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) - dce_v11_0_afmt_enable(encoder, false); - dig = amdgpu_encoder->enc_priv; - dig->dig_encoder = -1; - } - amdgpu_encoder->active_device = 0; -} - -/* these are handled by the primary encoders */ -static void dce_v11_0_ext_prepare(struct drm_encoder *encoder) -{ - -} - -static void 
dce_v11_0_ext_commit(struct drm_encoder *encoder) -{ - -} - -static void -dce_v11_0_ext_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - -} - -static void dce_v11_0_ext_disable(struct drm_encoder *encoder) -{ - -} - -static void -dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode) -{ - -} - -static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = { - .dpms = dce_v11_0_ext_dpms, - .prepare = dce_v11_0_ext_prepare, - .mode_set = dce_v11_0_ext_mode_set, - .commit = dce_v11_0_ext_commit, - .disable = dce_v11_0_ext_disable, - /* no detect for TMDS/LVDS yet */ -}; - -static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = { - .dpms = amdgpu_atombios_encoder_dpms, - .mode_fixup = amdgpu_atombios_encoder_mode_fixup, - .prepare = dce_v11_0_encoder_prepare, - .mode_set = dce_v11_0_encoder_mode_set, - .commit = dce_v11_0_encoder_commit, - .disable = dce_v11_0_encoder_disable, - .detect = amdgpu_atombios_encoder_dig_detect, -}; - -static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = { - .dpms = amdgpu_atombios_encoder_dpms, - .mode_fixup = amdgpu_atombios_encoder_mode_fixup, - .prepare = dce_v11_0_encoder_prepare, - .mode_set = dce_v11_0_encoder_mode_set, - .commit = dce_v11_0_encoder_commit, - .detect = amdgpu_atombios_encoder_dac_detect, -}; - -static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder); - kfree(amdgpu_encoder->enc_priv); - drm_encoder_cleanup(encoder); - kfree(amdgpu_encoder); -} - -static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = { - .destroy = dce_v11_0_encoder_destroy, -}; - -static void dce_v11_0_encoder_add(struct amdgpu_device *adev, - uint32_t encoder_enum, - uint32_t supported_device, - u16 caps) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_encoder *encoder; - struct amdgpu_encoder *amdgpu_encoder; - - /* see if we already added it */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - amdgpu_encoder = to_amdgpu_encoder(encoder); - if (amdgpu_encoder->encoder_enum == encoder_enum) { - amdgpu_encoder->devices |= supported_device; - return; - } - - } - - /* add a new one */ - amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL); - if (!amdgpu_encoder) - return; - - encoder = &amdgpu_encoder->base; - switch (adev->mode_info.num_crtc) { - case 1: - encoder->possible_crtcs = 0x1; - break; - case 2: - default: - encoder->possible_crtcs = 0x3; - break; - case 3: - encoder->possible_crtcs = 0x7; - break; - case 4: - encoder->possible_crtcs = 0xf; - break; - case 5: - encoder->possible_crtcs = 0x1f; - break; - case 6: - encoder->possible_crtcs = 0x3f; - break; - } - - amdgpu_encoder->enc_priv = NULL; - - amdgpu_encoder->encoder_enum = encoder_enum; - amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - amdgpu_encoder->devices = supported_device; - amdgpu_encoder->rmx_type = RMX_OFF; - amdgpu_encoder->underscan_type = UNDERSCAN_OFF; - amdgpu_encoder->is_ext_encoder = false; - amdgpu_encoder->caps = caps; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - drm_encoder_helper_add(encoder, 
&dce_v11_0_dac_helper_funcs); - break; - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - amdgpu_encoder->rmx_type = RMX_FULL; - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder); - } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); - } else { - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); - } - drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs); - break; - case ENCODER_OBJECT_ID_SI170B: - case ENCODER_OBJECT_ID_CH7303: - case ENCODER_OBJECT_ID_EXTERNAL_SDVOA: - case ENCODER_OBJECT_ID_EXTERNAL_SDVOB: - case ENCODER_OBJECT_ID_TITFP513: - case ENCODER_OBJECT_ID_VT1623: - case ENCODER_OBJECT_ID_HDMI_SI1930: - case ENCODER_OBJECT_ID_TRAVIS: - case ENCODER_OBJECT_ID_NUTMEG: - /* these are handled by the primary encoders */ - amdgpu_encoder->is_ext_encoder = true; - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); - else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - else - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); - drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs); - break; - } -} - -static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { - .bandwidth_update = &dce_v11_0_bandwidth_update, - .vblank_get_counter = &dce_v11_0_vblank_get_counter, - .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, - .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, - .hpd_sense = &dce_v11_0_hpd_sense, - .hpd_set_polarity = &dce_v11_0_hpd_set_polarity, - .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg, - .page_flip = &dce_v11_0_page_flip, - .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos, - .add_encoder = &dce_v11_0_encoder_add, - .add_connector = &amdgpu_connector_add, -}; - -static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) -{ - adev->mode_info.funcs = &dce_v11_0_display_funcs; -} - -static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = { - .set = dce_v11_0_set_crtc_irq_state, - .process = dce_v11_0_crtc_irq, -}; - -static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = { - .set = dce_v11_0_set_pageflip_irq_state, - .process = dce_v11_0_pageflip_irq, -}; - -static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = { - .set = dce_v11_0_set_hpd_irq_state, - .process = dce_v11_0_hpd_irq, -}; - -static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev) -{ - if (adev->mode_info.num_crtc > 0) - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; - else - adev->crtc_irq.num_types = 0; - adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs; - - adev->pageflip_irq.num_types = adev->mode_info.num_crtc; - adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs; 
- - adev->hpd_irq.num_types = adev->mode_info.num_hpd; - adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs; -} - -const struct amdgpu_ip_block_version dce_v11_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_DCE, - .major = 11, - .minor = 0, - .rev = 0, - .funcs = &dce_v11_0_ip_funcs, -}; - -const struct amdgpu_ip_block_version dce_v11_2_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_DCE, - .major = 11, - .minor = 2, - .rev = 0, - .funcs = &dce_v11_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 264183ab24ec..8841d7213de4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4075,7 +4075,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned int index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; memset(&ib, 0, sizeof(ib)); @@ -4322,8 +4322,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; int ctx_reg_offset; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3d9c045a8a64..66c47c466532 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -603,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; /* MES KIQ fw hasn't indirect buffer support for now */ @@ -850,8 +850,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; int ctx_reg_offset; @@ -1654,6 +1653,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.pfp_fw_version >= 102 && + adev->gfx.mec_fw_version >= 66 && + adev->mes.fw_version[0] >= 128) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 5dbc5dbc694a..710ec9c34e43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -497,7 +497,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; /* MES KIQ fw hasn't indirect buffer support for now */ @@ -685,8 +685,7 @@ static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v12_0_get_csb_buffer(struct amdgpu_device 
*adev, u32 *buffer) { u32 count = 0, clustercount = 0, i; const struct cs_section_def *sect = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 70d7a1f434c4..7693b7953426 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -86,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin"); MODULE_FIRMWARE("amdgpu/hainan_rlc.bin"); static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev); -static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); +static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer); //static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v6_0_init_pg(struct amdgpu_device *adev); @@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; - volatile u32 *dst_ptr; + u32 *dst_ptr; u32 dws; u64 reg_list_mc_addr; const struct cs_section_def *cs_data; @@ -2855,8 +2855,7 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 2aa323dab34e..5976ed55d9db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -883,7 +883,7 @@ static const u32 kalindi_rlc_save_restore_register_list[] = { }; static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); -static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); +static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); @@ -3882,8 +3882,7 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 367449d8061b..0856ff65288c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1220,8 +1220,7 @@ out: return err; } -static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a6ff9a137a83..dd19a97436db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1648,8 +1648,7 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 8ba66d4dfe86..77f9d5b9a556 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3560,6 +3560,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = 
ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE; unsigned long flags; int r; @@ -3597,6 +3598,7 @@ pipe_reset: if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) return -EOPNOTSUPP; r = gfx_v9_4_3_reset_hw_pipe(ring); + reset_mode = AMDGPU_RESET_TYPE_PER_PIPE; dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, r ? "failed" : "successfully"); if (r) @@ -3619,10 +3621,20 @@ pipe_reset: r = amdgpu_ring_test_ring(kiq_ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) + goto pipe_reset; + dev_err(adev->dev, "fail to remap queue\n"); return r; } + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) { + r = amdgpu_ring_test_ring(ring); + if (r) + goto pipe_reset; + } + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 76d3c40735b0..f4a19357ccbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -337,7 +337,7 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, int vmid, i; if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready && - (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x81) { + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) { struct mes_inv_tlbs_pasid_input input = {0}; input.pasid = pasid; input.flush_type = flush_type; @@ -521,6 +521,7 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_NOALLOC; if (vm_flags & AMDGPU_VM_PAGE_PRT) { + *flags |= AMDGPU_PTE_PRT_GFX12; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_SYSTEM; *flags |= AMDGPU_PTE_IS_PTE; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8404695eb13f..0d1dd587db5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1834,11 +1834,19 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { + static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; + u32 vram_info; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; + + if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) { + vram_info = RREG32(regBIF_BIOS_SCRATCH_4); + adev->gmc.vram_vendor = vram_info & 0xF; + } } static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 9e428e669ada..b5bb7f4d607c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -557,7 +557,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .extra_dw = 64, + .extra_bytes = 256, .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 58239c405fda..27c76bd424cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" -#include 
"amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -806,7 +805,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -854,58 +853,3 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; - -/** - * jpeg_v2_dec_ring_parse_cs - command submission parser - * - * @parser: Command submission parser context - * @job: the job to parse - * @ib: the IB to parse - * - * Parse the command stream, return -EINVAL for invalid packet, - * 0 otherwise - */ -int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - u32 i, reg, res, cond, type; - struct amdgpu_device *adev = parser->adev; - - for (i = 0; i < ib->length_dw ; i += 2) { - reg = CP_PACKETJ_GET_REG(ib->ptr[i]); - res = CP_PACKETJ_GET_RES(ib->ptr[i]); - cond = CP_PACKETJ_GET_COND(ib->ptr[i]); - type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); - - if (res) /* only support 0 at the moment */ - return -EINVAL; - - switch (type) { - case PACKETJ_TYPE0: - if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || - reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE3: - if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || - reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE6: - if (ib->ptr[i] == CP_PACKETJ_NOP) - continue; - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - default: - dev_err(adev->dev, "Unknown packet type %d !\n", type); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 63fadda7a673..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,9 +45,6 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -#define JPEG_REG_RANGE_START 0x4000 -#define JPEG_REG_RANGE_END 0x41c2 - void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -60,9 +57,6 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); -int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib); extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 3e2c389242db..20983f126b49 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -696,7 +696,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = 
amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -727,7 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a44eb2667664..d1a011c40ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -597,7 +597,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index da3ee69f1a3b..33db2c1ae6cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -762,7 +762,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index a78144773fab..aae7328973d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1177,7 +1177,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 5d86e1d846eb..54fd9c800c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -807,7 +807,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 34c70270ea1d..46bf15dce2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -683,7 +683,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = 
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index aee26f80bd53..2db9b2c63693 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -254,6 +254,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; struct drm_amdgpu_userq_in *mqd_user = args_in; struct amdgpu_mqd_prop *userq_props; + struct amdgpu_gfx_shadow_info shadow_info; int r; /* Structure to initialize MQD for userqueue using generic MQD init function */ @@ -263,13 +264,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, return -ENOMEM; } - if (!mqd_user->wptr_va || !mqd_user->rptr_va || - !mqd_user->queue_va || mqd_user->queue_size == 0) { - DRM_ERROR("Invalid MQD parameters for userqueue\n"); - r = -EINVAL; - goto free_props; - } - r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); if (r) { DRM_ERROR("Failed to create MQD object for userqueue\n"); @@ -286,6 +280,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->doorbell_index = queue->doorbell_index; userq_props->fence_address = queue->fence_drv->gpu_addr; + if (adev->gfx.funcs->get_gfx_shadow_info) + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true); if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; @@ -302,6 +298,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va, + max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE))) + goto free_mqd; + userq_props->eop_gpu_addr = compute_mqd->eop_va; userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; @@ -329,6 +329,11 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->csa_addr = mqd_gfx_v11->csa_va; userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + + if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va, + shadow_info.shadow_size)) + goto free_mqd; + kfree(mqd_gfx_v11); } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; @@ -346,6 +351,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va, + shadow_info.csa_size)) + goto free_mqd; + userq_props->csa_addr = mqd_sdma_v11->csa_va; kfree(mqd_sdma_v11); } @@ -395,10 +404,82 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); } +static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_suspend_gang_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + signed long timeout = 2100000; /* 2100 ms */ + u64 fence_gpu_addr; + u32 fence_offset; + u64 *fence_ptr; + int i, r; + + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + return 0; + r = amdgpu_device_wb_get(adev, &fence_offset); + if (r) + return r; + + fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4); + fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; + *fence_ptr = 0; + + memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input)); + 
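/*
 * Note on the flow below: the suspend request hands MES the gang context
 * address together with a writeback "fence".  suspend_fence_addr points at
 * the wb slot reserved above and suspend_fence_value is what MES writes
 * there once the gang has actually been taken off the hardware.  The CPU
 * side then polls the mapped slot (*fence_ptr) in 1 us steps for up to
 * ~2.1 s and returns -ETIMEDOUT if the firmware never signals, freeing the
 * wb slot in either case.
 */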
queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.suspend_fence_addr = fence_gpu_addr; + queue_input.suspend_fence_value = 1; + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to suspend gang: %d\n", r); + goto out; + } + + for (i = 0; i < timeout; i++) { + if (*fence_ptr == 1) + goto out; + udelay(1); + } + r = -ETIMEDOUT; + +out: + amdgpu_device_wb_free(adev, fence_offset); + return r; +} + +static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_resume_gang_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + if (queue->state == AMDGPU_USERQ_STATE_HUNG) + return -EINVAL; + if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED) + return 0; + + memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input)); + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r); + return r; +} + const struct amdgpu_userq_funcs userq_mes_funcs = { .mqd_create = mes_userq_mqd_create, .mqd_destroy = mes_userq_mqd_destroy, .unmap = mes_userq_unmap, .map = mes_userq_map, .detect_and_reset = mes_userq_detect_and_reset, + .preempt = mes_userq_preempt, + .restore = mes_userq_restore, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 3b91ea601add..e82188431f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -713,6 +713,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(mes->adev->dev, + "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); + if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 998893dff08e..aff06f06aeee 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -769,6 +769,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(adev->dev, + "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . 
When we have unmapped doorbell diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 457972aa5632..e5282a5d05d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -202,6 +202,9 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; + case IDH_REQ_RAS_CHK_CRITI: + event = IDH_REQ_RAS_CHK_CRITI_READY; + break; default: break; } @@ -556,6 +559,16 @@ static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev) return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES); } +static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr) +{ + uint32_t addr_hi, addr_lo; + + addr_hi = (uint32_t)(addr >> 32); + addr_lo = (uint32_t)(addr & 0xFFFFFFFF); + return xgpu_nv_send_access_requests_with_param( + adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -569,4 +582,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, .req_bad_pages = xgpu_nv_req_ras_bad_pages, + .req_ras_chk_criti = xgpu_nv_check_vf_critical_region }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 5808689562cc..c1083e5e41e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -43,6 +43,7 @@ enum idh_request { IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, IDH_REQ_RAS_BAD_PAGES = 205, + IDH_REQ_RAS_CHK_CRITI = 206 }; enum idh_event { @@ -62,6 +63,7 @@ enum idh_event { IDH_RAS_BAD_PAGES_READY = 15, IDH_RAS_BAD_PAGES_NOTIFICATION = 16, IDH_UNRECOV_ERR_NOTIFICATION = 17, + IDH_REQ_RAS_CHK_CRITI_READY = 18, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index dd2d66090d23..68aef47254a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -743,7 +743,7 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) DRM_ERROR("Failed to register hw i2c, err: %d\n", res); @@ -752,9 +752,6 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev) { - struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus; - - i2c_del_adapter(control); adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1e89ba153d9d..a316797875a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -193,7 +193,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode; if (amdgpu_vcnfw_log) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; amdgpu_vcn_fwlog_init(adev->vcn.inst); @@ -230,11 +230,11 @@ static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) jpeg_v1_0_sw_fini(ip_block); - r = 
amdgpu_vcn_sw_fini(adev, 0); + amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); - return r; + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index b115137ab2d6..8897dcc9c1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -137,7 +137,7 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; int i, r; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -252,7 +252,7 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r, idx; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; if (drm_dev_enter(adev_to_drm(adev), &idx)) { fw_shared->present_flag_0 = 0; @@ -267,9 +267,9 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - r = amdgpu_vcn_sw_fini(adev, 0); + amdgpu_vcn_sw_fini(adev, 0); - return r; + return 0; } /** @@ -853,7 +853,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; int ret; @@ -1001,7 +1001,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1308,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 904b94bc8693..cebee453871c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -277,7 +277,7 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << j)) continue; @@ -420,7 +420,7 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) { int i, r, idx; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { @@ -442,9 +442,7 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) r = 
amdgpu_vcn_suspend(adev, i); if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } return 0; @@ -1000,7 +998,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int ret; @@ -1157,7 +1155,7 @@ static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1669,7 +1667,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index f3085137ba08..d9cf8f0feeb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -191,7 +191,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -327,7 +327,7 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -349,9 +349,7 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } return 0; @@ -1031,7 +1029,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int ret; @@ -1196,7 +1194,7 @@ static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int j, k, r; @@ -1717,7 +1715,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; @@ -1836,7 +1834,7 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring) static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; 
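/*
 * fw_shared is the CPU mapping of the VCN firmware shared buffer
 * (inst->fw_shared.cpu_addr).  In DPG mode the block below mirrors every
 * write-pointer update into that buffer's rb.wptr field and into the
 * SCRATCH2 register, so the position survives while the block is
 * power-gated.
 */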
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { /*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index bc9dfe5ffea7..3ae666522d57 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -148,7 +148,7 @@ static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block) static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); @@ -278,7 +278,7 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -302,11 +302,8 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -1000,7 +997,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; int ret; @@ -1140,7 +1137,7 @@ static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -1357,8 +1354,8 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) struct mmsch_v4_0_cmd_end end = { {0} }; struct mmsch_v4_0_init_header header; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -1609,7 +1606,7 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int r = 0; @@ -1980,7 +1977,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), + .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 7b93a275ec4f..eacf4e93ba2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -212,7 +212,11 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) ring->vm_hub = 
AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + + /* There are no per-instance irq source IDs on 4.0.3, the IH + * packets use a separate field to differentiate instances. + */ + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0, AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); if (r) @@ -259,7 +263,7 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(&adev->ddev, &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; @@ -279,11 +283,8 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -844,7 +845,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; int vcn_inst, ret; @@ -1011,8 +1012,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) struct mmsch_v4_0_cmd_end end = { {0} }; struct mmsch_v4_0_3_init_header header; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -1186,7 +1187,7 @@ static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; int j, k, r, vcn_inst; uint32_t tmp; @@ -1396,7 +1397,7 @@ static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; int r = 0, vcn_inst; uint32_t tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 6dbf33b26ee2..b107ee80e472 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -149,7 +149,7 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -249,7 +249,7 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -270,9 +270,7 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } return 0; @@ -912,7 +910,7 @@ static int 
vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; int ret; @@ -1049,7 +1047,7 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -1268,7 +1266,7 @@ static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 536f06b81706..0202df5db1e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -129,7 +129,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -211,7 +211,7 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -232,11 +232,8 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -695,7 +692,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; int ret; @@ -805,7 +802,7 @@ static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -998,7 +995,7 @@ static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 4b01e35ad7ef..714350cabf2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -113,6 +113,25 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->vcn.supported_reset = + 
amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 12): + if ((adev->psp.sos.fw_version >= 0x00450025) && amdgpu_dpm_reset_vcn_is_supported(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; + default: + break; + } + + return 0; +} + static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { struct amdgpu_vcn5_fw_shared *fw_shared; @@ -187,10 +206,6 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) vcn_v5_0_1_fw_shared_init(adev, i); } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->vcn.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -226,7 +241,7 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; @@ -245,14 +260,28 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } - amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); + + return 0; +} + +static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i) +{ + struct amdgpu_ring *ring; + int vcn_inst; + + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + return 0; } @@ -267,7 +296,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v5_0_1_start_sriov(adev); @@ -285,14 +314,8 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 11 * vcn_inst), - adev->vcn.inst[i].aid_id); + vcn_v5_0_1_hw_init_inst(adev, i); /* Re-init fw_shared, if required */ vcn_v5_0_1_fw_shared_init(adev, i); @@ -643,7 +666,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared = + struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; @@ -779,8 +802,8 @@ static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev) struct mmsch_v5_0_cmd_end end = { {0} }; struct mmsch_v5_0_init_header header; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; 
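/*
 * SR-IOV bring-up: the VF does not program VCN registers itself.  The
 * command structures below (an init header, direct register writes and a
 * terminating end marker) are packed into the mm_table allocated in
 * sw_init and handed to the MMSCH firmware, which replays them on the
 * VF's behalf; fw_shared/rb_setup carry the ring-buffer layout the
 * firmware should use.
 */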
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -954,7 +977,7 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r, vcn_inst; @@ -1146,7 +1169,7 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int r = 0, vcn_inst; @@ -1276,6 +1299,31 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + vcn_v5_0_1_hw_init_inst(adev, ring->me); + vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram); + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1304,6 +1352,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_1_ring_reset, }; /** @@ -1507,7 +1556,7 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { .name = "vcn_v5_0_1", .early_init = vcn_v5_0_1_early_init, - .late_init = NULL, + .late_init = vcn_v5_0_1_late_init, .sw_init = vcn_v5_0_1_sw_init, .sw_fini = vcn_v5_0_1_sw_fini, .hw_init = vcn_v5_0_1_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9b3510e53112..a611a7345125 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -67,7 +67,6 @@ #include "sdma_v2_4.h" #include "sdma_v3_0.h" #include "dce_v10_0.h" -#include "dce_v11_0.h" #include "iceland_ih.h" #include "tonga_ih.h" #include "cz_ih.h" @@ -2124,8 +2123,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); break; @@ -2142,8 +2139,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block); #if defined(CONFIG_DRM_AMD_ACP) @@ -2163,8 +2158,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); amdgpu_device_ip_block_add(adev, 
&uvd_v6_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); #if defined(CONFIG_DRM_AMD_ACP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 8535a52a62ca..0f0719528bcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -521,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); } - minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL); - if (!minfo.cu_mask.ptr) - return -ENOMEM; - - retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size); - if (retval) { + minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size); + if (IS_ERR(minfo.cu_mask.ptr)) { pr_debug("Could not copy CU mask from userspace"); - retval = -EFAULT; - goto out; + return PTR_ERR(minfo.cu_mask.ptr); } mutex_lock(&p->mutex); @@ -538,7 +533,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, mutex_unlock(&p->mutex); -out: kfree(minfo.cu_mask.ptr); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7e749f9b6d69..e9cfb80bd436 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) mutex_init(&kfd->doorbell_mutex); ida_init(&kfd->doorbell_ida); + atomic_set(&kfd->kfd_processes_count, 0); return kfd; } @@ -1133,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) } for (i = 0; i < kfd->num_nodes; i++) { - node = kfd->nodes[i]; + /* Race if another thread in b/w + * kfd_cleanup_nodes and kfree(kfd), + * when kfd->nodes[i] = NULL + */ + if (kfd->nodes[i]) + node = kfd->nodes[i]; + else + return; + spin_lock_irqsave(&node->interrupt_lock, flags); if (node->interrupts_active @@ -1485,6 +1494,15 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd) mutex_lock(&kfd_processes_mutex); + /* kfd_processes_count is per kfd_dev, return -EBUSY without + * further check + */ + if (!!atomic_read(&kfd->kfd_processes_count)) { + pr_debug("process_wq_release not finished\n"); + r = -EBUSY; + goto out; + } + if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd)) goto out; @@ -1550,6 +1568,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return ret; } +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.unhalt(node->dqm); + if (r) { + dev_err(kfd_device, "Error in starting scheduler\n"); + return r; + } + } + return 0; +} + int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; @@ -1567,6 +1604,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.halt(node->dqm); + if (r) + return r; + } + return 0; +} + bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 86315ecb6f1d..59a5a3fea65d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -39,22 +39,22 @@ #endif #define dev_fmt(fmt) "kfd_migrate: " fmt -static uint64_t -svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) +static u64 +svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr) { return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); } static int -svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, - dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags) +svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages, + dma_addr_t *addr, u64 *gart_addr, u64 flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; unsigned int num_dw, num_bytes; struct dma_fence *fence; - uint64_t src_addr, dst_addr; - uint64_t pte_flags; + u64 src_addr, dst_addr; + u64 pte_flags; void *cpu_addr; int r; @@ -123,15 +123,15 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, static int svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, - uint64_t *vram, uint64_t npages, + u64 *vram, u64 npages, enum MIGRATION_COPY_DIR direction, struct dma_fence **mfence) { - const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t gart_s, gart_d; + u64 gart_s, gart_d; struct dma_fence *next; - uint64_t size; + u64 size; int r; mutex_lock(&adev->mman.gtt_window_lock); @@ -261,30 +261,42 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static long +static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) +{ + unsigned long mpages = 0; + unsigned long i; + + for (i = 0; i < migrate->npages; i++) { + if (migrate->dst[i] & MIGRATE_PFN_VALID && + migrate->src[i] & MIGRATE_PFN_MIGRATE) + mpages++; + } + return mpages; +} + +static int svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch, uint64_t ttm_res_offset) + dma_addr_t *scratch, u64 ttm_res_offset) { - uint64_t npages = migrate->npages; + u64 npages = migrate->npages; struct amdgpu_device *adev = node->adev; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; - long mpages; + u64 mpages = 0; dma_addr_t *src; - uint64_t *dst; - uint64_t i, j; + u64 *dst; + u64 i, j; int r; pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, prange->last, ttm_res_offset); src = scratch; - dst = (uint64_t *)(scratch + npages); + dst = (u64 *)(scratch + npages); amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); - mpages = 0; for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) { struct page *spage; @@ -345,14 +357,13 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, out_free_vram_pages: if (r) { pr_debug("failed %d to copy memory to vram\n", r); - while (i-- && mpages) { + for (i = 0; i < npages && mpages; i++) { if (!dst[i]) continue; svm_migrate_put_vram_page(adev, dst[i]); migrate->dst[i] = 0; mpages--; } - mpages = r; } #ifdef DEBUG_FORCE_MIXED_DOMAINS @@ -370,22 +381,22 @@ out_free_vram_pages: } #endif - return mpages; + return r; } static long svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) + struct vm_area_struct *vma, u64 start, + u64 end, uint32_t trigger, u64 ttm_res_offset) { struct kfd_process *p = 
container_of(prange->svms, struct kfd_process, svms); - uint64_t npages = (end - start) >> PAGE_SHIFT; + u64 npages = (end - start) >> PAGE_SHIFT; struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; - long mpages = 0; + unsigned long mpages = 0; dma_addr_t *scratch; void *buf; int r = -ENOMEM; @@ -398,7 +409,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); buf = kvcalloc(npages, - 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t), GFP_KERNEL); if (!buf) goto out; @@ -431,17 +442,15 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, else pr_debug("0x%lx pages collected\n", cpages); - mpages = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); + r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); - if (mpages >= 0) - pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", - mpages, cpages, migrate.npages); - else - r = mpages; + mpages = svm_migrate_successful_pages(&migrate); + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages); @@ -451,13 +460,14 @@ out_free: start >> PAGE_SHIFT, end >> PAGE_SHIFT, 0, node->id, trigger, r); out: - if (!r && mpages > 0) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_in, pdd->page_in + mpages); - } - return r ? 
r : mpages; + return mpages; + } + return r; } /** @@ -481,7 +491,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, { unsigned long addr, start, end; struct vm_area_struct *vma; - uint64_t ttm_res_offset; + u64 ttm_res_offset; struct kfd_node *node; unsigned long mpages = 0; long r = 0; @@ -568,18 +578,17 @@ static void svm_migrate_page_free(struct page *page) } } -static long +static int svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch, uint64_t npages) + dma_addr_t *scratch, u64 npages) { struct device *dev = adev->dev; - uint64_t *src; + u64 *src; dma_addr_t *dst; struct page *dpage; - long mpages; - uint64_t i = 0, j; - uint64_t addr; + u64 i = 0, j; + u64 addr; int r = 0; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, @@ -587,10 +596,9 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, addr = migrate->start; - src = (uint64_t *)(scratch + npages); + src = (u64 *)(scratch + npages); dst = scratch; - mpages = 0; for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) { struct page *spage; @@ -639,7 +647,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); - mpages++; j++; } @@ -649,17 +656,13 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, out_oom: if (r) { pr_debug("failed %d copy to ram\n", r); - while (i-- && mpages) { - if (!migrate->dst[i]) - continue; + while (i--) { svm_migrate_put_sys_page(dst[i]); migrate->dst[i] = 0; - mpages--; } - mpages = r; } - return mpages; + return r; } /** @@ -681,13 +684,13 @@ out_oom: */ static long svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end, + struct vm_area_struct *vma, u64 start, u64 end, uint32_t trigger, struct page *fault_page) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); - uint64_t npages = (end - start) >> PAGE_SHIFT; + u64 npages = (end - start) >> PAGE_SHIFT; unsigned long cpages = 0; - long mpages = 0; + unsigned long mpages = 0; struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; @@ -707,7 +710,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; buf = kvcalloc(npages, - 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t), GFP_KERNEL); if (!buf) goto out; @@ -741,15 +744,13 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, else pr_debug("0x%lx pages collected\n", cpages); - mpages = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, scratch, npages); migrate_vma_pages(&migrate); - if (mpages >= 0) - pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", + mpages = svm_migrate_successful_pages(&migrate); + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", mpages, cpages, migrate.npages); - else - r = mpages; svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); @@ -762,7 +763,7 @@ out_free: start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, trigger, r); out: - if (!r && mpages > 0) { + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) 
WRITE_ONCE(pdd->page_out, pdd->page_out + mpages); @@ -846,8 +847,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, if (r >= 0) { WARN_ONCE(prange->vram_pages < mpages, - "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).", - prange->vram_pages, mpages); + "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).", + prange->vram_pages, mpages); prange->vram_pages -= mpages; /* prange does not have vram page set its actual_loc to system diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d01ef5ac0766..70ef051511bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -382,6 +382,8 @@ struct kfd_dev { /* for dynamic partitioning */ int kfd_dev_lock; + + atomic_t kfd_processes_count; }; enum kfd_mempool { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5be28c6c4f6a..ddfe30c13e9d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->runtime_inuse = false; } + atomic_dec(&pdd->dev->kfd->kfd_processes_count); + kfree(pdd); p->pdds[i] = NULL; } @@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); + atomic_inc(&dev->kfd->kfd_processes_count); + return pdd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 68ba239b2e5d..9d72411c3379 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1738,7 +1738,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, WRITE_ONCE(p->svms.faulting_task, current); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, - readonly, owner, NULL, + readonly, owner, &hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) @@ -3045,6 +3045,8 @@ retry_write_locked: if (svms->checkpoint_ts[gpuidx] != 0) { if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + if (write_locked) + mmap_write_downgrade(mm); r = -EAGAIN; goto out_unlock_svms; } else { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 62defeccbb5c..0d03e324d5b9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -233,6 +233,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static int amdgpu_dm_connector_get_modes(struct drm_connector *connector); +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state); static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, @@ -417,8 +418,7 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc, /* * Previous frame finished and HW is ready for optimization. 
*/ - if (update_type == UPDATE_TYPE_FAST) - dc_post_update_surfaces_to_stream(dc); + dc_post_update_surfaces_to_stream(dc); return dc_update_planes_and_stream(dc, array_of_surface_update, @@ -2000,6 +2000,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.disable_ips_in_vpb = 0; + /* DCN35 and above supports dynamic DTBCLK switch */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) + init_data.flags.allow_0_dtb_clk = true; + /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; @@ -2081,6 +2085,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); + adev->dm.restore_backlight = true; + adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); @@ -2945,7 +2951,7 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) return -ENOMEM; } - r = i2c_add_adapter(&oem_i2c->base); + r = devm_i2c_add_adapter(adev->dev, &oem_i2c->base); if (r) { drm_info(adev_to_drm(adev), "Failed to register oem i2c\n"); kfree(oem_i2c); @@ -2957,17 +2963,6 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) return 0; } -static void dm_oem_i2c_hw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_display_manager *dm = &adev->dm; - - if (dm->oem_i2c) { - i2c_del_adapter(&dm->oem_i2c->base); - kfree(dm->oem_i2c); - dm->oem_i2c = NULL; - } -} - /** * dm_hw_init() - Initialize DC device * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. @@ -3018,8 +3013,6 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - dm_oem_i2c_hw_fini(adev); - amdgpu_dm_hpd_fini(adev); amdgpu_dm_irq_fini(adev); @@ -3047,14 +3040,20 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); - if (enable) { - if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state))) - rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true); - } else - rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); - - if (rc) - drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + if (dc_supports_vrr(adev->dm.dc->ctx->dce_version)) { + if (enable) { + if (amdgpu_dm_crtc_vrr_active( + to_dm_crtc_state(acrtc->base.state))) + rc = amdgpu_dm_crtc_set_vupdate_irq( + &acrtc->base, true); + } else + rc = amdgpu_dm_crtc_set_vupdate_irq( + &acrtc->base, false); + + if (rc) + drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", + enable ? 
"en" : "dis"); + } irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; /* During gpu-reset we disable and then enable vblank irq, so @@ -3443,6 +3442,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); + adev->dm.restore_backlight = true; amdgpu_dm_irq_resume_early(adev); @@ -3641,7 +3641,7 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { .atomic_commit_tail = amdgpu_dm_atomic_commit_tail, - .atomic_commit_setup = drm_dp_mst_atomic_setup_commit, + .atomic_commit_setup = amdgpu_dm_atomic_setup_commit, }; static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) @@ -4833,6 +4833,16 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap if (!caps->data_points) return; + /* + * Handle the case where brightness is below the first data point + * Interpolate between (0,0) and (first_signal, first_lum) + */ + if (brightness < caps->luminance_data[0].input_signal) { + lum = DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness, + caps->luminance_data[0].input_signal); + goto scale; + } + left = 0; right = caps->data_points - 1; while (left <= right) { @@ -6427,6 +6437,10 @@ static void fill_stream_properties_from_drm_display_mode( && aconnector && aconnector->force_yuv420_output) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; + else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR422) + && aconnector + && aconnector->force_yuv422_output) + timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR422; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444) && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444; @@ -7384,10 +7398,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux); drm_connector_unregister(connector); drm_connector_cleanup(connector); - if (aconnector->i2c) { - i2c_del_adapter(&aconnector->i2c->base); - kfree(aconnector->i2c); - } kfree(aconnector->dm_dp_aux.aux.name); kfree(connector); @@ -7687,6 +7697,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, bpc_limit = 8; do { + drm_dbg_kms(connector->dev, "Trying with %d bpc\n", requested_bpc); stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, requested_bpc); @@ -7722,16 +7733,41 @@ create_validate_stream_for_sink(struct drm_connector *connector, } while (stream == NULL && requested_bpc >= bpc_limit); - if ((dc_result == DC_FAIL_ENC_VALIDATE || - dc_result == DC_EXCEED_DONGLE_CAP) && - !aconnector->force_yuv420_output) { - DRM_DEBUG_KMS("%s:%d Retry forcing yuv420 encoding\n", - __func__, __LINE__); - - aconnector->force_yuv420_output = true; + switch (dc_result) { + /* + * If we failed to validate DP bandwidth stream with the requested RGB color depth, + * we try to fallback and configure in order: + * YUV422 (8bpc, 6bpc) + * YUV420 (8bpc, 6bpc) + */ + case DC_FAIL_ENC_VALIDATE: + case DC_EXCEED_DONGLE_CAP: + case DC_NO_DP_LINK_BANDWIDTH: + /* recursively entered twice and already tried both YUV422 and YUV420 */ + if (aconnector->force_yuv422_output && aconnector->force_yuv420_output) + break; + /* first failure; try YUV422 */ + if (!aconnector->force_yuv422_output) { + drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV422\n", + __func__, __LINE__, dc_result); + 
aconnector->force_yuv422_output = true; + /* recursively entered and YUV422 failed, try YUV420 */ + } else if (!aconnector->force_yuv420_output) { + drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV420\n", + __func__, __LINE__, dc_result); + aconnector->force_yuv420_output = true; + } stream = create_validate_stream_for_sink(connector, drm_mode, - dm_state, old_stream); + dm_state, old_stream); + aconnector->force_yuv422_output = false; aconnector->force_yuv420_output = false; + break; + case DC_OK: + break; + default: + drm_dbg_kms(connector->dev, "%s:%d Unhandled validation failure %d\n", + __func__, __LINE__, dc_result); + break; } return stream; @@ -8239,6 +8275,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, {"1920x1200", 1920, 1200} }; + if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) && + (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) + return; + n = ARRAY_SIZE(common_modes); for (i = 0; i < n; i++) { @@ -8719,7 +8759,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, } aconnector->i2c = i2c; - res = i2c_add_adapter(&i2c->base); + res = devm_i2c_add_adapter(dm->adev->dev, &i2c->base); if (res) { drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index); @@ -8817,7 +8857,16 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct dm_crtc_state *acrtc_state) -{ +{ /* + * We cannot be sure that the frontend index maps to the same + * backend index - some even map to more than one. + * So we have to go through the CRTC to find the right IRQ. + */ + int irq_type = amdgpu_display_crtc_idx_to_irq_type( + adev, + acrtc->crtc_id); + struct drm_device *dev = adev_to_drm(adev); + struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8870,7 +8919,35 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): + if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n"); +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n"); +#endif + } + } else { + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n"); +#endif + if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n"); + } + drm_crtc_vblank_off(&acrtc->base); } } @@ -9892,7 +9969,6 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; - bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -10012,7 +10088,6 @@ static void amdgpu_dm_commit_streams(struct 
drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; - set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -10069,13 +10144,16 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, * to fix a flicker issue. * It will cause the dm->actual_brightness is not the current panel brightness * level. (the dm->brightness is the correct panel level) - * So we set the backlight level with dm->brightness value after set mode + * So we set the backlight level with dm->brightness value after initial + * set mode. Use restore_backlight flag to avoid setting backlight level + * for every subsequent mode set. */ - if (set_backlight_level) { + if (dm->restore_backlight) { for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i]) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } + dm->restore_backlight = false; } } @@ -10293,6 +10371,39 @@ static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state) } } +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int i, ret; + + ret = drm_dp_mst_atomic_setup_commit(state); + if (ret) + return ret; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + /* + * Color management settings. We also update color properties + * when a modeset is needed, to ensure it gets reprogrammed. + */ + if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream && + (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || + drm_atomic_crtc_needs_modeset(new_crtc_state))) { + ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + if (ret) { + drm_dbg_atomic(state->dev, "Failed to update color state\n"); + return ret; + } + } + } + + return 0; +} + /** * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. 
* @state: The atomic state to commit @@ -10788,6 +10899,8 @@ static void get_freesync_config_for_crtc( } else { config.state = VRR_STATE_INACTIVE; } + } else { + config.state = VRR_STATE_UNSUPPORTED; } out: new_crtc_state->freesync_config = config; @@ -11105,7 +11218,7 @@ skip_modeset: if (dm_new_crtc_state->base.color_mgmt_changed || dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { - ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true); if (ret) goto fail; } @@ -12689,7 +12802,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, dm_con_state = to_dm_connector_state(connector->state); - if (!adev->dm.freesync_module) + if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version)) goto update; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 159f8ded0439..009f206226f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -631,6 +631,13 @@ struct amdgpu_display_manager { u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; /** + * @restore_backlight: + * + * Flag to indicate whether to restore backlight after modeset. + */ + bool restore_backlight; + + /** * @aux_hpd_discon_quirk: * * quirk for hpd discon while aux is on-going. @@ -799,6 +806,7 @@ struct amdgpu_dm_connector { bool fake_enable; bool force_yuv420_output; + bool force_yuv422_output; struct dsc_preferred_settings dsc_settings; union dp_downstream_port_present mst_downstream_port_present; /* Cached display modes */ @@ -1046,6 +1054,8 @@ void amdgpu_dm_init_color_mod(void); int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index c7387af725d6..a4ac6d442278 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -566,12 +566,11 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 
0 : -ENOMEM; } -static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, +static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf, const struct drm_color_lut *regamma_lut, uint32_t regamma_size, bool has_rom, enum dc_transfer_func_predefined tf) { - struct dc_transfer_func *out_tf = &stream->out_transfer_func; int ret = 0; if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { @@ -821,7 +820,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *shaper = NULL, *lut3d = NULL; uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; - bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; /* shaper LUT is only available if 3D LUT color caps */ exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0; @@ -885,33 +884,33 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) } /** - * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC. * @crtc: amdgpu_dm crtc state + * @check_only: only check the color state without updating the dc stream * - * With no plane level color management properties we're free to use any - * of the HW blocks as long as the CRTC CTM always comes before the - * CRTC RGM and after the CRTC DGM. - * - * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. - * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. - * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * This function only verifies the CRTC LUT sizes, whether there is enough space + * for the output transfer function, and whether its parameters can be calculated + * by the AMD color module. It also adjusts some settings for programming the CRTC + * degamma at the plane stage, using the plane DGM block. * * The RGM block is typically more fully featured and accurate across * all ASICs - DCE can't support a custom non-linear CRTC DGM. * * For supporting both plane level color management and CRTC level color - * management at once we have to either restrict the usage of CRTC properties - * or blend adjustments together. + * management at once we have to either restrict the usage of some CRTC + * properties or blend adjustments together. * * Returns: - * 0 on success. Error code if setup fails. + * 0 on success. Error code if validation fails. */ -int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only) { struct dc_stream_state *stream = crtc->stream; struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); bool has_rom = adev->asic_type <= CHIP_RAVEN; - struct drm_color_ctm *ctm = NULL; + struct dc_transfer_func *out_tf; const struct drm_color_lut *degamma_lut, *regamma_lut; uint32_t degamma_size, regamma_size; bool has_regamma, has_degamma; @@ -940,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_has_degamma = false; crtc->cm_is_degamma_srgb = false; + if (check_only) { + out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL); + if (!out_tf) + return -ENOMEM; + } else { + out_tf = &stream->out_transfer_func; + } + /* Setup regamma and degamma. */ if (is_legacy) { /* @@ -954,8 +961,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * inverse color ramp in legacy userspace. 
*/ crtc->cm_is_degamma_srgb = true; - stream->out_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB; + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = TRANSFER_FUNCTION_SRGB; /* * Note: although we pass has_rom as parameter here, we never * actually use ROM because the color module only takes the ROM @@ -963,16 +970,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * * See more in mod_color_calculate_regamma_params() */ - r = __set_legacy_tf(&stream->out_transfer_func, regamma_lut, + r = __set_legacy_tf(out_tf, regamma_lut, regamma_size, has_rom); - if (r) - return r; } else { regamma_size = has_regamma ? regamma_size : 0; - r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, + r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut, regamma_size, has_rom, tf); - if (r) - return r; } /* @@ -981,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * have to place the CTM in the OCSC in that case. */ crtc->cm_has_degamma = has_degamma; + if (check_only) + kvfree(out_tf); + + return r; +} + +/** + * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * @crtc: amdgpu_dm crtc state + * + * With no plane level color management properties we're free to use any + * of the HW blocks as long as the CRTC CTM always comes before the + * CRTC RGM and after the CRTC DGM. + * + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * + * The RGM block is typically more fully featured and accurate across + * all ASICs - DCE can't support a custom non-linear CRTC DGM. + * + * For supporting both plane level color management and CRTC level color + * management at once we have to either restrict the usage of CRTC properties + * or blend adjustments together. + * + * Returns: + * 0 on success. Error code if setup fails. + */ +int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) +{ + struct dc_stream_state *stream = crtc->stream; + struct drm_color_ctm *ctm = NULL; + int ret; + + ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false); + if (ret) + return ret; /* Setup CRTC CTM. 
*/ if (crtc->base.ctm) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 45feb404b097..1ec9d03ad747 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -218,8 +218,10 @@ static void amdgpu_dm_idle_worker(struct work_struct *work) break; } - if (idle_work->enable) + if (idle_work->enable) { + dc_post_update_surfaces_to_stream(idle_work->dm->dc); dc_allow_idle_optimizations(idle_work->dm->dc, true); + } mutex_unlock(&idle_work->dm->dc_lock); } idle_work->dm->idle_workqueue->running = false; @@ -273,8 +275,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + dc_post_update_surfaces_to_stream(dm->dc); dc_allow_idle_optimizations(dm->dc, true); + } mutex_unlock(&dm->dc_lock); @@ -317,13 +321,17 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) dc->config.disable_ips != DMUB_IPS_DISABLE_ALL && sr_supported && vblank->config.disable_immediate) drm_crtc_vblank_restore(crtc); + } - /* vblank irq on -> Only need vupdate irq in vrr mode */ - if (amdgpu_dm_crtc_vrr_active(acrtc_state)) - rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); - } else { - /* vblank irq off -> vupdate irq off */ - rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false); + if (dc_supports_vrr(dm->dc->ctx->dce_version)) { + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_crtc_vrr_active(acrtc_state)) + rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false); + } } if (rc) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 58e084f52526..19038f336155 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -768,14 +768,18 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; config->psp.handle = &adev->psp; - if (dc->ctx->dce_version == DCN_VERSION_3_1 || + if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == DCN_VERSION_3_15 || - dc->ctx->dce_version == DCN_VERSION_3_5 || + dc->ctx->dce_version == DCN_VERSION_3_16 || + dc->ctx->dce_version == DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_21 || + dc->ctx->dce_version == DCN_VERSION_3_5 || dc->ctx->dce_version == DCN_VERSION_3_51 || - dc->ctx->dce_version == DCN_VERSION_3_6 || - dc->ctx->dce_version == DCN_VERSION_3_16) + dc->ctx->dce_version == DCN_VERSION_3_6 || + dc->ctx->dce_version == DCN_VERSION_4_01) config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); ddc_funcs->write_i2c = lp_write_i2c; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index eef51652ca35..e027798ece03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -146,7 +146,7 @@ static void amdgpu_dm_plane_add_modifier(uint64_t **mods, uint64_t *size, uint64 if (*cap - *size < 1) { uint64_t new_cap = *cap * 2; - uint64_t *new_mods = 
kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); + uint64_t *new_mods = kmalloc_array(new_cap, sizeof(uint64_t), GFP_KERNEL); if (!new_mods) { kfree(*mods); @@ -732,7 +732,7 @@ static int amdgpu_dm_plane_get_plane_modifiers(struct amdgpu_device *adev, unsig if (adev->family < AMDGPU_FAMILY_AI) return 0; - *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); + *mods = kmalloc_array(capacity, sizeof(uint64_t), GFP_KERNEL); if (plane_type == DRM_PLANE_TYPE_CURSOR) { amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); @@ -1633,7 +1633,7 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, dm->adev->mode_info.plane_ctm_property, 0); - if (dpp_color_caps.hw_3d_lut) { + if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) { drm_object_attach_property(&plane->base, mode_info.plane_shaper_lut_property, 0); drm_object_attach_property(&plane->base, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index e5771f490f2e..11b2ea6edf95 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -98,6 +98,7 @@ bool dm_pp_apply_display_requirements( const struct dm_pp_single_disp_config *dc_cfg = &pp_display_cfg->disp_configs[i]; adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1; + adev->pm.pm_display_cfg.displays[i].pixel_clock = dc_cfg->pixel_clock; } amdgpu_dpm_display_configuration_change(adev, &adev->pm.pm_display_cfg); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c index 82ea3fe5e764..80704d709e44 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c @@ -31,7 +31,7 @@ #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" #include "dmub/inc/dmub_cmd.h" -#include "dc/inc/link.h" +#include "dc/inc/link_service.h" /* * amdgpu_dm_link_supports_replay() - check if the link supports replay diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 132de4071efd..8550d5e8b753 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -53,11 +53,11 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc func_name, line); } -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx) { } -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx) +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx) { } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 4071851f9e86..15cf13ec5302 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -28,7 +28,7 @@ #include "dccg.h" #include "clk_mgr_internal.h" #include "dc_state_priv.h" -#include "link.h" +#include "link_service.h" #include "dce100/dce_clk_mgr.h" #include "dce110/dce110_clk_mgr.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index 
dbd6ef1b60a0..6131ede2db7a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -463,6 +463,9 @@ void dce_clk_mgr_construct( clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID; + base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + dce_clock_read_integrated_info(clk_mgr); dce_clock_read_ss_info(clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 13cf415e38e5..d50b9440210e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -164,7 +164,7 @@ void dce110_fill_display_configs( stream->link->cur_link_settings.link_rate; cfg->link_settings.link_spread = stream->link->cur_link_settings.link_spread; - cfg->sym_clock = stream->phy_pix_clk; + cfg->pixel_clock = stream->phy_pix_clk; /* Round v_refresh*/ cfg->v_refresh = stream->timing.pix_clk_100hz * 100; cfg->v_refresh /= stream->timing.h_total; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index a39641a0ff09..69dd80d9f738 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -147,6 +147,8 @@ void dce60_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_internal *clk_mgr) { + struct clk_mgr *base = &clk_mgr->base; + dce_clk_mgr_construct(ctx, clk_mgr); memcpy(clk_mgr->max_clks_by_state, @@ -157,5 +159,8 @@ void dce60_clk_mgr_construct( clk_mgr->clk_mgr_shift = &disp_clk_shift; clk_mgr->clk_mgr_mask = &disp_clk_mask; clk_mgr->base.funcs = &dce60_funcs; + + base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c index 3253115a153d..827bc2431d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c @@ -69,7 +69,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un /* handle DALSMC_Result_CmdRejectedBusy? 
*/ - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index bc123f1884da..051052bd10c9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -47,7 +47,7 @@ #include "dcn30/dcn30_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" #include "logger_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 91d872d6d392..9e63fa72101c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -48,7 +48,7 @@ #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" #include "dcn314_smu.h" @@ -77,6 +77,7 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger + #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 @@ -87,8 +88,70 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define regCLK1_CLK0_DFS_CNTL 0x0269 +#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DFS_CNTL 0x026c +#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DFS_CNTL 0x026f +#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DFS_CNTL 0x0272 +#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DFS_CNTL 0x0275 +#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DFS_CNTL 0x0278 +#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_CURRENT_CNT 0x02fb +#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK1_CURRENT_CNT 0x02fc +#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK2_CURRENT_CNT 0x02fd +#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK3_CURRENT_CNT 0x02fe +#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK4_CURRENT_CNT 0x02ff +#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK5_CURRENT_CNT 0x0300 +#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0 + +#define regCLK1_CLK0_BYPASS_CNTL 0x028a +#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_BYPASS_CNTL 0x0293 +#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0 #define regCLK1_CLK2_BYPASS_CNTL 0x029c #define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_BYPASS_CNTL 0x02a5 +#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_BYPASS_CNTL 0x02ae +#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_BYPASS_CNTL 0x02b7 +#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_DS_CNTL 0x0283 +#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DS_CNTL 0x028c +#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DS_CNTL 0x0295 +#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DS_CNTL 0x029e +#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DS_CNTL 0x02a7 +#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DS_CNTL 0x02b0 +#define 
regCLK1_CLK5_DS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_ALLOW_DS 0x0284 +#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK1_ALLOW_DS 0x028d +#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK2_ALLOW_DS 0x0296 +#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK3_ALLOW_DS 0x029f +#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK4_ALLOW_DS 0x02a8 +#define regCLK1_CLK4_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK5_ALLOW_DS 0x02b1 +#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 @@ -185,6 +248,8 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int); + struct clk_log_info log_info = {0}; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -200,6 +265,9 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr) dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + + dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info); + clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000; } void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, @@ -218,6 +286,8 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + display_count = dcn314_get_active_display_cnt_wa(dc, context); + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -236,7 +306,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn314_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ if (display_count == 0) { union display_idle_optimization_u idle_info = { 0 }; @@ -293,11 +362,19 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + /* Clamp the requested clock to PMFW based on their limit. 
*/ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; @@ -385,10 +462,65 @@ bool dcn314_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + +static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) { - return; + + struct dcn35_clk_internal internal = {0}; + + dcn314_dump_clk_registers_internal(&internal, clk_mgr_base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + } static struct clk_bw_params dcn314_bw_params = { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index 002c28e80720..0577eb527bc3 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -65,4 +65,9 @@ void dcn314_clk_mgr_construct(struct dc_context *ctx, void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info); + + #endif //__DCN314_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index e4d22f74f986..b315ed91e010 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -46,7 +46,7 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger -#include "link.h" +#include "link_service.h" #define TO_CLK_MGR_DCN315(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn315, base) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 49efea0c8fcf..1769b1f26e75 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -39,7 +39,7 @@ #include "dcn316_smu.h" #include "dm_helpers.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" // DCN316 this is CLK1 instance #define MAX_INSTANCE 7 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 8376e2b0e73d..7da7b41bd092 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -33,7 +33,7 @@ #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn32_smu13_driver_if.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c index cf2d35363e8b..5d80fdf63ffc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c @@ -63,7 +63,8 @@ static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un udelay(delay_us); } while (max_retries--); - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + return reg; } @@ -120,7 +121,7 @@ static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_m *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index bb1ac12a2b09..b11383fba35f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -44,7 +44,7 @@ #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" #include "logger_types.h" #undef DC_LOGGER @@ -587,9 +587,118 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, 
return true; } -static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, +static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr_dcn35 *clk_mgr) { + struct dcn35_clk_internal internal = {0}; + char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"}; + + dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + + if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { + DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n"); + + DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n", + regs_and_bypass->dcfclk, + regs_and_bypass->dcf_deep_sleep_divider, + regs_and_bypass->dcf_deep_sleep_allow, + bypass_clks[(int) regs_and_bypass->dcfclk_bypass]); + + DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dprefclk, + bypass_clks[(int) regs_and_bypass->dprefclk_bypass]); + + DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dispclk, + bypass_clks[(int) regs_and_bypass->dispclk_bypass]); + + // REGISTER VALUES + DC_LOG_SMU("reg_name,value,clk_type"); + + 
DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk", + internal.CLK1_CLK3_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk", + internal.CLK1_CLK4_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider", + internal.CLK1_CLK3_DS_CNTL); + + DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow", + internal.CLK1_CLK3_ALLOW_DS); + + DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk", + internal.CLK1_CLK2_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk", + internal.CLK1_CLK0_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk", + internal.CLK1_CLK1_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass", + internal.CLK1_CLK3_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass", + internal.CLK1_CLK2_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass", + internal.CLK1_CLK0_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass", + internal.CLK1_CLK1_BYPASS_CNTL); + + } } static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) @@ -623,6 +732,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int); init_clk_states(clk_mgr); @@ -633,6 +743,13 @@ void dcn35_init_clocks(struct clk_mgr *clk_mgr) else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35); + + clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10; + if (clk_mgr->boot_snapshot.dtbclk > 59000) { + /*dtbclk enabled based on */ + clk_mgr->clks.dtbclk_en = true; + } } static struct clk_bw_params dcn35_bw_params = { .vram_type = Ddr4MemType, @@ -1323,7 +1440,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); + dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 47ff4c965d76..306016c1f109 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -13,7 +13,7 @@ #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #include "atomfirmware.h" @@ -162,7 +162,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c unsigned int i; char *entry_i = (char *)entry_0; - uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); + uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); if (ret & (1 << 31)) /* fine-grained, only min and max */ @@ -174,7 +174,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c /* if the initial message failed, num_levels will be 0 */ for (i = 0; i < *num_levels && i < ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries); i++) { - *((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); + *((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); entry_i += 
sizeof(clk_mgr->base.bw_params->clk_table.entries[0]); } } @@ -231,20 +231,20 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) clk_mgr->smu_present = false; clk_mgr->dpm_present = false; - if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) + if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) clk_mgr->smu_present = true; if (!clk_mgr->smu_present) return; - dcn30_smu_check_driver_if_version(clk_mgr); - dcn30_smu_check_msg_header_version(clk_mgr); + dcn401_smu_check_driver_if_version(clk_mgr); + dcn401_smu_check_msg_header_version(clk_mgr); /* DCFCLK */ dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz, &num_entries_per_clk->num_dcfclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); + clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); if (num_entries_per_clk->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dcfclk_levels - 1].dcfclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = 0; @@ -253,7 +253,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK, &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, &num_entries_per_clk->num_socclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); + clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); if (num_entries_per_clk->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_socclk_levels - 1].socclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = 0; @@ -263,7 +263,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_entries_per_clk->num_dtbclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); + clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); if (num_entries_per_clk->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dtbclk_levels - 1].dtbclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = 0; @@ -273,7 +273,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz, &num_entries_per_clk->num_dispclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); + clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); if (num_entries_per_clk->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dispclk_levels - 1].dispclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 0; @@ -1318,8 +1318,8 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) table->Watermarks.WatermarkRow[i].WmSetting = i; 
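Each of the per-clock blocks above follows the same pattern: query the DC-mode maximum DPM frequency, then clear the limit when it equals the highest discrete DPM level so that no artificial cap is applied. A minimal standalone sketch of that pattern is shown below; the struct and helper names are invented for illustration and are not the driver's.

/*
 * Minimal sketch of the per-clock pattern above: fetch the DC-mode
 * ceiling and drop it (set to 0) when it matches the top DPM level.
 */
#include <stdint.h>
#include <stdio.h>

struct clk_levels {
	unsigned int num_levels;
	unsigned int freq_mhz[8];
};

/* Stand-in for a dcn401_smu_get_dc_mode_max_dpm_freq()-style query. */
static unsigned int query_dc_mode_max_mhz(void)
{
	return 1200;	/* hypothetical SMU response */
}

static unsigned int resolve_dc_mode_limit(const struct clk_levels *levels)
{
	unsigned int limit = query_dc_mode_max_mhz();

	/* A limit equal to the highest level is no limit at all. */
	if (levels->num_levels &&
	    limit == levels->freq_mhz[levels->num_levels - 1])
		limit = 0;

	return limit;
}

int main(void)
{
	struct clk_levels dispclk = { .num_levels = 3,
				      .freq_mhz = { 600, 900, 1200 } };

	printf("dc_mode_limit = %u MHz\n", resolve_dc_mode_limit(&dispclk));
	return 0;
}

In the patch the same check is repeated per clock (DCFCLK, SOCCLK, DTBCLK, DISPCLK, and later UCLK and FCLK); the sketch only isolates the shared logic.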
table->Watermarks.WatermarkRow[i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type; } - dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); - dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); + dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); + dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr); } @@ -1390,7 +1390,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; } - clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0; @@ -1399,7 +1399,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_FCLK, &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, &num_entries_per_clk->num_fclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); + clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 21c35528f61f..3a263840893e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -57,6 +57,8 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin /* Wait for response register to be ready */ dcn401_smu_wait_for_response(clk_mgr, 10, 200000); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -71,9 +73,11 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin if (param_out) *param_out = REG_READ(DAL_ARG_REG); + TRACE_SMU_MSG_EXIT(true, param_out ? 
*param_out : 0, clk_mgr->base.ctx); return true; } + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); return false; } @@ -102,8 +106,6 @@ static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_ *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); - return reg; } @@ -115,6 +117,8 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Wait for response register to be ready */ dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay1_us); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -124,18 +128,71 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Trigger the message transaction by writing the message ID */ REG_WRITE(DAL_MSG_REG, msg_id); - TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx); - /* Wait for response */ if (dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay2_us) == DALSMC_Result_OK) { if (param_out) *param_out = REG_READ(DAL_ARG_REG); *total_delay_us = delay1_us + delay2_us; + TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx); return true; } *total_delay_us = delay1_us + 2000000; + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); + return false; +} + +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version) +{ + smu_print("SMU Get SMU version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetSmuVersion, 0, version)) { + + smu_print("SMU version: %d\n", *version); + + return true; + } + + return false; +} + +/* Message output should match SMU11_DRIVER_IF_VERSION in smu11_driver_if.h */ +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check driver if version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDriverIfVersion, 0, &response)) { + + smu_print("SMU driver if version: %d\n", response); + + if (response == SMU14_DRIVER_IF_VERSION) + return true; + } + + return false; +} + +/* Message output should match DALSMC_VERSION in dalsmc.h */ +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check msg header version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) { + + smu_print("SMU msg header version: %d\n", response); + + if (response == DALSMC_VERSION) + return true; + } + return false; } @@ -163,6 +220,22 @@ void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsi smu_print("Numways for SubVP : %d\n", num_ways); } +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) +{ + smu_print("SMU Set DRAM addr high: %d\n", addr_high); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL); +} + +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low) +{ + smu_print("SMU Set DRAM addr low: %d\n", addr_low); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL); +} + void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) { smu_print("SMU Transfer WM table DRAM 2 SMU\n"); @@ -348,3 +421,52 @@ unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr return response; } + +/* + * Frequency in MHz returned in lower 16 bits for valid DPM level + * + * 
Call with dpm_level = 0xFF to query features, return value will be: + * Bits 7:0 - number of DPM levels + * Bit 28 - 1 = auto DPM on + * Bit 29 - 1 = sweep DPM on + * Bit 30 - 1 = forced DPM on + * Bit 31 - 0 = discrete, 1 = fine-grained + * + * With fine-grained DPM, only min and max frequencies will be reported + * + * Returns 0 on failure + */ +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type, lower 8 bits for DPM level */ + uint32_t param = (clk << 16) | dpm_level; + + smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDpmFreqByIndex, param, &response); + + smu_print("SMU dpm freq: %d MHz\n", response); + + return response; +} + +/* Returns the max DPM frequency in DC mode in MHz, 0 on failure */ +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type */ + uint32_t param = clk << 16; + + smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDcModeMaxDpmFreq, param, &response); + + smu_print("SMU DC mode max DMP freq: %d MHz\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index e02eb1294b37..4f5ac603e822 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -7,11 +7,17 @@ #include "os_types.h" #include "core_types.h" -#include "dcn32/dcn32_clk_mgr_smu_msg.h" +struct clk_mgr_internal; + +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version); +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr); +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr); void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways); +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high); +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low); void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr); unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz); @@ -29,5 +35,7 @@ bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr); +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk); +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level); #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 
5963019d1e74..5f2d5638c819 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -60,7 +60,7 @@ #include "link_encoder.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #include "dm_helpers.h" #include "mem_input.h" @@ -460,7 +460,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. */ if (dc->ctx->dce_version > DCE_VERSION_MAX) { - if ((dc->optimized_required || dc->wm_optimized_required) && + if (dc->optimized_required && (stream->adjust.v_total_max != adjust->v_total_max || stream->adjust.v_total_min != adjust->v_total_min)) { stream->adjust.timing_adjust_pending = true; @@ -2577,7 +2577,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; - dc->wm_optimized_required = false; } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -3056,8 +3055,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } - - dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -3313,6 +3310,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->adaptive_sync_infopacket) stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket; + if (update->avi_infopacket) + stream->avi_infopacket = *update->avi_infopacket; + if (update->dither_option) stream->dither_option = *update->dither_option; @@ -3607,7 +3607,8 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->vsp_infopacket || stream_update->hfvsif_infopacket || stream_update->adaptive_sync_infopacket || - stream_update->vtem_infopacket) { + stream_update->vtem_infopacket || + stream_update->avi_infopacket) { resource_build_info_frame(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -5079,6 +5080,7 @@ static bool full_update_required(struct dc *dc, stream_update->hfvsif_infopacket || stream_update->vtem_infopacket || stream_update->adaptive_sync_infopacket || + stream_update->avi_infopacket || stream_update->dpms_off || stream_update->allow_freesync || stream_update->vrr_active_variable || @@ -5622,8 +5624,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); } } - - DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", + if (!dc->caps.is_apu) + DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2], subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 814f68d76257..a180f68f711c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -24,7 +24,7 @@ #include "link_enc_cfg.h" #include "resource.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER dc->ctx->logger diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index b7a5de4ecb61..9acd30019717 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -33,8 +33,9 @@ * dc.h with detail interface documentation, then add function implementation * in this file which calls link functions. */ -#include "link.h" +#include "link_service.h" #include "dce/dce_i2c.h" + struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index) { if (link_index >= MAX_LINKS) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d712548b1927..bc5dedf5f60c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -40,7 +40,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dpcd_defs.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #include "clk_mgr.h" #include "dc_state_priv.h" #include "dc_stream_priv.h" @@ -95,7 +95,6 @@ #define DC_LOGGER \ dc->ctx->logger #define DC_LOGGER_INIT(logger) - #include "dml2/dml2_wrapper.h" #define UNABLE_TO_SPLIT -1 @@ -2149,7 +2148,7 @@ int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master, h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right + - otg_master->hblank_borrow; + otg_master->dsc_padding_params.dsc_hactive_padding; width = h_active / count; if (otg_master->stream_res.tg) @@ -4267,39 +4266,33 @@ fail: return res; } +#if defined(CONFIG_DRM_AMD_DC_FP) +#endif /* CONFIG_DRM_AMD_DC_FP */ + /** - * decide_hblank_borrow - Decides the horizontal blanking borrow value for a given pipe context. + * calculate_timing_params_for_dsc_with_padding - Calculates timing parameters for DSC with padding. * @pipe_ctx: Pointer to the pipe context structure. * - * This function calculates the horizontal blanking borrow value for a given pipe context based on the + * This function calculates the timing parameters for a given pipe context based on the * display stream compression (DSC) configuration. If the horizontal active pixels (hactive) are less - * than the total width of the DSC slices, it sets the hblank_borrow value to the difference. If the - * total horizontal timing minus the hblank_borrow value is less than 32, it resets the hblank_borrow + * than the total width of the DSC slices, it sets the dsc_hactive_padding value to the difference. If the + * total horizontal timing minus the dsc_hactive_padding value is less than 32, it resets the dsc_hactive_padding * value to 0. 
*/ -static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx) +static void calculate_timing_params_for_dsc_with_padding(struct pipe_ctx *pipe_ctx) { - uint32_t hactive; - uint32_t ceil_slice_width; struct dc_stream_state *stream = NULL; if (!pipe_ctx) return; stream = pipe_ctx->stream; + pipe_ctx->dsc_padding_params.dsc_hactive_padding = 0; + pipe_ctx->dsc_padding_params.dsc_htotal_padding = 0; - if (stream->timing.flags.DSC) { - hactive = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; - - /* Assume if determined slices does not divide Hactive evenly, Hborrow is needed for padding*/ - if (hactive % stream->timing.dsc_cfg.num_slices_h != 0) { - ceil_slice_width = (hactive / stream->timing.dsc_cfg.num_slices_h) + 1; - pipe_ctx->hblank_borrow = ceil_slice_width * stream->timing.dsc_cfg.num_slices_h - hactive; + if (stream) + pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz = stream->timing.pix_clk_100hz; - if (stream->timing.h_total - hactive - pipe_ctx->hblank_borrow < 32) - pipe_ctx->hblank_borrow = 0; - } - } } /** @@ -4342,7 +4335,7 @@ enum dc_status dc_validate_global_state( /* Decide whether hblank borrow is needed and save it in pipe_ctx */ if (dc->debug.enable_hblank_borrow) - decide_hblank_borrow(pipe_ctx); + calculate_timing_params_for_dsc_with_padding(pipe_ctx); if (dc->res_pool->funcs->patch_unknown_plane_state && pipe_ctx->plane_state && @@ -4417,8 +4410,14 @@ static void set_avi_info_frame( unsigned int fr_ind = pipe_ctx->stream->timing.fr_index; enum dc_timing_3d_format format; + if (stream->avi_infopacket.valid) { + *info_packet = stream->avi_infopacket; + return; + } + memset(&hdmi_info, 0, sizeof(union hdmi_info_packet)); + color_space = pipe_ctx->stream->output_color_space; if (color_space == COLOR_SPACE_UNKNOWN) color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 883054bb18e7..c61300a7cb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -211,7 +211,7 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p return NULL; } - if (!dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) { + if (dc->caps.dcmode_power_limits_present && !dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) { dc_state_release(state); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 954a2786fbe2..98f0b6b3c213 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.349" +#define DC_VER "3.2.351" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -1163,6 +1163,7 @@ struct dc_debug_options { unsigned int auxless_alpm_lfps_silence_ns; unsigned int auxless_alpm_lfps_t1t2_us; short auxless_alpm_lfps_t1t2_offset_us; + bool disable_stutter_for_wm_program; }; @@ -1391,7 +1392,6 @@ union surface_update_flags { uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; uint32_t coeff_reduction_change:1; - uint32_t output_tf_change:1; uint32_t pixel_format_change:1; uint32_t plane_size_change:1; uint32_t gamut_remap_change:1; @@ -1735,7 +1735,6 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; - bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 51e41aed7316..5a365bd19933 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -755,3 +755,8 @@ char *dce_version_to_string(const int version) return "Unknown"; } } + +bool dc_supports_vrr(const enum dce_version v) +{ + return v >= DCE_VERSION_8_0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 7f57661433eb..55704d4457ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,7 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx); // Make spl input basic out info output_size width point to stream h active spl_in->basic_out.output_size.width = - stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->hblank_borrow; + stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; // Make spl input basic out info output_size height point to v active spl_in->basic_out.output_size.height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 5fc6fea211de..76cf9fdedab0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -203,6 +203,7 @@ struct dc_stream_state { struct dc_info_packet 
hfvsif_infopacket; struct dc_info_packet vtem_infopacket; struct dc_info_packet adaptive_sync_infopacket; + struct dc_info_packet avi_infopacket; uint8_t dsc_packed_pps[128]; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -335,6 +336,8 @@ struct dc_stream_update { struct dc_info_packet *hfvsif_infopacket; struct dc_info_packet *vtem_infopacket; struct dc_info_packet *adaptive_sync_infopacket; + struct dc_info_packet *avi_infopacket; + bool *dpms_off; bool integer_scaling_update; bool *allow_freesync; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 619834a328a3..b5aa03a3e39c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1217,6 +1217,7 @@ struct dc_panel_config { bool rc_disable; bool rc_allow_static_screen; bool rc_allow_fullscreen_VPB; + bool read_psrcap_again; unsigned int replay_enable_option; } psr; /* ABM */ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 0ce9489ac6b7..de6d62401362 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -39,6 +39,7 @@ #define CTX \ dccg_dcn->base.ctx +#include "logger_types.h" #define DC_LOGGER \ dccg->ctx->logger @@ -1136,7 +1137,7 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg, default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); + DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); } @@ -1406,6 +1407,10 @@ static void dccg35_set_dtbclk_dto( * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the * programming is handled in program_pix_clk() regardless, so it can be removed from here. */ + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n", + __func__, params->otg_inst, params->pixclk_khz, + params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo); + } else { switch (params->otg_inst) { case 0: @@ -1431,6 +1436,8 @@ static void dccg35_set_dtbclk_dto( REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst); } } @@ -1475,6 +1482,8 @@ static void dccg35_set_dpstreamclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n", + __func__, dp_hpo_inst, (src == REFCLK) ? 0 : 1, otg_inst); } @@ -1514,6 +1523,8 @@ static void dccg35_set_dpstreamclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n", + __func__, dp_hpo_inst, enable ? 1 : 0); } @@ -1553,7 +1564,7 @@ static void dccg35_set_physymclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 0 : 1); + DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1); } @@ -1626,6 +1637,8 @@ static void dccg35_set_physymclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n", + __func__, phy_inst, force_enable ? 
1 : 0, clk_src); } static void dccg35_set_valid_pixel_rate( @@ -1673,6 +1686,7 @@ static void dccg35_dpp_root_clock_control( } dccg->dpp_clock_gated[dpp_inst] = !clock_on; + DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on); } static void dccg35_disable_symclk32_se( @@ -1731,6 +1745,7 @@ static void dccg35_disable_symclk32_se( BREAK_TO_DEBUGGER(); return; } + } static void dccg35_init_cb(struct dccg *dccg) @@ -1738,7 +1753,6 @@ static void dccg35_init_cb(struct dccg *dccg) (void)dccg; /* Any RCG should be done when driver enter low power mode*/ } - void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -1753,6 +1767,8 @@ void dccg35_init(struct dccg *dccg) for (otg_inst = 0; otg_inst < 2; otg_inst++) { dccg31_disable_symclk32_le(dccg, otg_inst); dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n", + __func__, otg_inst); } // if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) @@ -1765,6 +1781,8 @@ void dccg35_init(struct dccg *dccg) dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst, otg_inst); dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n", + __func__, otg_inst); } /* diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c index 2b1673d69ea8..1ab5ae9b5ea5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c @@ -154,10 +154,13 @@ static bool dce60_setup_scaling_configuration( REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0); if (data->taps.h_taps + data->taps.v_taps <= 2) { - /* Set bypass */ - - /* DCE6 has no SCL_MODE register, skip scale mode programming */ + /* Disable scaler functionality */ + REG_WRITE(SCL_SCALER_ENABLE, 0); + /* Clear registers that can cause glitches even when the scaler is off */ + REG_WRITE(SCL_TAP_CONTROL, 0); + REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0); + REG_WRITE(SCL_F_SHARP_CONTROL, 0); return false; } @@ -165,7 +168,7 @@ static bool dce60_setup_scaling_configuration( SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1, SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1); - /* DCE6 has no SCL_MODE register, skip scale mode programming */ + REG_WRITE(SCL_SCALER_ENABLE, 1); /* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */ @@ -502,6 +505,8 @@ static void dce60_transform_set_scaler( REG_SET(DC_LB_MEM_SIZE, 0, DC_LB_MEM_SIZE, xfm_dce->lb_memory_size); + REG_WRITE(SCL_UPDATE, 0x00010000); + /* Clear SCL_F_SHARP_CONTROL value to 0 */ REG_WRITE(SCL_F_SHARP_CONTROL, 0); @@ -527,8 +532,7 @@ static void dce60_transform_set_scaler( if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) { /* 4. Program vertical filters */ if (xfm_dce->filter_v == NULL) - REG_SET(SCL_VERT_FILTER_CONTROL, 0, - SCL_V_2TAP_HARDCODE_COEF_EN, 0); + REG_WRITE(SCL_VERT_FILTER_CONTROL, 0); program_multi_taps_filter( xfm_dce, data->taps.v_taps, @@ -542,8 +546,7 @@ static void dce60_transform_set_scaler( /* 5. 
Program horizontal filters */ if (xfm_dce->filter_h == NULL) - REG_SET(SCL_HORZ_FILTER_CONTROL, 0, - SCL_H_2TAP_HARDCODE_COEF_EN, 0); + REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0); program_multi_taps_filter( xfm_dce, data->taps.h_taps, @@ -566,6 +569,8 @@ static void dce60_transform_set_scaler( /* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */ /* DCE6 DATA_FORMAT register does not support ALPHA_EN */ + + REG_WRITE(SCL_UPDATE, 0); } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h index cbce194ec7b8..eb716e8337e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h @@ -155,6 +155,9 @@ SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \ SRI(VIEWPORT_START, SCL, id), \ SRI(VIEWPORT_SIZE, SCL, id), \ + SRI(SCL_SCALER_ENABLE, SCL, id), \ + SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \ + SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \ SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \ SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \ SRI(SCL_VERT_FILTER_INIT, SCL, id), \ @@ -590,6 +593,7 @@ struct dce_transform_registers { uint32_t SCL_VERT_FILTER_SCALE_RATIO; uint32_t SCL_HORZ_FILTER_INIT; #if defined(CONFIG_DRM_AMD_DC_SI) + uint32_t SCL_SCALER_ENABLE; uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA; uint32_t SCL_HORZ_FILTER_INIT_CHROMA; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 65b979617b0c..f9542edff14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -3,7 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. #include "dc.h" -#include "link.h" +#include "link_service.h" #include "dc_dmub_srv.h" #include "dmub/dmub_srv.h" #include "core_types.h" @@ -169,6 +169,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line; copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode; copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c index 22e66b375a7f..d928b4dcf6b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c @@ -28,7 +28,7 @@ #include "dcn10_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #include "dcn30/dcn30_afmt.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c index 0b47aeb60e79..bec0b4aaeb2b 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn20_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git 
a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c index 9a92f73d5b7f..84cc2ddc52fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c @@ -37,7 +37,7 @@ #include "link_enc_cfg.h" #include "dc_dmub_srv.h" #include "dal_asic_id.h" -#include "link.h" +#include "link_service.h" #define CTX \ enc10->base.ctx diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c index ae81451a3a72..3e85e9c3d2cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c @@ -30,7 +30,7 @@ #include "dcn314_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c index 1a9bb614c41e..3523d1cdc1a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn32_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6f30b6cc3c76..fd5d1dbf9dc6 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn35_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c index d5fa551dd3c9..99aab70ef3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c @@ -32,7 +32,7 @@ #include "dcn401_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 7b9c22c45453..fbbf9c757b3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -277,12 +277,13 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc /* * SMU message tracing */ -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx); -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx); - -#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_msg(msg_id, param_in, ctx) -#define TRACE_SMU_DELAY(response_delay, ctx) dm_trace_smu_delay(response_delay, ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx); +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx); +#define 
TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx) +#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx) /* * DMUB Interfaces @@ -311,4 +312,6 @@ void dm_dtn_log_end(struct dc_context *ctx, char *dce_version_to_string(const int version); +bool dc_supports_vrr(const enum dce_version v); + #endif /* __DM_SERVICES_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index bf63da266a18..3b093b8699ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -127,7 +127,7 @@ struct dm_pp_single_disp_config { uint32_t src_height; uint32_t src_width; uint32_t v_refresh; - uint32_t sym_clock; /* HDMI only */ + uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */ struct dc_link_settings link_settings; /* DP only */ }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 2a2eaf6adf26..7aaf13bbd4e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -30,8 +30,7 @@ #include "dcn20/dcn20_resource.h" #include "dcn21/dcn21_resource.h" #include "clk_mgr/dcn21/rn_clk_mgr.h" - -#include "link.h" +#include "link_service.h" #include "dcn20_fpu.h" #include "dc_state_priv.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 17a21bcbde17..1a28061bb9ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -808,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) { + dc_assert_fp_enabled(); + return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); } @@ -815,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate( struct _vcs_dpi_soc_bounding_box_st *soc, int pix_clk_100hz, int bpp, int seg_size_kb) { + dc_assert_fp_enabled(); + /* Roughly calculate required crb to hide latency. 
In practice there is slightly * more buffer available for latency hiding */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 18388fb00be8..8a0f128722b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -31,7 +31,7 @@ // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" #include "dcn30/dcn30_resource.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 5d73efa2f0c9..817a370e80a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -31,7 +31,7 @@ #include "dml/dcn31/dcn31_fpu.h" #include "dml/dml_inline_defs.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) @@ -445,6 +445,8 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, bool upscaled = false; const unsigned int max_allowed_vblank_nom = 1023; + dc_assert_fp_enabled(); + dcn31_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); @@ -498,9 +500,7 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; @@ -581,6 +581,8 @@ void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) unsigned int i, plane_count = 0; DC_LOGGER_INIT(dc->ctx->logger); + dc_assert_fp_enabled(); + for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index 6f516af82956..77023b619f1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -10,7 +10,7 @@ #include "dml/dcn35/dcn35_fpu.h" #include "dml/dml_inline_defs.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) @@ -478,6 +478,8 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, bool upscaled = false; const unsigned int max_allowed_vblank_nom = 1023; + dc_assert_fp_enabled(); + dcn31_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); @@ -531,9 +533,7 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 715f9019a33e..4b9b2e84d381 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6529,7 +6529,7 @@ static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mo mode_lib->ms.TotImmediateFlipBytes = 0; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { - 
mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]; + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]); if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index f6879e622271..bf5e7f4e0416 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -84,25 +84,29 @@ static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stre static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, struct dml2_context *dml_ctx) { unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; + uint32_t pix_clk_100hz; - timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; timing->h_front_porch = stream->timing.h_front_porch; timing->v_front_porch = stream->timing.v_front_porch; timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10; if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) timing->pixel_clock_khz *= 2; - timing->h_total = stream->timing.h_total; + timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; timing->v_total = stream->timing.v_total; timing->h_sync_width = stream->timing.h_sync_width; timing->interlaced = stream->timing.flags.INTERLACE; hblank_start = stream->timing.h_total - stream->timing.h_front_porch; - timing->h_blank_end = hblank_start - stream->timing.h_addressable + timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding - stream->timing.h_border_left - stream->timing.h_border_right; if (hblank_start < stream->timing.h_addressable) @@ -121,8 +125,13 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf /* limit min refresh rate to DC cap */ min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; if (stream->ctx->dc->caps.max_v_total != 0) { - min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL), - (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) { + pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } else { + pix_clk_100hz = stream->timing.pix_clk_100hz; + } + min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL), + (timing->h_total * (long long)calc_max_hardware_v_total(stream))); } timing->drr_config.min_refresh_uhz = 
max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); @@ -173,21 +182,6 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf timing->vblank_nom = timing->v_total - timing->v_active; } -/** - * adjust_dml21_hblank_timing_config_from_pipe_ctx - Adjusts the horizontal blanking timing configuration - * based on the pipe context. - * @timing: Pointer to the dml2_timing_cfg structure to be adjusted. - * @pipe: Pointer to the pipe_ctx structure containing the horizontal blanking borrow value. - * - * This function modifies the horizontal active and blank end timings by adding and subtracting - * the horizontal blanking borrow value from the pipe context, respectively. - */ -static void adjust_dml21_hblank_timing_config_from_pipe_ctx(struct dml2_timing_cfg *timing, struct pipe_ctx *pipe) -{ - timing->h_active += pipe->hblank_borrow; - timing->h_blank_end -= pipe->hblank_borrow; -} - static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, struct dc_stream_state *stream, const struct pipe_ctx *pipe) { @@ -487,7 +481,9 @@ static const struct scaler_data *get_scaler_data_for_plane( temp_pipe->plane_state = pipe->plane_state; temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; temp_pipe->stream_res = pipe->stream_res; - temp_pipe->hblank_borrow = pipe->hblank_borrow; + temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding; + temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding; + temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz; dml_ctx->config.callbacks.build_scaling_params(temp_pipe); break; } @@ -755,8 +751,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s disp_cfg_stream_location = dml_dispcfg->num_streams++; ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); - populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); - adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); + populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 798abb2b2e67..08f7f03b1023 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -224,7 +224,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); /* Populate stream, plane mappings and other fields in display config. 
*/ + DC_FP_START(); result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + DC_FP_END(); if (!result) return false; @@ -279,7 +281,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); mode_support->dml2_instance = dml_init->dml2_instance; + DC_FP_START(); dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + DC_FP_END(); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 7de10a95cfdb..41adb1104d0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -16,9 +16,9 @@ struct dml2_instance; enum dml2_project_id { dml2_project_invalid = 0, - dml2_project_dcn4x_stage1 = 1, - dml2_project_dcn4x_stage2 = 2, - dml2_project_dcn4x_stage2_auto_drr_svp = 3, + dml2_project_dcn4x_stage1, + dml2_project_dcn4x_stage2, + dml2_project_dcn4x_stage2_auto_drr_svp, }; enum dml2_pstate_change_support { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index e763c8e45da8..1b9579a32ff2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -48,18 +48,19 @@ static void set_reserved_time_on_all_planes_with_stream_index(struct display_con static void remove_duplicates(double *list_a, int *list_a_size) { - int cur_element = 0; - // For all elements b[i] in list_b[] - while (cur_element < *list_a_size - 1) { - if (list_a[cur_element] == list_a[cur_element + 1]) { - for (int j = cur_element + 1; j < *list_a_size - 1; j++) { - list_a[j] = list_a[j + 1]; - } - *list_a_size = *list_a_size - 1; - } else { - cur_element++; + int j = 0; + + if (*list_a_size == 0) + return; + + for (int i = 1; i < *list_a_size; i++) { + if (list_a[j] != list_a[i]) { + j++; + list_a[j] = list_a[i]; } } + + *list_a_size = j + 1; } static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index bd1b9aef6d5c..89f0d999bf35 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -406,9 +406,10 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1; dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0; + // Need to find the ceiling value for the slice width + dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h; // TODO: in addition to validating slice height (pic height must be divisible by slice height), // see what happens when the same condition doesn't apply for slice_width/pic_width. 
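The replacement slice-width computation above is a plain integer ceiling, so num_slices_h * slice_width always covers pic_width even when the division is not exact. A small standalone illustration with hypothetical resolutions (not driver code):

/*
 * (pic_width + num_slices_h - 1) / num_slices_h is an integer ceiling,
 * guaranteeing num_slices_h * slice_width >= pic_width.
 */
#include <stdio.h>

static unsigned int dsc_slice_width_ceil(unsigned int pic_width,
					 unsigned int num_slices_h)
{
	return (pic_width + num_slices_h - 1) / num_slices_h;
}

int main(void)
{
	printf("3840 / 4 slices -> %u\n", dsc_slice_width_ceil(3840, 4)); /* 960, exact */
	printf("3862 / 4 slices -> %u\n", dsc_slice_width_ceil(3862, 4)); /* 966, rounded up */
	return 0;
}

With the old truncating division, 3862 across 4 slices would have yielded 965 and left two pixel columns uncovered; the rounded-up width of 966 covers the full picture with padding instead.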
- dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h; dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height); diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c index 1313a7c5d87b..73a1e6a03719 100644 --- a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c +++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c @@ -28,7 +28,7 @@ #include "include/hdcp_msg_types.h" #include "include/signal_types.h" #include "core_types.h" -#include "link.h" +#include "link_service.h" #include "link_hwss.h" #include "link/protocols/link_dpcd.h" diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 92957398ac0a..4d4ca6d77bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -28,6 +28,7 @@ #include "dcn32_hubbub.h" #include "dm_services.h" #include "reg_helper.h" +#include "dal_asic_id.h" #define CTX \ @@ -72,6 +73,14 @@ static void dcn32_init_crb(struct hubbub *hubbub) REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F); } +static void hubbub32_set_sdp_control(struct hubbub *hubbub, bool dc_control) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + REG_UPDATE(DCHUBBUB_SDPIF_CFG0, + SDPIF_PORT_CONTROL, dc_control); +} + void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); @@ -754,8 +763,18 @@ static bool hubbub32_program_watermarks( unsigned int refclk_mhz, bool safe_to_lower) { + struct dc *dc = hubbub->ctx->dc; bool wm_pending = false; + if (!safe_to_lower && dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + /* before raising watermarks, SDP control give to DF, stutter must be disabled */ + wm_pending = true; + hubbub32_set_sdp_control(hubbub, false); + hubbub1_allow_self_refresh_control(hubbub, false); + } + if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) wm_pending = true; @@ -786,10 +805,20 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower) { + /* after lowering watermarks, stutter setting is restored, SDP control given to DC */ + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + + if (dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + hubbub32_set_sdp_control(hubbub, true); + } + } else if (dc->debug.disable_stutter) { + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + } - hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); + hubbub32_force_usr_retraining_allow(hubbub, dc->debug.force_usr_allow); return wm_pending; } @@ -974,8 +1003,7 @@ void hubbub32_init(struct hubbub *hubbub) ignore the "df_pre_cstate_req" from the SDP port control. 
only the DCN will determine when to connect the SDP port */ - REG_UPDATE(DCHUBBUB_SDPIF_CFG0, - SDPIF_PORT_CONTROL, 1); + hubbub32_set_sdp_control(hubbub, true); /*Set SDP's max outstanding request to 512 must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/ REG_UPDATE(DCHUBBUB_SDPIF_CFG1, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 153d68375fa3..24184b4eb352 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -48,7 +48,7 @@ #include "link_encoder.h" #include "link_enc_cfg.h" #include "link_hwss.h" -#include "link.h" +#include "link_service.h" #include "dccg.h" #include "clock_source.h" #include "clk_mgr.h" @@ -1601,17 +1601,19 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( } if (pipe_ctx->stream_res.audio != NULL) { - build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output); + struct audio_output audio_output = {0}; - link_hwss->setup_audio_output(pipe_ctx, &pipe_ctx->stream_res.audio_output, + build_audio_output(context, pipe_ctx, &audio_output); + + link_hwss->setup_audio_output(pipe_ctx, &audio_output, pipe_ctx->stream_res.audio->inst); pipe_ctx->stream_res.audio->funcs->az_configure( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &pipe_ctx->stream_res.audio_output.crtc_info, + &audio_output.crtc_info, &pipe_ctx->stream->audio_info, - &pipe_ctx->stream_res.audio_output.dp_link_info); + &audio_output.dp_link_info); if (dc->config.disable_hbr_audio_dp2) if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && @@ -1923,10 +1925,8 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) get_edp_streams(context, edp_streams, &edp_stream_num); - // Check fastboot support, disable on DCE8 because of blank screens - if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 && - dc->ctx->dce_version != DCE_VERSION_8_1 && - dc->ctx->dce_version != DCE_VERSION_8_3) { + /* Check fastboot support, disable on DCE 6-8 because of blank screens */ + if (edp_num && edp_stream_num && dc->ctx->dce_version < DCE_VERSION_10_0) { for (i = 0; i < edp_num; i++) { edp_link = edp_links[i]; if (edp_link != edp_streams[0]->link) @@ -2385,7 +2385,9 @@ static void dce110_setup_audio_dto( if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A) continue; if (pipe_ctx->stream_res.audio != NULL) { - build_audio_output(context, pipe_ctx, &pipe_ctx->stream_res.audio_output); + struct audio_output audio_output; + + build_audio_output(context, pipe_ctx, &audio_output); if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) { struct dtbclk_dto_params dto_params = {0}; @@ -2396,14 +2398,14 @@ static void dce110_setup_audio_dto( pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &pipe_ctx->stream_res.audio_output.crtc_info, - &pipe_ctx->stream_res.audio_output.pll_info); + &audio_output.crtc_info, + &audio_output.pll_info); } else pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &pipe_ctx->stream_res.audio_output.crtc_info, - &pipe_ctx->stream_res.audio_output.pll_info); + &audio_output.crtc_info, + &audio_output.pll_info); break; } } @@ -2423,15 +2425,15 @@ static void dce110_setup_audio_dto( continue; if (pipe_ctx->stream_res.audio != NULL) { - build_audio_output(context, - pipe_ctx, - 
&pipe_ctx->stream_res.audio_output); + struct audio_output audio_output = {0}; + + build_audio_output(context, pipe_ctx, &audio_output); pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, - &pipe_ctx->stream_res.audio_output.crtc_info, - &pipe_ctx->stream_res.audio_output.pll_info); + &audio_output.crtc_info, + &audio_output.pll_info); break; } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 506c3bbbf221..e9fe97f0c4ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -55,7 +55,7 @@ #include "dce/dmub_hw_lock_mgr.h" #include "dc_trace.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER \ @@ -3347,7 +3347,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required = hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index cc377fcda6ff..9477c9f9e196 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -54,7 +54,7 @@ #include "dpcd_defs.h" #include "inc/link_enc_cfg.h" #include "link_hwss.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER \ @@ -1982,10 +1982,8 @@ static void dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -2390,10 +2388,10 @@ void dcn20_prepare_bandwidth( } /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2406,10 +2404,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); @@ -3131,7 
+3129,8 @@ void dcn20_fpga_init_hw(struct dc *dc) res_pool->dccg->funcs->dccg_init(res_pool->dccg); //Enable ability to power gate / don't force power on permanently - hws->funcs.enable_power_gating_plane(hws, true); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(hws, true); // Specific to FPGA dccg and registers REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 61efb15572ff..e2269211553c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -35,7 +35,7 @@ #include "hw/clk_mgr.h" #include "dc_dmub_srv.h" #include "abm.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 139a63101488..e47ed5571dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -50,7 +50,7 @@ #include "dpcd_defs.h" #include "dcn20/dcn20_hwseq.h" #include "dcn30/dcn30_resource.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 8ba934b83957..b822f2dffff0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -45,7 +45,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "dcn21/dcn21_hwseq.h" #include "inc/link_enc_cfg.h" diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 560984533950..f925f669f2a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -46,7 +46,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "inc/link_enc_cfg.h" #include "dcn30/dcn30_vpg.h" diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 416b1dca3dac..f39292952702 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -49,7 +49,7 @@ #include "dcn20/dcn20_optc.h" #include "dce/dmub_hw_lock_mgr.h" #include "dcn32/dcn32_resource.h" -#include "link.h" +#include "link_service.h" #include "../dcn20/dcn20_hwseq.h" #include "dc_state_priv.h" @@ -1052,7 +1052,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 764eff6a4ec6..05011061822c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -46,7 +46,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "inc/link_enc_cfg.h" #include "dcn30/dcn30_vpg.h" diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index d5b5e2ce6ff6..7c276c319086 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -25,7 +25,7 @@ #include "dpcd_defs.h" #include "clk_mgr.h" #include "dsc.h" -#include "link.h" +#include "link_service.h" #include "dce/dmub_hw_lock_mgr.h" #include "dcn10/dcn10_cm_common.h" @@ -810,9 +810,12 @@ enum dc_status dcn401_enable_stream_timing( if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal))) dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx); - /* if we are borrowing from hblank, h_addressable needs to be adjusted */ - if (dc->debug.enable_hblank_borrow) - patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; + /* if we are padding, h_addressable needs to be adjusted */ + if (dc->debug.enable_hblank_borrow) { + patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding; + patched_crtc_timing.h_total = patched_crtc_timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; + patched_crtc_timing.pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, @@ -1380,22 +1383,22 @@ void dcn401_prepare_bandwidth(struct dc *dc, false); /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); /* update timeout thresholds */ if (hubbub->funcs->program_arbiter) { - dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + dc->optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + dc->optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } @@ -2029,10 +2032,8 @@ void dcn401_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) 
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d30f94c35f11..d11893f8c916 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -228,7 +228,8 @@ struct resource_funcs { enum dc_status (*update_dc_state_for_encoder_switch)(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes); + struct pipe_ctx *pipes, + struct audio_output *audio_output); }; struct audio_support{ @@ -360,8 +361,6 @@ struct stream_resource { uint8_t gsl_group; struct test_pattern_params test_pattern_params; - - struct audio_output audio_output; }; struct plane_resource { @@ -437,6 +436,13 @@ enum p_state_switch_method { P_STATE_V_BLANK_SUB_VP, }; +struct dsc_padding_params { + /* pixels borrowed from hblank to hactive */ + uint8_t dsc_hactive_padding; + uint32_t dsc_htotal_padding; + uint32_t dsc_pix_clk_100hz; +}; + struct pipe_ctx { struct dc_plane_state *plane_state; struct dc_stream_state *stream; @@ -494,8 +500,7 @@ struct pipe_ctx { /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/ uint8_t subvp_index; struct pixel_rate_divider pixel_rate_divider; - /* pixels borrowed from hblank to hactive */ - uint8_t hblank_borrow; + struct dsc_padding_params dsc_padding_params; /* next vupdate */ uint32_t next_vupdate; uint32_t wait_frame_count; diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link_service.h index 0cce49d95e26..1e34e84160aa 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_service.h @@ -42,8 +42,8 @@ * dc_link_exports.c or other dc files implement dc.h * * DC to Link: - * dc_link_exports.c or other dc files include link.h - * link_factory.c implements link.h + * dc_link_exports.c or other dc files include link_service.h + * link_factory.c implements link_service.h * * Link sub-component to Link sub-component: * link_factory.c includes --> link_xxx.h @@ -73,7 +73,7 @@ * 2. Implement your function in the suitable link_xxx.c file. * 3. Assign the function to link_service in link_factory.c * 4. NEVER include link_xxx.h headers outside link component. - * 5. NEVER include link.h on DM side. + * 5. NEVER include link_service.h on DM side. 
*/ #include "core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index a890f581f4e8..4e26a16a8743 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -45,6 +45,7 @@ enum dce_version resource_parse_asic_id( struct resource_caps { int num_timing_generator; int num_opp; + int num_dpp; int num_video_plane; int num_audio; int num_stream_encoder; diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 23f41c99fa38..9e33bf937a69 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -75,7 +75,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, bool is_hpo_acquired; uint8_t count; int i; - + struct audio_output audio_output[MAX_PIPES]; struct dc_stream_state *streams_on_link[MAX_PIPES]; int num_streams_on_link = 0; @@ -101,7 +101,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (needs_divider_update && link->dc->res_pool->funcs->update_dc_state_for_encoder_switch) { link->dc->res_pool->funcs->update_dc_state_for_encoder_switch(link, link_setting, count, - *pipes); + *pipes, &audio_output[0]); for (i = 0; i < count; i++) { pipes[i]->clock_source->funcs->program_pix_clk( pipes[i]->clock_source, @@ -113,16 +113,15 @@ static void dp_retrain_link_dp_test(struct dc_link *link, const struct link_hwss *link_hwss = get_link_hwss( link, &pipes[i]->link_res); - link_hwss->setup_audio_output(pipes[i], - &pipes[i]->stream_res.audio_output, - pipes[i]->stream_res.audio->inst); + link_hwss->setup_audio_output(pipes[i], &audio_output[i], + pipes[i]->stream_res.audio->inst); pipes[i]->stream_res.audio->funcs->az_configure( pipes[i]->stream_res.audio, pipes[i]->stream->signal, - &pipes[i]->stream_res.audio_output.crtc_info, + &audio_output[i].crtc_info, &pipes[i]->stream->audio_info, - &pipes[i]->stream_res.audio_output.dp_link_info); + &audio_output[i].dp_link_info); if (link->dc->config.disable_hbr_audio_dp2 && pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio && diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h index eae23ea7f6ec..033650cdb811 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_DP_CTS_H__ #define __LINK_DP_CTS_H__ -#include "link.h" +#include "link_service.h" void dp_handle_automated_test(struct dc_link *link); bool dp_set_test_pattern( struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h index ab437a0c9101..9ff4a6c46a2b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_DP_TRACE_H__ #define __LINK_DP_TRACE_H__ -#include "link.h" +#include "link_service.h" void dp_trace_init(struct dc_link *link); void dp_trace_reset(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h index 45f0e091fcb0..4a25210a344f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +++ 
b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h @@ -27,7 +27,7 @@ #define __LINK_HWSS_DIO_H__ #include "link_hwss.h" -#include "link.h" +#include "link_service.h" const struct link_hwss *get_dio_link_hwss(void); bool can_use_dio_link_hwss(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h index 9ac08a332540..cf578a8662a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h @@ -25,7 +25,7 @@ #ifndef __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__ #define __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__ -#include "link.h" +#include "link_service.h" uint32_t dp_dio_fixed_vs_pe_retimer_get_lttpr_write_address(struct dc_link *link); uint8_t dp_dio_fixed_vs_pe_retimer_lane_cfg_to_hw_cfg(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h index 1d3ed8ca83b5..7c9005bc2587 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h @@ -26,7 +26,7 @@ #define __LINK_HWSS_HPO_DP_H__ #include "link_hwss.h" -#include "link.h" +#include "link_service.h" void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h index 82301187bc7c..8bf36827ecfb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h @@ -25,7 +25,7 @@ #ifndef __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__ #define __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__ -#include "link.h" +#include "link_service.h" bool requires_fixed_vs_pe_retimer_hpo_link_hwss(const struct dc_link *link); const struct link_hwss *get_hpo_fixed_vs_pe_retimer_dp_link_hwss(void); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index b717e430051a..85303167a553 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1140,6 +1140,10 @@ static bool detect_link_and_local_sink(struct dc_link *link, if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A && !sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + else if (dc_is_dvi_signal(sink->sink_signal) && + aud_support->hdmi_audio_native && + sink->edid_caps.edid_hdmi) + sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A; if (link->local_sink && dc_is_dp_signal(sink_caps.signal)) dp_trace_init(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.h b/drivers/gpu/drm/amd/display/dc/link/link_detection.h index 7da05078721e..1ab29476060b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.h @@ -25,7 +25,7 @@ #ifndef __DC_LINK_DETECTION_H__ #define __DC_LINK_DETECTION_H__ -#include "link.h" +#include "link_service.h" bool link_detect(struct dc_link *link, enum dc_detect_reason reason); bool link_detect_connection_type(struct dc_link *link, enum dc_connection_type *type); diff --git 
a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 08ee8d2f777b..83419e1a9036 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -832,7 +832,7 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) enum optc_dsc_mode optc_dsc_mode; /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h index 9398f9c1666a..bd6fc63064a3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DPMS_H__ #define __DC_LINK_DPMS_H__ -#include "link.h" +#include "link_service.h" void link_set_dpms_on( struct dc_state *state, struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.h b/drivers/gpu/drm/amd/display/dc/link/link_factory.h index e96220d48d03..aad36ca1a31c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_FACTORY_H__ #define __LINK_FACTORY_H__ -#include "link.h" +#include "link_service.h" struct dc_link *link_create(const struct link_init_data *init_params); void link_destroy(struct dc_link **link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_resource.h b/drivers/gpu/drm/amd/display/dc/link/link_resource.h index 1907bda3cb6e..f7aa3bc3a93a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_resource.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_resource.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_RESOURCE_H__ #define __LINK_RESOURCE_H__ -#include "link.h" +#include "link_service.h" void link_get_cur_res_map(const struct dc *dc, uint32_t *map); void link_restore_res_map(const struct dc *dc, uint32_t *map); void link_get_cur_link_res(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index 9553c81053fe..595774e76453 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_VALIDATION_H__ #define __LINK_VALIDATION_H__ -#include "link.h" +#include "link_service.h" enum dc_status link_validate_mode_timing( const struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h index a3e25e55bed6..d3e6f01a6a90 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h @@ -26,7 +26,7 @@ #ifndef __DAL_DDC_SERVICE_H__ #define __DAL_DDC_SERVICE_H__ -#include "link.h" +#include "link_service.h" #define AUX_POWER_UP_WA_DELAY 500 #define I2C_OVER_AUX_DEFER_WA_DELAY 70 diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h index 7170db5a1c13..6e17f72a752f 100644 --- 
a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_CAPABILITY_H__ #define __DC_LINK_DP_CAPABILITY_H__ -#include "link.h" +#include "link_service.h" bool detect_dp_sink_caps(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h index a61edfc9ca7a..7cd03fa4892b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h @@ -27,7 +27,7 @@ #ifndef __DC_LINK_DPIA_H__ #define __DC_LINK_DPIA_H__ -#include "link.h" +#include "link_service.h" /* Read tunneling device capability from DPCD and update link capability * accordingly. diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 41efcb3e44e2..30cd8e2b9d35 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -26,7 +26,7 @@ #ifndef DC_INC_LINK_DP_DPIA_BW_H_ #define DC_INC_LINK_DP_DPIA_BW_H_ -#include "link.h" +#include "link_service.h" /* diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h index ac33730fedd4..87516fb3b45a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_IRQ_HANDLER_H__ #define __DC_LINK_DP_IRQ_HANDLER_H__ -#include "link.h" +#include "link_service.h" bool dp_parse_link_loss_status( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h index ab1c1f8f1f8b..58e154494582 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_PHY_H__ #define __DC_LINK_DP_PHY_H__ -#include "link.h" +#include "link_service.h" void dp_enable_link_phy( struct dc_link *link, const struct link_resource *link_res, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 134093ce5a8e..08e2b572e0ff 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1729,6 +1729,15 @@ bool perform_link_training_with_retries( break; } + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && + !link->dc->config.enable_dpia_pre_training) { + if (j == (attempts - 1)) + do_fallback = true; + else + do_fallback = false; + } + if (j == (attempts - 1)) { DC_LOG_WARNING( "%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n", diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 574b083e0936..ce52de22ab7a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_TRAINING_H__ 
#define __DC_LINK_DP_TRAINING_H__ -#include "link.h" +#include "link_service.h" bool perform_link_training_with_retries( const struct dc_link_settings *link_setting, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h index 08d787a1e451..c2717c678c72 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h @@ -25,7 +25,7 @@ #ifndef __LINK_DPCD_H__ #define __LINK_DPCD_H__ -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" enum dc_status core_link_read_dpcd( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 8b7b87b21c2e..5e806edbb9f6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -703,6 +703,20 @@ bool edp_setup_psr(struct dc_link *link, if (!link) return false; + /* This is a workaround: some vendors require the source to + * read the PSR cap; otherwise, the vendor's PSR feature will + * fall back to its default behavior, causing a misconfiguration + * of this feature. + */ + if (link->panel_config.psr.read_psrcap_again) { + dm_helpers_dp_read_dpcd( + link->ctx, + link, + DP_PSR_SUPPORT, + &link->dpcd_caps.psr_info.psr_version, + sizeof(link->dpcd_caps.psr_info.psr_version)); + } + //Clear PSR cfg memset(&psr_configuration, 0, sizeof(psr_configuration)); dm_helpers_dp_write_dpcd( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 4a475d5b9dde..62a6344e613e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -25,7 +25,7 @@ #ifndef __DC_LINK_EDP_PANEL_CONTROL_H__ #define __DC_LINK_EDP_PANEL_CONTROL_H__ -#include "link.h" +#include "link_service.h" enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h index 4fb526b264f9..af529328ba17 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_HPD_H__ #define __DC_LINK_HPD_H__ -#include "link.h" +#include "link_service.h" enum hpd_source_id get_hpd_line(struct dc_link *link); /* diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 3a51be63f020..c4b4dc3ad8c9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -29,6 +29,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" @@ -836,17 +837,24 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static enum dc_status dce100_validate_bandwidth( +enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode) { int i; bool at_least_one_pipe = false; + struct 
dc_stream_state *stream = NULL; + const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) + stream = context->res_ctx.pipe_ctx[i].stream; + if (stream) { at_least_one_pipe = true; + + if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) + return DC_FAIL_BANDWIDTH_VALIDATE; + } } if (at_least_one_pipe) { @@ -854,7 +862,16 @@ static enum dc_status dce100_validate_bandwidth( context->bw_ctx.bw.dce.dispclk_khz = 681000; context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; + /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and + * the DP clock, and shouldn't be turned off. Just select the display + * clock value from its low power mode. + */ + if (dc->ctx->dce_version == DCE_VERSION_6_0 || + dc->ctx->dce_version == DCE_VERSION_6_4) + context->bw_ctx.bw.dce.dispclk_khz = 352000; + else + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; } @@ -881,7 +898,7 @@ static bool dce100_validate_surface_sets( return true; } -static enum dc_status dce100_validate_global( +enum dc_status dce100_validate_global( struct dc *dc, struct dc_state *context) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h index fecab7c560f5..dd150a4b4610 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h @@ -41,6 +41,15 @@ struct resource_pool *dce100_create_resource_pool( enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); +enum dc_status dce100_validate_global( + struct dc *dc, + struct dc_state *context); + +enum dc_status dce100_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + enum dc_validate_mode validate_mode); + enum dc_status dce100_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index 2f23cc6df571..540e04ec1e2d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -67,7 +67,7 @@ #include "reg_helper.h" #include "dce100/dce100_resource.h" -#include "link.h" +#include "link_service.h" #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 53b60044653f..b75be6ad64f6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -34,6 +34,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce60/irq_service_dce60.h" #include "dce110/dce110_timing_generator.h" @@ -403,13 +404,13 @@ static const struct dc_plane_cap plane_cap = { }, .max_upscale_factor = { - .argb8888 = 16000, + .argb8888 = 1, .nv12 = 1, .fp16 = 1 }, .max_downscale_factor = { - .argb8888 = 250, + .argb8888 = 1, .nv12 = 1, .fp16 = 1 } @@ -863,61 +864,6 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce60_validate_bandwidth( - 
struct dc *dc, - struct dc_state *context, - enum dc_validate_mode validate_mode) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - -static bool dce60_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce60_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce60_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce60_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -931,10 +877,10 @@ static const struct resource_funcs dce60_res_pool_funcs = { .destroy = dce60_destroy_resource_pool, .link_enc_create = dce60_link_encoder_create, .panel_cntl_create = dce60_panel_cntl_create, - .validate_bandwidth = dce60_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce60_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 3e8b0ac11d90..5b7769745202 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -32,6 +32,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce80/irq_service_dce80.h" #include "dce110/dce110_timing_generator.h" @@ -869,61 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce80_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - enum dc_validate_mode validate_mode) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - -static bool dce80_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return 
false; - } - - return true; -} - -static enum dc_status dce80_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce80_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce80_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -937,10 +883,10 @@ static const struct resource_funcs dce80_res_pool_funcs = { .destroy = dce80_destroy_resource_pool, .link_enc_create = dce80_link_encoder_create, .panel_cntl_create = dce80_panel_cntl_create, - .validate_bandwidth = dce80_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce80_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index f9cbdad3ef37..84b38d2d6967 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -85,7 +85,7 @@ #include "vm_helper.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 201ed863b69e..ff63f59ff928 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -60,7 +60,7 @@ #include "dml/display_mode_vba.h" #include "dcn30/dcn30_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dce/dce_panel_cntl.h" #include "dcn30/dcn30_dwb.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 3345068a878c..61623cb518d9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -47,7 +47,8 @@ #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" + #include "dce/dce_abm.h" #include "dce/dce_audio.h" #include "dce/dce_aux.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 3479e1eab4cd..02b9a84f2db3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -47,7 +47,7 @@ #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dce/dce_abm.h" #include "dce/dce_audio.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index ca17e5d8fdc2..3ed7f50554e2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -2239,7 +2239,8 @@ struct resource_pool *dcn31_create_resource_pool( enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes) + struct pipe_ctx *pipes, + struct audio_output 
*audio_output) { struct dc_state *state = link->dc->current_state; int i; @@ -2254,7 +2255,7 @@ enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, // Setup audio if (pipes[i].stream_res.audio != NULL) - build_audio_output(state, &pipes[i], &pipes[i].stream_res.audio_output); + build_audio_output(state, &pipes[i], &audio_output[i]); } #else /* This DCN requires rate divider updates and audio reprogramming to allow DP1<-->DP2 link rate switching, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index 7e8fde65528f..c32c85ef0ba4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -69,7 +69,8 @@ unsigned int dcn31_get_det_buffer_size( enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, struct dc_link_settings *link_setting, uint8_t pipe_count, - struct pipe_ctx *pipes); + struct pipe_ctx *pipes, + struct audio_output *audio_output); /*temp: B0 specific before switch to dcn313 headers*/ #ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 663c49cce4aa..d4917a35b991 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -927,6 +927,7 @@ static const struct dc_debug_options debug_defaults_drv = { .enable_legacy_fast_update = true, .using_dml2 = false, .disable_dsc_power_gate = true, + .min_disp_clk_khz = 100000, }; static const struct dc_panel_config panel_config_defaults = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 9917b366f00c..3965a7f1b64b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -69,7 +69,7 @@ #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_panel_cntl.h" #include "dcn30/dcn30_dwb.h" @@ -739,6 +739,7 @@ static const struct dc_debug_options debug_defaults_drv = { .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, .enable_legacy_fast_update = false, + .disable_stutter_for_wm_program = true }; static struct dce_aux *dcn32_aux_engine_create( @@ -2852,7 +2853,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx]; free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx]; free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst; - free_pipe->hblank_borrow = otg_master->hblank_borrow; + free_pipe->dsc_padding_params = otg_master->dsc_padding_params; if (free_pipe->stream->timing.flags.DSC == 1) { dcn20_acquire_dsc(free_pipe->stream->ctx->dc, &new_ctx->res_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 20d714596021..99f0432288b4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -1230,7 +1230,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, SR(DCHUBBUB_ARB_MALL_CNTL), \ SR(DCN_VM_FAULT_ADDR_MSB), 
SR(DCN_VM_FAULT_ADDR_LSB), \ SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ - SR(SDPIF_REQUEST_RATE_LIMIT) + SR(SDPIF_REQUEST_RATE_LIMIT), \ + SR(DCHUBBUB_SDPIF_CFG0) /* DCCG */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 061c0907d802..ad214986f7ac 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -72,7 +72,7 @@ #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_panel_cntl.h" #include "dcn30/dcn30_dwb.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 8475c6eec547..fff57f23f4f7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -61,7 +61,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1760,6 +1760,20 @@ enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_stat } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; +} + static struct resource_funcs dcn35_res_pool_funcs = { .destroy = dcn35_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1770,7 +1784,7 @@ static struct resource_funcs dcn35_res_pool_funcs = { .validate_bandwidth = dcn35_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1900,9 +1914,6 @@ static bool dcn35_resource_construct( dc->caps.num_of_host_routers = 2; dc->caps.num_of_dpias_per_host_router = 2; - dc->caps.num_of_host_routers = 2; - dc->caps.num_of_dpias_per_host_router = 2; - /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 0971c0f74186..0abd163b425e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -40,7 +40,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1732,6 +1732,21 @@ static enum dc_status dcn351_validate_bandwidth(struct dc *dc, return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; + +} + static struct resource_funcs dcn351_res_pool_funcs = { .destroy = dcn351_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1742,7 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = { .validate_bandwidth = dcn351_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1872,9 +1887,6 @@ static bool dcn351_resource_construct( dc->caps.num_of_host_routers = 2; dc->caps.num_of_dpias_per_host_router = 2; - dc->caps.num_of_host_routers = 2; - dc->caps.num_of_dpias_per_host_router = 2; - /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 8bae7fcedc22..ca125ee6c2fb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -40,7 +40,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1734,6 +1734,20 @@ static enum dc_status dcn35_validate_bandwidth(struct dc *dc, } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; +} + static struct resource_funcs dcn36_res_pool_funcs = { .destroy = dcn36_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1744,7 +1758,7 @@ static struct resource_funcs dcn36_res_pool_funcs = { .validate_bandwidth = dcn35_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1873,9 +1887,6 @@ static bool dcn36_resource_construct( dc->caps.num_of_host_routers = 2; dc->caps.num_of_dpias_per_host_router = 2; - dc->caps.num_of_host_routers = 2; - dc->caps.num_of_dpias_per_host_router = 2; - /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. 
* It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 068c123ea8a8..1d18807e4749 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -50,7 +50,7 @@ #include "dml/display_mode_vba.h" #include "dcn401/dcn401_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "link_enc_cfg.h" #include "dcn31/dcn31_panel_cntl.h" @@ -1699,6 +1699,9 @@ static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + pixel_clk_params->requested_pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment) link_enc = link_enc_cfg_get_link_enc(link); if (link_enc) diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c index 55b929ca7982..b1fb0f8a253a 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c @@ -641,16 +641,16 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, /* this gives the direction of the cositing (negative will move * left, right otherwise) */ - int sign = 1; + int h_sign = flip_horz_scan_dir ? -1 : 1; + int v_sign = flip_vert_scan_dir ? -1 : 1; switch (spl_in->basic_in.cositing) { - case CHROMA_COSITING_TOPLEFT: - init_adj_h = spl_fixpt_from_fraction(sign, 4); - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); + init_adj_v = spl_fixpt_from_fraction(v_sign, 4); break; case CHROMA_COSITING_LEFT: - init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); init_adj_v = spl_fixpt_zero; break; case CHROMA_COSITING_NONE: diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index e65747f7f12f..92248224b713 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4143,9 +4143,13 @@ struct dmub_cmd_replay_copy_settings_data { */ uint8_t hpo_link_enc_inst; /** + * Determines if fast resync in ultra sleep mode is enabled/disabled. + */ + uint8_t replay_support_fast_resync_in_ultra_sleep_mode; + /** * @pad: Align structure to 4 byte boundary. 
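The new uint8_t field in the DMUB command data above is paired with shrinking the pad array from two bytes to one, so the structure keeps the same overall size and 4-byte alignment that the firmware interface expects. A hedged standalone illustration of that padding pattern is below; the struct and field names are invented for the example and only the size-preserving layout mirrors the change in this hunk:

    #include <assert.h>
    #include <stdint.h>

    struct example_cmd_before {
        uint8_t inst;      /* existing byte-sized field */
        uint8_t pad[2];    /* explicit padding up to a 4-byte boundary */
        uint8_t tail;      /* hypothetical trailing byte */
    };

    struct example_cmd_after {
        uint8_t inst;
        uint8_t new_flag;  /* new field consumes one former pad byte */
        uint8_t pad[1];    /* padding reduced so the layout is unchanged */
        uint8_t tail;
    };

    int main(void)
    {
        /* Same total size before and after, so the shared layout stays stable. */
        assert(sizeof(struct example_cmd_before) == sizeof(struct example_cmd_after));
        return 0;
    }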
*/ - uint8_t pad[2]; + uint8_t pad[1]; }; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e7056205b050..ce041f6239dc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -89,44 +89,50 @@ static inline void dmub_dcn32_translate_addr(const union dmub_addr *addr_in, void dmub_dcn32_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 30; - uint32_t in_reset, scratch, i; + const uint32_t timeout = 100000; + uint32_t in_reset, is_enabled, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (in_reset == 0) { + if (in_reset == 0 && is_enabled != 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - /** - * Timeout covers both the ACK and the wait - * for remaining work to finish. - * - * This is mostly bound by the PHY disable sequence. - * Each register check will be greater than 1us, so - * don't bother using udelay. - */ - for (i = 0; i < timeout; ++i) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; + + udelay(1); } for (i = 0; i < timeout; ++i) { - scratch = dmub->hw_funcs.get_gpint_response(dmub); + scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; + + udelay(1); } + for (i = 0; i < timeout; ++i) { + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode); + if (pwait_mode & (1 << 0)) + break; + + udelay(1); + } /* Force reset in case we timed out, DMCUB is likely hung. */ } - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + if (is_enabled) { + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); + udelay(1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + } + REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); @@ -135,7 +141,7 @@ void dmub_dcn32_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); - /* Clear the GPINT command manually so we don't reset again. */ + /* Clear the GPINT command manually so we don't send anything during boot. 
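The reworked dmub_dcn32_reset() above repeats one idiom three times: poll a status (the GPINT ack, the STOP_FW response in SCRATCH7, then the PWAIT mode bit) with a 1 us delay per iteration and a bounded retry count, falling back to a forced reset on timeout. A hedged sketch of that idiom as a stand-alone helper; dmub_poll_until() and its callback are illustrative and assume the kernel's udelay():

#include <linux/delay.h>

/* Illustrative helper, not part of the patch. */
static bool dmub_poll_until(struct dmub_srv *dmub,
			    bool (*condition)(struct dmub_srv *dmub),
			    uint32_t timeout_us)
{
	uint32_t i;

	for (i = 0; i < timeout_us; ++i) {
		if (condition(dmub))
			return true;	/* hardware reached the expected state */
		udelay(1);		/* register checks spaced ~1 us apart */
	}

	return false;			/* caller falls through to the forced reset */
}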
*/ cmd.all = 0; dmub->hw_funcs.set_gpint(dmub, cmd); } @@ -419,8 +425,8 @@ uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub) void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub) { - uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; - uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + uint32_t is_dmub_enabled, is_soft_reset, is_pwait; + uint32_t is_traceport_enabled, is_cw6_enabled; struct dmub_timeout_info timeout = {0}; if (!dmub) @@ -470,18 +476,15 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub) REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); dmub->debug.is_dmcub_enabled = is_dmub_enabled; + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait); + dmub->debug.is_pwait = is_pwait; + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); dmub->debug.is_dmcub_soft_reset = is_soft_reset; - REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - dmub->debug.is_dmcub_secure_reset = is_sec_reset; - REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); dmub->debug.is_traceport_en = is_traceport_enabled; - REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - dmub->debug.is_cw0_enabled = is_cw0_enabled; - REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); dmub->debug.is_cw6_enabled = is_cw6_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h index 1a229450c53d..daf81027d663 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h @@ -89,6 +89,9 @@ struct dmub_srv; DMUB_SR(DMCUB_REGION5_OFFSET) \ DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \ DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION6_OFFSET) \ + DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \ DMUB_SR(DMCUB_SCRATCH0) \ DMUB_SR(DMCUB_SCRATCH1) \ DMUB_SR(DMCUB_SCRATCH2) \ @@ -155,6 +158,8 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \ DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ @@ -162,7 +167,8 @@ struct dmub_srv; DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \ DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \ DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \ - DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) + DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \ + DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS) struct dmub_srv_dcn32_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index bfb446736ca8..75efda2969cf 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -239,18 +239,51 @@ enum amd_harvest_ip_mask { AMD_HARVEST_IP_DMU_MASK = 0x4, }; +/** + * enum DC_FEATURE_MASK - Bits that control DC feature defaults + */ enum DC_FEATURE_MASK { //Default value can be found at "uint amdgpu_dc_feature_mask" - DC_FBC_MASK = (1 << 0), //0x1, disabled by default - DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default - DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default - 
DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1 - DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default - DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default - DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default - DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default - DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default - DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 + /** + * @DC_FBC_MASK: (0x1) disabled by default + */ + DC_FBC_MASK = (1 << 0), + /** + * @DC_MULTI_MON_PP_MCLK_SWITCH_MASK: (0x2) enabled by default + */ + DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), + /** + * @DC_DISABLE_FRACTIONAL_PWM_MASK: (0x4) disabled by default + */ + DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), + /** + * @DC_PSR_MASK: (0x8) disabled by default for DCN < 3.1 + */ + DC_PSR_MASK = (1 << 3), + /** + * @DC_EDP_NO_POWER_SEQUENCING: (0x10) disabled by default + */ + DC_EDP_NO_POWER_SEQUENCING = (1 << 4), + /** + * @DC_DISABLE_LTTPR_DP1_4A: (0x20) disabled by default + */ + DC_DISABLE_LTTPR_DP1_4A = (1 << 5), + /** + * @DC_DISABLE_LTTPR_DP2_0: (0x40) disabled by default + */ + DC_DISABLE_LTTPR_DP2_0 = (1 << 6), + /** + * @DC_PSR_ALLOW_SMU_OPT: (0x80) disabled by default + */ + DC_PSR_ALLOW_SMU_OPT = (1 << 7), + /** + * @DC_PSR_ALLOW_MULTI_DISP_OPT: (0x100) disabled by default + */ + DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), + /** + * @DC_REPLAY_MASK: (0x200) disabled by default for DCN < 3.1.4 + */ + DC_REPLAY_MASK = (1 << 9), }; /** @@ -258,64 +291,64 @@ enum DC_FEATURE_MASK { */ enum DC_DEBUG_MASK { /** - * @DC_DISABLE_PIPE_SPLIT: If set, disable pipe-splitting + * @DC_DISABLE_PIPE_SPLIT: (0x1) If set, disable pipe-splitting */ DC_DISABLE_PIPE_SPLIT = 0x1, /** - * @DC_DISABLE_STUTTER: If set, disable memory stutter mode + * @DC_DISABLE_STUTTER: (0x2) If set, disable memory stutter mode */ DC_DISABLE_STUTTER = 0x2, /** - * @DC_DISABLE_DSC: If set, disable display stream compression + * @DC_DISABLE_DSC: (0x4) If set, disable display stream compression */ DC_DISABLE_DSC = 0x4, /** - * @DC_DISABLE_CLOCK_GATING: If set, disable clock gating optimizations + * @DC_DISABLE_CLOCK_GATING: (0x8) If set, disable clock gating optimizations */ DC_DISABLE_CLOCK_GATING = 0x8, /** - * @DC_DISABLE_PSR: If set, disable Panel self refresh v1 and PSR-SU + * @DC_DISABLE_PSR: (0x10) If set, disable Panel self refresh v1 and PSR-SU */ DC_DISABLE_PSR = 0x10, /** - * @DC_FORCE_SUBVP_MCLK_SWITCH: If set, force mclk switch in subvp, even + * @DC_FORCE_SUBVP_MCLK_SWITCH: (0x20) If set, force mclk switch in subvp, even * if mclk switch in vblank is possible */ DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, /** - * @DC_DISABLE_MPO: If set, disable multi-plane offloading + * @DC_DISABLE_MPO: (0x40) If set, disable multi-plane offloading */ DC_DISABLE_MPO = 0x40, /** - * @DC_ENABLE_DPIA_TRACE: If set, enable trace logging for DPIA + * @DC_ENABLE_DPIA_TRACE: (0x80) If set, enable trace logging for DPIA */ DC_ENABLE_DPIA_TRACE = 0x80, /** - * @DC_ENABLE_DML2: If set, force usage of DML2, even if the DCN version + * @DC_ENABLE_DML2: (0x100) If set, force usage of DML2, even if the DCN version * does not default to it. 
*/ DC_ENABLE_DML2 = 0x100, /** - * @DC_DISABLE_PSR_SU: If set, disable PSR SU + * @DC_DISABLE_PSR_SU: (0x200) If set, disable PSR SU */ DC_DISABLE_PSR_SU = 0x200, /** - * @DC_DISABLE_REPLAY: If set, disable Panel Replay + * @DC_DISABLE_REPLAY: (0x400) If set, disable Panel Replay */ DC_DISABLE_REPLAY = 0x400, /** - * @DC_DISABLE_IPS: If set, disable all Idle Power States, all the time. + * @DC_DISABLE_IPS: (0x800) If set, disable all Idle Power States, all the time. * If more than one IPS debug bit is set, the lowest bit takes * precedence. For example, if DC_FORCE_IPS_ENABLE and * DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes @@ -324,56 +357,57 @@ enum DC_DEBUG_MASK { DC_DISABLE_IPS = 0x800, /** - * @DC_DISABLE_IPS_DYNAMIC: If set, disable all IPS, all the time, + * @DC_DISABLE_IPS_DYNAMIC: (0x1000) If set, disable all IPS, all the time, * *except* when driver goes into suspend. */ DC_DISABLE_IPS_DYNAMIC = 0x1000, /** - * @DC_DISABLE_IPS2_DYNAMIC: If set, disable IPS2 (IPS1 allowed) if + * @DC_DISABLE_IPS2_DYNAMIC: (0x2000) If set, disable IPS2 (IPS1 allowed) if * there is an enabled display. Otherwise, enable all IPS. */ DC_DISABLE_IPS2_DYNAMIC = 0x2000, /** - * @DC_FORCE_IPS_ENABLE: If set, force enable all IPS, all the time. + * @DC_FORCE_IPS_ENABLE: (0x4000) If set, force enable all IPS, all the time. */ DC_FORCE_IPS_ENABLE = 0x4000, /** - * @DC_DISABLE_ACPI_EDID: If set, don't attempt to fetch EDID for + * @DC_DISABLE_ACPI_EDID: (0x8000) If set, don't attempt to fetch EDID for * eDP display from ACPI _DDC method. */ DC_DISABLE_ACPI_EDID = 0x8000, /** - * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver. + * @DC_DISABLE_HDMI_CEC: (0x10000) If set, disable HDMI-CEC feature in amdgpu driver. */ DC_DISABLE_HDMI_CEC = 0x10000, /** - * @DC_DISABLE_SUBVP_FAMS: If set, disable DCN Sub-Viewport & Firmware Assisted + * @DC_DISABLE_SUBVP_FAMS: (0x20000) If set, disable DCN Sub-Viewport & Firmware Assisted * Memory Clock Switching (FAMS) feature in amdgpu driver. */ DC_DISABLE_SUBVP_FAMS = 0x20000, /** - * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: If set, disable support for custom brightness curves + * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: (0x40000) If set, disable support for custom + * brightness curves */ DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000, /** - * @DC_HDCP_LC_FORCE_FW_ENABLE: If set, use HDCP Locality Check FW + * @DC_HDCP_LC_FORCE_FW_ENABLE: (0x80000) If set, use HDCP Locality Check FW * path regardless of reported HW capabilities. */ DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, /** - * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: (0x100000) If set, upon HDCP Locality Check FW * path failure, retry using legacy SW path. 
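All of the mask values documented above are plain bit flags: users OR them into the amdgpu dcfeaturemask/dcdebugmask module parameters (parameter names assumed from the "uint amdgpu_dc_feature_mask" note earlier), and the driver tests them with a bitwise AND. A minimal hedged example of such a test; the helper name is illustrative:

/* Illustrative only: checks whether PSR was requested via the feature mask. */
static bool example_psr_requested(uint32_t dc_feature_mask)
{
	/* DC_PSR_MASK is bit 3 (0x8); setting it enables PSR even on
	 * ASICs where it is off by default (DCN < 3.1). */
	return (dc_feature_mask & DC_PSR_MASK) != 0;
}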
*/ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, /** - * @DC_SKIP_DETECTION_LT: If set, skip detection link training + * @DC_SKIP_DETECTION_LT: (0x200000) If set, skip detection link training */ DC_SKIP_DETECTION_LT = 0x200000, }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h index 9de01ae574c0..067eddd9c62d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h @@ -4115,6 +4115,7 @@ #define mmSCL0_SCL_COEF_RAM_CONFLICT_STATUS 0x1B55 #define mmSCL0_SCL_COEF_RAM_SELECT 0x1B40 #define mmSCL0_SCL_COEF_RAM_TAP_DATA 0x1B41 +#define mmSCL0_SCL_SCALER_ENABLE 0x1B42 #define mmSCL0_SCL_CONTROL 0x1B44 #define mmSCL0_SCL_DEBUG 0x1B6A #define mmSCL0_SCL_DEBUG2 0x1B69 @@ -4144,6 +4145,7 @@ #define mmSCL1_SCL_COEF_RAM_CONFLICT_STATUS 0x1E55 #define mmSCL1_SCL_COEF_RAM_SELECT 0x1E40 #define mmSCL1_SCL_COEF_RAM_TAP_DATA 0x1E41 +#define mmSCL1_SCL_SCALER_ENABLE 0x1E42 #define mmSCL1_SCL_CONTROL 0x1E44 #define mmSCL1_SCL_DEBUG 0x1E6A #define mmSCL1_SCL_DEBUG2 0x1E69 @@ -4173,6 +4175,7 @@ #define mmSCL2_SCL_COEF_RAM_CONFLICT_STATUS 0x4155 #define mmSCL2_SCL_COEF_RAM_SELECT 0x4140 #define mmSCL2_SCL_COEF_RAM_TAP_DATA 0x4141 +#define mmSCL2_SCL_SCALER_ENABLE 0x4142 #define mmSCL2_SCL_CONTROL 0x4144 #define mmSCL2_SCL_DEBUG 0x416A #define mmSCL2_SCL_DEBUG2 0x4169 @@ -4202,6 +4205,7 @@ #define mmSCL3_SCL_COEF_RAM_CONFLICT_STATUS 0x4455 #define mmSCL3_SCL_COEF_RAM_SELECT 0x4440 #define mmSCL3_SCL_COEF_RAM_TAP_DATA 0x4441 +#define mmSCL3_SCL_SCALER_ENABLE 0x4442 #define mmSCL3_SCL_CONTROL 0x4444 #define mmSCL3_SCL_DEBUG 0x446A #define mmSCL3_SCL_DEBUG2 0x4469 @@ -4231,6 +4235,7 @@ #define mmSCL4_SCL_COEF_RAM_CONFLICT_STATUS 0x4755 #define mmSCL4_SCL_COEF_RAM_SELECT 0x4740 #define mmSCL4_SCL_COEF_RAM_TAP_DATA 0x4741 +#define mmSCL4_SCL_SCALER_ENABLE 0x4742 #define mmSCL4_SCL_CONTROL 0x4744 #define mmSCL4_SCL_DEBUG 0x476A #define mmSCL4_SCL_DEBUG2 0x4769 @@ -4260,6 +4265,7 @@ #define mmSCL5_SCL_COEF_RAM_CONFLICT_STATUS 0x4A55 #define mmSCL5_SCL_COEF_RAM_SELECT 0x4A40 #define mmSCL5_SCL_COEF_RAM_TAP_DATA 0x4A41 +#define mmSCL5_SCL_SCALER_ENABLE 0x4A42 #define mmSCL5_SCL_CONTROL 0x4A44 #define mmSCL5_SCL_DEBUG 0x4A6A #define mmSCL5_SCL_DEBUG2 0x4A69 @@ -4287,6 +4293,7 @@ #define mmSCL_COEF_RAM_CONFLICT_STATUS 0x1B55 #define mmSCL_COEF_RAM_SELECT 0x1B40 #define mmSCL_COEF_RAM_TAP_DATA 0x1B41 +#define mmSCL_SCALER_ENABLE 0x1B42 #define mmSCL_CONTROL 0x1B44 #define mmSCL_DEBUG 0x1B6A #define mmSCL_DEBUG2 0x1B69 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h index 2d6a598a6c25..9317a7afa621 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h @@ -8650,6 +8650,8 @@ #define REGAMMA_LUT_INDEX__REGAMMA_LUT_INDEX__SHIFT 0x00000000 #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK_MASK 0x00000007L #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK__SHIFT 0x00000000 +#define SCL_SCALER_ENABLE__SCL_SCALE_EN_MASK 0x00000001L +#define SCL_SCALER_ENABLE__SCL_SCALE_EN__SHIFT 0x00000000 #define SCL_ALU_CONTROL__SCL_ALU_DISABLE_MASK 0x00000001L #define SCL_ALU_CONTROL__SCL_ALU_DISABLE__SHIFT 0x00000000 #define SCL_BYPASS_CONTROL__SCL_BYPASS_MODE_MASK 0x00000003L diff --git a/drivers/gpu/drm/amd/include/dm_pp_interface.h b/drivers/gpu/drm/amd/include/dm_pp_interface.h index acd1cef61b7c..349544504c93 100644 --- 
a/drivers/gpu/drm/amd/include/dm_pp_interface.h +++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h @@ -65,6 +65,7 @@ struct single_display_configuration { uint32_t view_resolution_cy; enum amd_pp_display_config_type displayconfigtype; uint32_t vertical_refresh; /* for active display */ + uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */ }; #define MAX_NUM_DISPLAY 32 diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2f7e4b5bebf3..2b0cdb2a2775 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -162,6 +162,10 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK, AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK, AMDGPU_PP_SENSOR_VCN_LOAD, + AMDGPU_PP_SENSOR_NODEPOWERLIMIT, + AMDGPU_PP_SENSOR_NODEPOWER, + AMDGPU_PP_SENSOR_GPPTRESIDENCY, + AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, }; enum amd_pp_task { diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 15680c3f4970..ab1cfc92dbeb 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t enable_mes_sch_stb_log : 1; uint32_t limit_single_process : 1; uint32_t is_strix_tmz_wa_enabled :1; - uint32_t reserved : 13; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 12; }; uint32_t uint32_t_all; }; diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index c04bd351b250..69611c7e30e3 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -287,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; uint32_t enable_mes_fence_int: 1; - uint32_t reserved : 10; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 9; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c index 2d2d2d5e6763..b5e9c3ecf703 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c @@ -27,76 +27,69 @@ #include "amdgpu_smu.h" #include "amdgpu_dpm_internal.h" -void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) +void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev) { struct drm_device *ddev = adev_to_drm(adev); + struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; + struct single_display_configuration *display_cfg; struct drm_crtc *crtc; struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_connector *conn; + int num_crtcs = 0; + int vrefresh; + u32 vblank_in_pixels, vblank_time_us; + + cfg->min_vblank_time = 0xffffffff; /* if the displays are off, vblank time is max */ - adev->pm.dpm.new_active_crtcs = 0; - adev->pm.dpm.new_active_crtc_count = 0; if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { + list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->enabled) { - adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); - adev->pm.dpm.new_active_crtc_count++; - } - } - } -} -u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; - u32 vblank_in_pixels; - u32 
vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */ + /* The array should only contain active displays. */ + if (!amdgpu_crtc->enabled) + continue; + + conn = to_amdgpu_connector(amdgpu_crtc->connector); + display_cfg = &adev->pm.pm_display_cfg.displays[num_crtcs++]; + + if (amdgpu_crtc->hw_mode.clock) { + vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) { vblank_in_pixels = amdgpu_crtc->hw_mode.crtc_htotal * (amdgpu_crtc->hw_mode.crtc_vblank_end - amdgpu_crtc->hw_mode.crtc_vdisplay + (amdgpu_crtc->v_border * 2)); - vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; + vblank_time_us = + vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; - /* we have issues with mclk switching with - * refresh rates over 120 hz on the non-DC code. + /* The legacy (non-DC) code has issues with mclk switching + * with refresh rates over 120 Hz. Disable mclk switching. */ - if (drm_mode_vrefresh(&amdgpu_crtc->hw_mode) > 120) + if (vrefresh > 120) vblank_time_us = 0; - break; - } - } - } + /* Find minimum vblank time. */ + if (vblank_time_us < cfg->min_vblank_time) + cfg->min_vblank_time = vblank_time_us; - return vblank_time_us; -} - -u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; - u32 vrefresh = 0; + /* Find vertical refresh rate of first active display. */ + if (!cfg->vrefresh) + cfg->vrefresh = vrefresh; + } - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) { - vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - break; + if (amdgpu_crtc->crtc_id < cfg->crtc_index) { + /* Find first active CRTC and its line time. 
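A worked example of the vblank arithmetic just above, assuming a 1920x1080@60 CTA mode (htotal 2200, 45 vblank lines, 148.5 MHz pixel clock); the helper and the mode numbers are illustrative, not taken from the patch:

/*
 * vblank_in_pixels = 2200 * 45             =  99000
 * vblank_time_us   = 99000 * 1000 / 148500 =    666 us
 * (clock_khz is the DRM mode clock in kHz; above 120 Hz the driver
 *  overrides the result to 0 so mclk switching is disabled.)
 */
static u32 example_vblank_time_us(u32 htotal, u32 vblank_lines, u32 clock_khz)
{
	u32 vblank_in_pixels = htotal * vblank_lines;

	return vblank_in_pixels * 1000 / clock_khz;
}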
*/ + cfg->crtc_index = amdgpu_crtc->crtc_id; + cfg->line_time_in_us = amdgpu_crtc->line_time; } + + display_cfg->controller_id = amdgpu_crtc->crtc_id; + display_cfg->pixel_clock = conn->pixelclock_for_modeset; } } - return vrefresh; + cfg->display_clk = adev->clock.default_dispclk; + cfg->num_display = num_crtcs; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 96590c1da553..b5fbb0fd1dc0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1421,9 +1421,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, return -EINVAL; } -static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev, - enum amd_pp_sensors sensor, - void *query) +static int amdgpu_pm_get_sensor_generic(struct amdgpu_device *adev, + enum amd_pp_sensors sensor, + void *query) { int r, size = sizeof(uint32_t); @@ -1456,7 +1456,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value); if (r) return r; @@ -1480,7 +1480,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value); if (r) return r; @@ -1504,7 +1504,7 @@ static ssize_t amdgpu_get_vcn_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value); if (r) return r; @@ -1783,7 +1783,7 @@ static int amdgpu_show_powershift_percent(struct device *dev, uint32_t ss_power; int r = 0, i; - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power); if (r == -EOPNOTSUPP) { /* sensor not available on dGPU, try to read from APU */ adev = NULL; @@ -1796,7 +1796,7 @@ static int amdgpu_show_powershift_percent(struct device *dev, } mutex_unlock(&mgpu_info.mutex); if (adev) - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power); } if (r) @@ -1906,11 +1906,11 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (!amdgpu_device_supports_smart_shift(adev)) *states = ATTR_STATE_UNSUPPORTED; - else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE, - (void *)&ss_power)) + else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE, + (void *)&ss_power)) *states = ATTR_STATE_UNSUPPORTED; - else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE, - (void *)&ss_power)) + else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE, + (void *)&ss_power)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -2081,8 +2081,9 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd * for user application to monitor various board reated attributes. * * The amdgpu driver provides a sysfs API for reporting board attributes. Presently, - * only two types of attributes are reported, baseboard temperature and - * gpu board temperature. Both of them are reported as binary files. + * seven types of attributes are reported. 
Baseboard temperature and + * gpu board temperature are reported as binary files. Npm status, current node power limit, + * max node power limit, node power and global ppt residency is reported as ASCII text file. * * * .. code-block:: console * @@ -2090,6 +2091,15 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd * * hexdump /sys/bus/pci/devices/.../board/gpuboard_temp * + * hexdump /sys/bus/pci/devices/.../board/npm_status + * + * hexdump /sys/bus/pci/devices/.../board/cur_node_power_limit + * + * hexdump /sys/bus/pci/devices/.../board/max_node_power_limit + * + * hexdump /sys/bus/pci/devices/.../board/node_power + * + * hexdump /sys/bus/pci/devices/.../board/global_ppt_resid */ /** @@ -2168,8 +2178,129 @@ out: return size; } +/** + * DOC: cur_node_power_limit + * + * The amdgpu driver provides a sysfs API for retrieving current node power limit. + * The file cur_node_power_limit is used for this. + */ +static ssize_t amdgpu_show_cur_node_power_limit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 nplimit; + int r; + + /* get the current node power limit */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWERLIMIT, + (void *)&nplimit); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", nplimit); +} + +/** + * DOC: node_power + * + * The amdgpu driver provides a sysfs API for retrieving current node power. + * The file node_power is used for this. + */ +static ssize_t amdgpu_show_node_power(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 npower; + int r; + + /* get the node power */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER, + (void *)&npower); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", npower); +} + +/** + * DOC: npm_status + * + * The amdgpu driver provides a sysfs API for retrieving current node power management status. + * The file npm_status is used for this. It shows the status as enabled or disabled based on + * current node power value. If node power is zero, status is disabled else enabled. + */ +static ssize_t amdgpu_show_npm_status(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 npower; + int r; + + /* get the node power */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER, + (void *)&npower); + if (r) + return r; + + return sysfs_emit(buf, "%s\n", npower ? "enabled" : "disabled"); +} + +/** + * DOC: global_ppt_resid + * + * The amdgpu driver provides a sysfs API for retrieving global ppt residency. + * The file global_ppt_resid is used for this. + */ +static ssize_t amdgpu_show_global_ppt_resid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 gpptresid; + int r; + + /* get the global ppt residency */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPPTRESIDENCY, + (void *)&gpptresid); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", gpptresid); +} + +/** + * DOC: max_node_power_limit + * + * The amdgpu driver provides a sysfs API for retrieving maximum node power limit. + * The file max_node_power_limit is used for this. 
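Since the new board files described above are plain ASCII, they can be read like any other sysfs attribute. A hedged userspace sketch; the PCI device path is a placeholder and the value is printed unit-less because the unit is not stated in this patch:

#include <stdio.h>

int main(void)
{
	unsigned int node_power;
	/* Placeholder PCI path; substitute the real device. */
	FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/board/node_power", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &node_power) == 1)
		printf("node_power: %u\n", node_power);
	fclose(f);
	return 0;
}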
+ */ +static ssize_t amdgpu_show_max_node_power_limit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 max_nplimit; + int r; + + /* get the max node power limit */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, + (void *)&max_nplimit); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", max_nplimit); +} + static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL); static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL); +static DEVICE_ATTR(cur_node_power_limit, 0444, amdgpu_show_cur_node_power_limit, NULL); +static DEVICE_ATTR(node_power, 0444, amdgpu_show_node_power, NULL); +static DEVICE_ATTR(global_ppt_resid, 0444, amdgpu_show_global_ppt_resid, NULL); +static DEVICE_ATTR(max_node_power_limit, 0444, amdgpu_show_max_node_power_limit, NULL); +static DEVICE_ATTR(npm_status, 0444, amdgpu_show_npm_status, NULL); static struct attribute *board_attrs[] = { &dev_attr_baseboard_temp.attr, @@ -2636,18 +2767,18 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp); break; case PP_TEMP_EDGE: /* get current edge temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp); break; case PP_TEMP_MEM: /* get current memory temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp); break; default: r = -EINVAL; @@ -2909,8 +3040,8 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 min_rpm = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, - (void *)&min_rpm); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, + (void *)&min_rpm); if (r) return r; @@ -2926,8 +3057,8 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 max_rpm = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, - (void *)&max_rpm); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, + (void *)&max_rpm); if (r) return r; @@ -3060,8 +3191,8 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, int r; /* get the voltage */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX, - (void *)&vddgfx); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX, + (void *)&vddgfx); if (r) return r; @@ -3077,8 +3208,8 @@ static ssize_t amdgpu_hwmon_show_vddboard(struct device *dev, int r; /* get the voltage */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, - (void *)&vddboard); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&vddboard); if (r) return r; @@ -3111,8 +3242,8 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, return -EINVAL; /* get the voltage */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB, - (void *)&vddnb); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB, + (void *)&vddnb); if (r) return r; @@ -3134,7 +3265,7 @@ static int 
amdgpu_hwmon_get_power(struct device *dev, u32 query = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&query); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&query); if (r) return r; @@ -3254,9 +3385,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - err = kstrtou32(buf, 10, &value); if (err) return err; @@ -3287,8 +3415,8 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, int r; /* get the sclk */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK, - (void *)&sclk); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK, + (void *)&sclk); if (r) return r; @@ -3311,8 +3439,8 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, int r; /* get the sclk */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK, - (void *)&mclk); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK, + (void *)&mclk); if (r) return r; @@ -3598,6 +3726,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; } + if (attr == &sensor_dev_attr_power1_cap.dev_attr.attr && + amdgpu_virt_cap_is_rw(&adev->virt.virt_caps, AMDGPU_VIRT_CAP_POWER_LIMIT)) + effective_mode |= S_IWUSR; + /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || ((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) && @@ -3606,10 +3738,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* not all products support both average and instantaneous */ if (attr == &sensor_dev_attr_power1_average.dev_attr.attr && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&tmp) == -EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, + (void *)&tmp) == -EOPNOTSUPP) return 0; if (attr == &sensor_dev_attr_power1_input.dev_attr.attr && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&tmp) == -EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&tmp) == -EOPNOTSUPP) return 0; /* hide max/min values if we can't both query and manage the fan */ @@ -3648,8 +3782,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* only few boards support vddboard */ if ((attr == &sensor_dev_attr_in2_input.dev_attr.attr || attr == &sensor_dev_attr_in2_label.dev_attr.attr) && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, - (void *)&tmp) == -EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&tmp) == -EOPNOTSUPP) return 0; /* no mclk on APUs other than gc 9,4,3*/ @@ -4531,6 +4665,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { enum amdgpu_sriov_vf_mode mode; uint32_t mask = 0; + uint32_t tmp; int ret; if (adev->pm.sysfs_initialized) @@ -4597,6 +4732,21 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) &amdgpu_board_attr_group); if (ret) goto err_out0; + if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, + (void *)&tmp) != -EOPNOTSUPP) { + sysfs_add_file_to_group(&adev->dev->kobj, + &dev_attr_cur_node_power_limit.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_node_power.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_global_ppt_resid.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, + &dev_attr_max_node_power_limit.attr, + 
amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_npm_status.attr, + amdgpu_board_attr_group.name); + } } adev->pm.sysfs_initialized = true; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 9748744133d9..65c1d98af26c 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -263,10 +263,6 @@ struct amdgpu_dpm { u32 voltage_response_time; u32 backbias_response_time; void *priv; - u32 new_active_crtcs; - int new_active_crtc_count; - u32 current_active_crtcs; - int current_active_crtc_count; struct amdgpu_dpm_dynamic_state dyn_state; struct amdgpu_dpm_fan fan; u32 tdp_limit; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h index 5c2a89f0d5d5..cc6d7ba040e9 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h @@ -23,10 +23,6 @@ #ifndef __AMDGPU_DPM_INTERNAL_H__ #define __AMDGPU_DPM_INTERNAL_H__ -void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); - -u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); - -u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); +void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 307ebf7e3226..33eb85dd68e9 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -2299,7 +2299,7 @@ static void kv_apply_state_adjust_rules(struct amdgpu_device *adev, if (pi->sys_info.nb_dpm_enable) { force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) || - pi->video_start || (adev->pm.dpm.new_active_crtc_count >= 3) || + pi->video_start || (adev->pm.pm_display_cfg.num_display >= 3) || pi->disable_nb_ps3_in_battery; ps->dpm0_pg_nb_ps_lo = force_high ? 
0x2 : 0x3; ps->dpm0_pg_nb_ps_hi = 0x2; @@ -2358,7 +2358,7 @@ static int kv_calculate_nbps_level_settings(struct amdgpu_device *adev) return 0; force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) || - (adev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start); + (adev->pm.pm_display_cfg.num_display >= 3) || pi->video_start); if (force_high) { for (i = pi->lowest_valid; i <= pi->highest_valid; i++) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 52dbf6d0469d..c7ed0b457129 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -771,7 +771,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev, int i; struct amdgpu_ps *ps; u32 ui_class; - bool single_display = adev->pm.dpm.new_active_crtc_count < 2; + bool single_display = adev->pm.pm_display_cfg.num_display < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && adev->powerplay.pp_funcs->vblank_too_short) { @@ -944,9 +944,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) amdgpu_dpm_post_set_power_state(adev); - adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; - adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; - if (pp_funcs->force_performance_level) { if (adev->pm.dpm.thermal_active) { enum amd_dpm_forced_level level = adev->pm.dpm.forced_level; @@ -967,7 +964,8 @@ void amdgpu_legacy_dpm_compute_clocks(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_dpm_get_active_displays(adev); + if (!adev->dc_enabled) + amdgpu_dpm_get_display_cfg(adev); amdgpu_dpm_change_power_state_locked(adev); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 6595a611ce6e..cf9932e68055 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3081,7 +3081,7 @@ static int si_get_vce_clock_voltage(struct amdgpu_device *adev, static bool si_dpm_vblank_too_short(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); + u32 vblank_time = adev->pm.pm_display_cfg.min_vblank_time; /* we never hit the non-gddr5 limit so disable it */ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; @@ -3447,9 +3447,10 @@ static void rv770_get_engine_memory_ss(struct amdgpu_device *adev) static void si_apply_state_adjust_rules(struct amdgpu_device *adev, struct amdgpu_ps *rps) { + const struct amd_pp_display_configuration *display_cfg = + &adev->pm.pm_display_cfg; struct si_ps *ps = si_get_ps(rps); struct amdgpu_clock_and_voltage_limits *max_limits; - struct amdgpu_connector *conn; bool disable_mclk_switching = false; bool disable_sclk_switching = false; u32 mclk, sclk; @@ -3488,14 +3489,9 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, * For example, 4K 60Hz and 1080p 144Hz fall into this category. * Find number of such displays connected. */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (!(adev->pm.dpm.new_active_crtcs & (1 << i)) || - !adev->mode_info.crtcs[i]->enabled) - continue; - - conn = to_amdgpu_connector(adev->mode_info.crtcs[i]->connector); - - if (conn->pixelclock_for_modeset > 297000) + for (i = 0; i < display_cfg->num_display; i++) { + /* The array only contains active displays. 
*/ + if (display_cfg->displays[i].pixel_clock > 297000) high_pixelclock_count++; } @@ -3523,7 +3519,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, rps->ecclk = 0; } - if ((adev->pm.dpm.new_active_crtc_count > 1) || + if ((adev->pm.pm_display_cfg.num_display > 1) || si_dpm_vblank_too_short(adev)) disable_mclk_switching = true; @@ -3671,7 +3667,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, ps->performance_levels[i].mclk, max_limits->vddc, &ps->performance_levels[i].vddc); btc_apply_voltage_dependency_rules(&adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk, - adev->clock.current_dispclk, + display_cfg->display_clk, max_limits->vddc, &ps->performance_levels[i].vddc); } @@ -4196,16 +4192,16 @@ static void si_program_ds_registers(struct amdgpu_device *adev) static void si_program_display_gap(struct amdgpu_device *adev) { + const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; u32 tmp, pipe; - int i; tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); - if (adev->pm.dpm.new_active_crtc_count > 0) + if (cfg->num_display > 0) tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; else tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; - if (adev->pm.dpm.new_active_crtc_count > 1) + if (cfg->num_display > 1) tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; else tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; @@ -4215,17 +4211,8 @@ static void si_program_display_gap(struct amdgpu_device *adev) tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG); pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT; - if ((adev->pm.dpm.new_active_crtc_count > 0) && - (!(adev->pm.dpm.new_active_crtcs & (1 << pipe)))) { - /* find the first active crtc */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->pm.dpm.new_active_crtcs & (1 << i)) - break; - } - if (i == adev->mode_info.num_crtc) - pipe = 0; - else - pipe = i; + if (cfg->num_display > 0 && pipe != cfg->crtc_index) { + pipe = cfg->crtc_index; tmp &= ~DCCG_DISP1_SLOW_SELECT_MASK; tmp |= DCCG_DISP1_SLOW_SELECT(pipe); @@ -4236,7 +4223,7 @@ static void si_program_display_gap(struct amdgpu_device *adev) * This can be a problem on PowerXpress systems or if you want to use the card * for offscreen rendering or compute if there are no crtcs enabled. */ - si_notify_smc_display_change(adev, adev->pm.dpm.new_active_crtc_count > 0); + si_notify_smc_display_change(adev, cfg->num_display > 0); } static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) @@ -5545,7 +5532,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, (pl->mclk <= pi->mclk_stutter_mode_threshold) && !eg_pi->uvd_enabled && (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && - (adev->pm.dpm.new_active_crtc_count <= 2)) { + (adev->pm.pm_display_cfg.num_display <= 2)) { level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN; } @@ -5694,7 +5681,7 @@ static bool si_is_state_ulv_compatible(struct amdgpu_device *adev, /* XXX validate against display requirements! 
*/ for (i = 0; i < adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count; i++) { - if (adev->clock.current_dispclk <= + if (adev->pm.pm_display_cfg.display_clk <= adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].clk) { if (ulv->pl.vddc < adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].v) @@ -5848,30 +5835,22 @@ static int si_upload_ulv_state(struct amdgpu_device *adev) static int si_upload_smc_data(struct amdgpu_device *adev) { - struct amdgpu_crtc *amdgpu_crtc = NULL; - int i; + const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; u32 crtc_index = 0; u32 mclk_change_block_cp_min = 0; u32 mclk_change_block_cp_max = 0; - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->pm.dpm.new_active_crtcs & (1 << i)) { - amdgpu_crtc = adev->mode_info.crtcs[i]; - break; - } - } - /* When a display is plugged in, program these so that the SMC * performs MCLK switching when it doesn't cause flickering. * When no display is plugged in, there is no need to restrict * MCLK switching, so program them to zero. */ - if (adev->pm.dpm.new_active_crtc_count && amdgpu_crtc) { - crtc_index = amdgpu_crtc->crtc_id; + if (cfg->num_display) { + crtc_index = cfg->crtc_index; - if (amdgpu_crtc->line_time) { - mclk_change_block_cp_min = 200 / amdgpu_crtc->line_time; - mclk_change_block_cp_max = 100 / amdgpu_crtc->line_time; + if (cfg->line_time_in_us) { + mclk_change_block_cp_min = 200 / cfg->line_time_in_us; + mclk_change_block_cp_max = 100 / cfg->line_time_in_us; } } diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index b48a031cbba0..554492dfa3c0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1554,16 +1554,7 @@ static void pp_pm_compute_clocks(void *handle) struct amdgpu_device *adev = hwmgr->adev; if (!adev->dc_enabled) { - amdgpu_dpm_get_active_displays(adev); - adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; - adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); - adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); - /* we have issues with mclk switching with - * refresh rates over 120 hz on the non-DC code. 
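For the MCLK change window programmed in si_upload_smc_data() above, the two thresholds are simply 200 and 100 divided by the first active CRTC's line time. A hedged worked example with a hypothetical 10 us line time (giving 20 and 10); the helper is illustrative and keeps the same zero guard as the driver:

/* Illustrative only; mirrors the arithmetic, not the SMC programming itself. */
static void example_mclk_change_block(u32 line_time_in_us,
				      u32 *cp_min, u32 *cp_max)
{
	*cp_min = 0;
	*cp_max = 0;

	if (line_time_in_us) {				/* e.g. 10 us */
		*cp_min = 200 / line_time_in_us;	/* 200 / 10 = 20 */
		*cp_max = 100 / line_time_in_us;	/* 100 / 10 = 10 */
	}
}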
- */ - if (adev->pm.pm_display_cfg.vrefresh > 120) - adev->pm.pm_display_cfg.min_vblank_time = 0; - + amdgpu_dpm_get_display_cfg(adev); pp_display_configuration_change(handle, &adev->pm.pm_display_cfg); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c5965924e7c6..fb8086859857 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -766,7 +766,6 @@ static int smu_set_funcs(struct amdgpu_device *adev) case IP_VERSION(13, 0, 14): case IP_VERSION(13, 0, 12): smu_v13_0_6_set_ppt_funcs(smu); - smu_v13_0_6_set_temp_funcs(smu); /* Enable pp_od_clk_voltage node */ smu->od_enabled = true; break; @@ -1316,6 +1315,33 @@ static void smu_init_power_profile(struct smu_context *smu) smu_power_profile_mode_get(smu, smu->power_profile_mode); } +void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + if (fea_id >= SMU_FEATURE_CAP_ID__COUNT) + return; + + set_bit(fea_id, fea_cap->cap_map); +} + +bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + if (fea_id >= SMU_FEATURE_CAP_ID__COUNT) + return false; + + return test_bit(fea_id, fea_cap->cap_map); +} + +static void smu_feature_cap_init(struct smu_context *smu) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + bitmap_zero(fea_cap->cap_map, SMU_FEATURE_CAP_ID__COUNT); +} + static int smu_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1348,6 +1374,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) INIT_DELAYED_WORK(&smu->swctf_delayed_work, smu_swctf_delayed_work_handler); + smu_feature_cap_init(smu); + ret = smu_smc_table_sw_init(smu); if (ret) { dev_err(adev->dev, "Failed to sw init smc table!\n"); @@ -1897,7 +1925,6 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; i++) smu_dpm_set_vcn_enable(smu, true, i); smu_dpm_set_jpeg_enable(smu, true); - smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); smu_set_mall_enable(smu); smu_set_gfx_cgpg(smu, true); @@ -2105,7 +2132,6 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block) } smu_dpm_set_jpeg_enable(smu, false); adev->jpeg.cur_state = AMD_PG_STATE_GATE; - smu_dpm_set_vpe_enable(smu, false); smu_dpm_set_umsch_mm_enable(smu, false); if (!smu->pm_enabled) @@ -2237,7 +2263,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) return ret; } - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); if (ret) return ret; @@ -3508,15 +3534,10 @@ bool smu_mode1_reset_is_support(struct smu_context *smu) bool smu_link_reset_is_support(struct smu_context *smu) { - bool ret = false; - if (!smu->pm_enabled) return false; - if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support) - ret = smu->ppt_funcs->link_reset_is_support(smu); - - return ret; + return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__LINK_RESET); } int smu_mode1_reset(struct smu_context *smu) @@ -4106,12 +4127,7 @@ int smu_send_rma_reason(struct smu_context *smu) */ bool smu_reset_sdma_is_supported(struct smu_context *smu) { - bool ret = false; - - if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported) - ret = smu->ppt_funcs->reset_sdma_is_supported(smu); - - return ret; 
+ return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); } int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) @@ -4126,12 +4142,7 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) bool smu_reset_vcn_is_supported(struct smu_context *smu) { - bool ret = false; - - if (smu->ppt_funcs && smu->ppt_funcs->reset_vcn_is_supported) - ret = smu->ppt_funcs->reset_vcn_is_supported(smu); - - return ret; + return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__VCN_RESET); } int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 5976eda80035..582c186d8b62 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -528,6 +528,17 @@ enum smu_fw_status { */ #define SMU_WBRF_EVENT_HANDLING_PACE 10 +enum smu_feature_cap_id { + SMU_FEATURE_CAP_ID__LINK_RESET = 0, + SMU_FEATURE_CAP_ID__SDMA_RESET, + SMU_FEATURE_CAP_ID__VCN_RESET, + SMU_FEATURE_CAP_ID__COUNT, +}; + +struct smu_feature_cap { + DECLARE_BITMAP(cap_map, SMU_FEATURE_CAP_ID__COUNT); +}; + struct smu_context { struct amdgpu_device *adev; struct amdgpu_irq_src irq_source; @@ -550,6 +561,7 @@ struct smu_context { struct amd_pp_display_configuration *display_config; struct smu_baco_context smu_baco; struct smu_temperature_range thermal_range; + struct smu_feature_cap fea_cap; void *od_settings; struct smu_umd_pstate_table pstate_table; @@ -1273,11 +1285,6 @@ struct pptable_funcs { bool (*mode1_reset_is_support)(struct smu_context *smu); /** - * @link_reset_is_support: Check if GPU supports link reset. - */ - bool (*link_reset_is_support)(struct smu_context *smu); - - /** * @mode1_reset: Perform mode1 reset. * * Complete GPU reset. @@ -1427,19 +1434,11 @@ struct pptable_funcs { * @reset_sdma: message SMU to soft reset sdma instance. */ int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask); - /** - * @reset_sdma_is_supported: Check if support resets the SDMA engine. - */ - bool (*reset_sdma_is_supported)(struct smu_context *smu); /** * @reset_vcn: message SMU to soft reset vcn instance. */ int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); - /** - * @reset_vcn_is_supported: Check if support resets vcn. - */ - bool (*reset_vcn_is_supported)(struct smu_context *smu); /** * @get_ecc_table: message SMU to get ECC INFO table. 
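The smu_feature_cap bitmap above replaces the per-feature "is_support" callbacks: an ASIC backend records once what its firmware supports, and the generic layer reduces each query to a bitmap test (as smu_link_reset_is_support() and smu_reset_sdma_is_supported() now do). A hedged sketch of the backend side; the function name and firmware-version threshold are illustrative:

/* Illustrative backend init hook, not code from this patch. */
static void example_ppt_advertise_caps(struct smu_context *smu, uint32_t fw_ver)
{
	/* Always-available feature on this hypothetical ASIC. */
	smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET);

	/* Gate newer features on firmware version (threshold is made up). */
	if (fw_ver >= 0x04560900)
		smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET);
}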
@@ -1788,4 +1787,7 @@ ssize_t smu_get_pm_policy_info(struct smu_context *smu, enum pp_pm_policy p_type, char *sysbuf); #endif + +void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id); +bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h index 1c407a8e96ee..bf6aa9620911 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -191,7 +191,7 @@ typedef enum { #define SMU_METRICS_TABLE_VERSION 0x14 -#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x0 +#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x1 typedef struct __attribute__((packed, aligned(4))) { uint64_t AccumulationCounter; @@ -304,7 +304,12 @@ typedef struct { int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius - int16_t spare[3]; + int16_t spare[7]; + + //NPM: NODE POWER MANAGEMENT + uint32_t NodePowerLimit; + uint32_t NodePower; + uint32_t GlobalPPTResidencyAcc; } SystemMetricsTable_t; #pragma pack(pop) @@ -359,6 +364,9 @@ typedef struct { // General info uint32_t pldmVersion[2]; + + //Node Power Limit + uint32_t MaxNodePowerLimit; } StaticMetricsTable_t; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index aff2776a8b6f..4b066c42e0ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -120,7 +120,8 @@ #define PPSMC_MSG_GetBadPageSeverity 0x5B #define PPSMC_MSG_GetSystemMetricsTable 0x5C #define PPSMC_MSG_GetSystemMetricsVersion 0x5D -#define PPSMC_Message_Count 0x5E +#define PPSMC_MSG_ResetVCN 0x5E +#define PPSMC_Message_Count 0x5F //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h index 251ed011b3b0..251ed011b3b0 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 599eddb5a67d..4fff78da81ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1745,10 +1745,10 @@ static int arcturus_i2c_control_init(struct smu_context *smu) snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -1756,27 +1756,12 @@ static int arcturus_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - 
return res; } static void arcturus_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index aac202d0c30e..0028f10ead42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -3145,10 +3145,10 @@ static int navi10_i2c_control_init(struct smu_context *smu) control->quirks = &navi10_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -3156,27 +3156,12 @@ static int navi10_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void navi10_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d57591509aed..31c2c0386b1f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2648,10 +2648,10 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu) control->quirks = &sienna_cichlid_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } /* assign the buses used for the FRU EEPROM and RAS EEPROM */ @@ -2660,27 +2660,12 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void sienna_cichlid_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index b067147b7c41..18d5d0704509 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1641,33 +1641,22 @@ static int aldebaran_i2c_control_init(struct smu_context *smu) 
control->quirks = &aldebaran_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - i2c_del_adapter(control); - - return res; } static void aldebaran_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 1a1f2a6b2e52..a89075e25717 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -288,7 +288,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) * Considering above, we just leave user a verbal message instead * of halt driver loading. */ - if (if_version != smu->smc_driver_if_version) { + if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION && + if_version != smu->smc_driver_if_version) { dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e084ed99ec0e..c1062e5f0393 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2825,10 +2825,10 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v13_0_0_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2838,27 +2838,12 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 0bec12b348ce..cb3fea9e8cf3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -136,8 +136,9 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + 
MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), - MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0), + MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 1), }; int smu_v13_0_12_tables_init(struct smu_context *smu) @@ -341,6 +342,9 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) static_metrics->pldmVersion[0] != 0xFFFFFFFF) smu->adev->firmware.pldm_version = static_metrics->pldmVersion[0]; + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS))) + pptable->MaxNodePowerLimit = + SMUQ10_ROUND(static_metrics->MaxNodePowerLimit); smu_v13_0_12_init_xgmi_data(smu, static_metrics); pptable->Init = true; } @@ -580,6 +584,50 @@ static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu, return false; } +int smu_v13_0_12_get_npm_data(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct PPTable_t *pptable = + (struct PPTable_t *)smu_table->driver_pptable; + struct smu_table *tables = smu_table->tables; + SystemMetricsTable_t *metrics; + struct smu_table *sys_table; + int ret; + + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS))) + return -EOPNOTSUPP; + + if (sensor == AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT) { + *value = pptable->MaxNodePowerLimit; + return 0; + } + + ret = smu_v13_0_12_get_system_metrics_table(smu); + if (ret) + return ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; + + switch (sensor) { + case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: + *value = SMUQ10_ROUND(metrics->NodePowerLimit); + break; + case AMDGPU_PP_SENSOR_NODEPOWER: + *value = SMUQ10_ROUND(metrics->NodePower); + break; + case AMDGPU_PP_SENSOR_GPPTRESIDENCY: + *value = SMUQ10_ROUND(metrics->GlobalPPTResidencyAcc); + break; + default: + return -EINVAL; + } + + return ret; +} + static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu, enum smu_temp_metric_type type, void *table) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ebee659f8a1c..285cf7979693 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -143,7 +143,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), - MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), + MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), @@ -353,8 +353,15 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } + if (fw_ver > 0x04560900) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); + if (fw_ver >= 0x04560700) { - if (!amdgpu_sriov_vf(smu->adev)) + if (fw_ver >= 0x04560900) { + smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); + if (smu->adev->gmc.xgmi.physical_node_id == 0) + smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS)); + } else if (!amdgpu_sriov_vf(smu->adev)) smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); } else { smu_v13_0_12_tables_fini(smu); @@ -413,6 +420,10 @@ static void 
smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); if (amdgpu_sriov_vf(adev)) { + if (fw_ver >= 0x00558200) + amdgpu_virt_attr_set(&adev->virt.virt_caps, + AMDGPU_VIRT_CAP_POWER_LIMIT, + AMDGPU_CAP_ATTR_RW); if ((pgm == 0 && fw_ver >= 0x00558000) || (pgm == 7 && fw_ver >= 0x7551000)) { smu_v13_0_6_cap_set(smu, @@ -439,8 +450,7 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) ((pgm == 4) && (fw_ver >= 0x4557000))) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); - if (((pgm == 0) && (fw_ver >= 0x00558200)) || - ((pgm == 4) && (fw_ver >= 0x04557100))) + if ((pgm == 0) && (fw_ver >= 0x00558200)) smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); } @@ -1795,6 +1805,15 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, ret = -EOPNOTSUPP; break; } + case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: + case AMDGPU_PP_SENSOR_NODEPOWER: + case AMDGPU_PP_SENSOR_GPPTRESIDENCY: + case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT: + ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data); + if (ret) + return ret; + *size = 4; + break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: default: ret = -EOPNOTSUPP; @@ -2490,10 +2509,10 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v13_0_6_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2501,27 +2520,12 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } @@ -3223,6 +3227,20 @@ static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) } +static int smu_v13_0_6_post_init(struct smu_context *smu) +{ + if (smu_v13_0_6_is_link_reset_supported(smu)) + smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET); + + if (smu_v13_0_6_reset_sdma_is_supported(smu)) + smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); + + if (smu_v13_0_6_reset_vcn_is_supported(smu)) + smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET); + + return 0; +} + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3839,6 +3857,12 @@ static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { .parse_error_code = aca_smu_parse_error_code, }; +static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) +{ + smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 12)) ? 
&smu_v13_0_12_temp_funcs : NULL; +} + static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, @@ -3886,7 +3910,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_xcp_metrics = smu_v13_0_6_get_xcp_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, - .link_reset_is_support = smu_v13_0_6_is_link_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, .mode2_reset = smu_v13_0_6_mode2_reset, .link_reset = smu_v13_0_6_link_reset, @@ -3896,9 +3919,8 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, .send_rma_reason = smu_v13_0_6_send_rma_reason, .reset_sdma = smu_v13_0_6_reset_sdma, - .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, .dpm_reset_vcn = smu_v13_0_6_reset_vcn, - .reset_vcn_is_supported = smu_v13_0_6_reset_vcn_is_supported, + .post_init = smu_v13_0_6_post_init, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) @@ -3910,15 +3932,11 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map; smu->table_map = smu_v13_0_6_table_map; - smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION; + smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION; smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI; smu_v13_0_set_smu_mailbox_registers(smu); + smu_v13_0_6_set_temp_funcs(smu); amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); } -void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) -{ - smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) - == IP_VERSION(13, 0, 12)) ? 
&smu_v13_0_12_temp_funcs : NULL; -} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index aae9a546a67e..7ef5f3e66c27 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -49,6 +49,7 @@ struct PPTable_t { uint32_t MaxLclkDpmRange; uint32_t MinLclkDpmRange; uint64_t PublicSerialNumber_AID; + uint32_t MaxNodePowerLimit; bool Init; }; @@ -70,11 +71,11 @@ enum smu_v13_0_6_caps { SMU_CAP(BOARD_VOLTAGE), SMU_CAP(PLDM_VERSION), SMU_CAP(TEMP_METRICS), + SMU_CAP(NPM_METRICS), SMU_CAP(ALL), }; extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); -extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu); bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, @@ -92,6 +93,9 @@ ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, void *smu_metrics); int smu_v13_0_12_tables_init(struct smu_context *smu); void smu_v13_0_12_tables_fini(struct smu_context *smu); +int smu_v13_0_12_get_npm_data(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value); extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index f32474af90b3..086501cc5213 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2087,10 +2087,10 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v14_0_2_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2100,27 +2100,12 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v14_0_2_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index d588f74b98de..0ae91c8b6d72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -40,6 +40,8 @@ #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 +#define SMU_IGNORE_IF_VERSION 0xFFFFFFFF + #define smu_cmn_init_soft_gpu_metrics(ptr, frev, crev) \ do { \ typecheck(struct gpu_metrics_v##frev##_##crev *, (ptr)); \ diff --git a/drivers/gpu/drm/amd/ras/rascore/Makefile b/drivers/gpu/drm/amd/ras/rascore/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 
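SMU_IGNORE_IF_VERSION, added to smu_cmn.h above and assigned to smc_driver_if_version for SMU v13.0.6, is a sentinel that makes the interface-version comparison in smu_v13_0_check_fw_version() (earlier in this patch) skip its mismatch message. A tiny stand-alone sketch of the sentinel pattern, with illustrative values:

#include <stdint.h>
#include <stdio.h>

#define SMU_IGNORE_IF_VERSION 0xFFFFFFFFu

static void check_if_version(uint32_t driver_if, uint32_t fw_if)
{
	/* Drivers that set the sentinel opt out of the mismatch warning. */
	if (driver_if != SMU_IGNORE_IF_VERSION && driver_if != fw_if)
		printf("if version mismatch: driver 0x%08x, fw 0x%08x\n",
		       driver_if, fw_if);
}

int main(void)
{
	check_if_version(0x04, 0x05);			/* warns    */
	check_if_version(SMU_IGNORE_IF_VERSION, 0x05);	/* is quiet */
	return 0;
}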
--- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h index 0d878ca3acba..144fbe4ceb9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2025 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +22,16 @@ * */ -#ifndef __DCE_V11_0_H__ -#define __DCE_V11_0_H__ - -extern const struct amdgpu_ip_block_version dce_v11_0_ip_block; -extern const struct amdgpu_ip_block_version dce_v11_2_ip_block; - -void dce_v11_0_disable_dce(struct amdgpu_device *adev); +#ifndef __RAS_CORE_STATUS_H__ +#define __RAS_CORE_STATUS_H__ +#define RAS_CORE_OK 0 +#define RAS_CORE_NOT_SUPPORTED 248 +#define RAS_CORE_FAIL_ERROR_QUERY 249 +#define RAS_CORE_FAIL_ERROR_INJECTION 250 +#define RAS_CORE_FAIL_FATAL_RECOVERY 251 +#define RAS_CORE_FAIL_POISON_CONSUMPTION 252 +#define RAS_CORE_FAIL_POISON_CREATION 253 +#define RAS_CORE_FAIL_NO_VALID_BANKS 254 +#define RAS_CORE_GPU_IN_MODE1_RESET 255 #endif diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 19c04687b0fe..8e650a02c528 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -134,7 +134,7 @@ static int ast_astdp_read_edid_block(void *data, u8 *buf, unsigned int block, si * 3. The Delays are often longer a lot when system resume from S3/S4. */ if (j) - mdelay(j + 1); + msleep(j + 1); /* Wait for EDID offset to show up in mirror register */ vgacrd7 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xd7); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 609cdb9d371e..6f3fdcb6afdb 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2678,7 +2678,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq, NULL, anx7625_intr_hpd_isr, IRQF_TRIGGER_FALLING | - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "anx7625-intp", platform); if (ret) { DRM_DEV_ERROR(dev, "fail to request irq\n"); @@ -2747,8 +2747,10 @@ static int anx7625_i2c_probe(struct i2c_client *client) } /* Add work function */ - if (platform->pdata.intp_irq) + if (platform->pdata.intp_irq) { + enable_irq(platform->pdata.intp_irq); queue_work(platform->workqueue, &platform->work); + } if (platform->pdata.audio_en) anx7625_register_audio(dev, platform); diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index a614d1384f71..38726ae1bf15 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -1984,8 +1984,10 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge, mhdp_state = to_cdns_mhdp_bridge_state(new_state); mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); - if (!mhdp_state->current_mode) - return; + if (!mhdp_state->current_mode) { + ret = -EINVAL; + goto out; + } drm_mode_set_name(mhdp_state->current_mode); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index b5dd71f6a990..eabc4c32f6ab 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ 
b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -31,11 +31,10 @@ /* returns true iff both arguments logically differs */ #define NEQV(a, b) (!(a) ^ !(b)) -/* DSIM_STATUS */ +/* DSIM_STATUS or DSIM_DPHY_STATUS */ #define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0) #define DSIM_STOP_STATE_CLK BIT(8) #define DSIM_TX_READY_HS_CLK BIT(10) -#define DSIM_PLL_STABLE BIT(31) /* DSIM_SWRST */ #define DSIM_FUNCRST BIT(16) @@ -46,17 +45,13 @@ #define DSIM_BTA_TIMEOUT(x) ((x) << 16) /* DSIM_CLKCTRL */ -#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) -#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) -#define DSIM_LANE_ESC_CLK_EN_CLK BIT(19) -#define DSIM_LANE_ESC_CLK_EN_DATA(x) (((x) & 0xf) << 20) -#define DSIM_LANE_ESC_CLK_EN_DATA_MASK (0xf << 20) -#define DSIM_BYTE_CLKEN BIT(24) -#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) -#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) -#define DSIM_PLL_BYPASS BIT(27) -#define DSIM_ESC_CLKEN BIT(28) -#define DSIM_TX_REQUEST_HSCLK BIT(31) +#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) +#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) +#define DSIM_LANE_ESC_CLK_EN_DATA(x, offset) (((x) & 0xf) << offset) +#define DSIM_LANE_ESC_CLK_EN_DATA_MASK(offset) (0xf << offset) +#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) +#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) +#define DSIM_PLL_BYPASS BIT(27) /* DSIM_CONFIG */ #define DSIM_LANE_EN_CLK BIT(0) @@ -91,7 +86,6 @@ */ #define DSIM_HSE_DISABLE_MODE BIT(23) #define DSIM_AUTO_MODE BIT(24) -#define DSIM_VIDEO_MODE BIT(25) #define DSIM_BURST_MODE BIT(26) #define DSIM_SYNC_INFORM BIT(27) #define DSIM_EOT_DISABLE BIT(28) @@ -129,9 +123,9 @@ #define DSIM_MAIN_HBP_MASK ((0xffff) << 0) /* DSIM_MSYNC */ -#define DSIM_MAIN_VSA(x) ((x) << 22) +#define DSIM_MAIN_VSA(x, offset) ((x) << offset) #define DSIM_MAIN_HSA(x) ((x) << 0) -#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22) +#define DSIM_MAIN_VSA_MASK(offset) ((0x3ff) << offset) #define DSIM_MAIN_HSA_MASK ((0xffff) << 0) /* DSIM_SDRESOL */ @@ -157,6 +151,11 @@ #define DSIM_INT_RX_ECC_ERR BIT(15) #define DSIM_INT_RX_CRC_ERR BIT(14) +/* DSIM_SFRCTRL */ +#define DSIM_SFR_CTRL_STAND_BY BIT(4) +#define DSIM_SFR_CTRL_SHADOW_UPDATE BIT(1) +#define DSIM_SFR_CTRL_SHADOW_EN BIT(0) + /* DSIM_FIFOCTRL */ #define DSIM_RX_DATA_FULL BIT(25) #define DSIM_RX_DATA_EMPTY BIT(24) @@ -191,9 +190,7 @@ #define DSIM_PLL_DPDNSWAP_DAT (1 << 24) #define DSIM_FREQ_BAND(x) ((x) << 24) #define DSIM_PLL_EN BIT(23) -#define DSIM_PLL_P(x, offset) ((x) << (offset)) -#define DSIM_PLL_M(x) ((x) << 4) -#define DSIM_PLL_S(x) ((x) << 1) +#define DSIM_PLL(x, offset) ((x) << (offset)) /* DSIM_PHYCTRL */ #define DSIM_PHYCTRL_ULPS_EXIT(x) (((x) & 0x1ff) << 0) @@ -222,25 +219,42 @@ #define DSI_XFER_TIMEOUT_MS 100 #define DSI_RX_FIFO_EMPTY 0x30800002 -#define OLD_SCLK_MIPI_CLK_NAME "pll_clk" - #define PS_TO_CYCLE(ps, hz) DIV64_U64_ROUND_CLOSEST(((ps) * (hz)), 1000000000000ULL) -static const char *const clk_names[5] = { - "bus_clk", - "sclk_mipi", - "phyclk_mipidphy0_bitclkdiv8", - "phyclk_mipidphy0_rxclkesc0", - "sclk_rgb_vclk_to_dsim0" -}; - enum samsung_dsim_transfer_type { EXYNOS_DSI_TX, EXYNOS_DSI_RX, }; +static struct clk_bulk_data exynos3_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "pll_clk" }, +}; + +static struct clk_bulk_data exynos4_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, +}; + +static struct clk_bulk_data exynos5433_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, + { .id = "phyclk_mipidphy0_bitclkdiv8" }, + { .id = "phyclk_mipidphy0_rxclkesc0" }, + { .id = 
"sclk_rgb_vclk_to_dsim0" }, +}; + +static struct clk_bulk_data exynos7870_clk_bulk_data[] = { + { .id = "bus" }, + { .id = "pll" }, + { .id = "byte" }, + { .id = "esc" }, +}; + enum reg_idx { - DSIM_STATUS_REG, /* Status register */ + DSIM_STATUS_REG, /* Status register (legacy) */ + DSIM_LINK_STATUS_REG, /* Link status register */ + DSIM_DPHY_STATUS_REG, /* D-PHY status register */ DSIM_SWRST_REG, /* Software reset register */ DSIM_CLKCTRL_REG, /* Clock control register */ DSIM_TIMEOUT_REG, /* Time out register */ @@ -255,6 +269,7 @@ enum reg_idx { DSIM_PKTHDR_REG, /* Packet Header FIFO register */ DSIM_PAYLOAD_REG, /* Payload FIFO register */ DSIM_RXFIFO_REG, /* Read FIFO register */ + DSIM_SFRCTRL_REG, /* SFR standby and shadow control register */ DSIM_FIFOCTRL_REG, /* FIFO status and control register */ DSIM_PLLCTRL_REG, /* PLL control register */ DSIM_PHYCTRL_REG, @@ -312,6 +327,32 @@ static const unsigned int exynos5433_reg_ofs[] = { [DSIM_PHYTIMING2_REG] = 0xBC, }; +static const unsigned int exynos7870_reg_ofs[] = { + [DSIM_LINK_STATUS_REG] = 0x04, + [DSIM_DPHY_STATUS_REG] = 0x08, + [DSIM_SWRST_REG] = 0x0C, + [DSIM_CLKCTRL_REG] = 0x10, + [DSIM_TIMEOUT_REG] = 0x14, + [DSIM_ESCMODE_REG] = 0x1C, + [DSIM_MDRESOL_REG] = 0x20, + [DSIM_MVPORCH_REG] = 0x24, + [DSIM_MHPORCH_REG] = 0x28, + [DSIM_MSYNC_REG] = 0x2C, + [DSIM_CONFIG_REG] = 0x30, + [DSIM_INTSRC_REG] = 0x34, + [DSIM_INTMSK_REG] = 0x38, + [DSIM_PKTHDR_REG] = 0x3C, + [DSIM_PAYLOAD_REG] = 0x40, + [DSIM_RXFIFO_REG] = 0x44, + [DSIM_SFRCTRL_REG] = 0x48, + [DSIM_FIFOCTRL_REG] = 0x4C, + [DSIM_PLLCTRL_REG] = 0x94, + [DSIM_PHYCTRL_REG] = 0xA4, + [DSIM_PHYTIMING_REG] = 0xB4, + [DSIM_PHYTIMING1_REG] = 0xB8, + [DSIM_PHYTIMING2_REG] = 0xBC, +}; + enum reg_value_idx { RESET_TYPE, PLL_TIMER, @@ -384,6 +425,24 @@ static const unsigned int exynos5433_reg_values[] = { [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), }; +static const unsigned int exynos7870_reg_values[] = { + [RESET_TYPE] = DSIM_SWRST, + [PLL_TIMER] = 80000, + [STOP_STATE_CNT] = 0xa, + [PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0x177), + [PHYCTRL_VREG_LP] = 0, + [PHYCTRL_SLEW_UP] = 0, + [PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x07), + [PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0c), + [PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x08), + [PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x2b), + [PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0d), + [PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x09), + [PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x09), + [PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x0f), + [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), +}; + static const unsigned int imx8mm_dsim_reg_values[] = { [RESET_TYPE] = DSIM_SWRST, [PLL_TIMER] = 500, @@ -405,13 +464,26 @@ static const unsigned int imx8mm_dsim_reg_values[] = { static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -424,13 
+496,26 @@ static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -443,11 +528,24 @@ static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, - .num_clks = 2, + .has_legacy_status_reg = 1, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -459,12 +557,25 @@ static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 5, + .clk_data = exynos5433_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos5433_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5433_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -476,12 +587,25 @@ static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5422_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -490,19 +614,62 @@ static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .min_freq = 500, }; +static const struct samsung_dsim_driver_data exynos7870_dsi_driver_data = { + .reg_ofs = exynos7870_reg_ofs, + .plltmr_reg = 0xa0, + .has_clklane_stop = 1, + .has_sfrctrl = 1, + .clk_data = exynos7870_clk_bulk_data, + .num_clks = 
ARRAY_SIZE(exynos7870_clk_bulk_data), + .max_freq = 1500, + .wait_for_hdr_fifo = 0, + .wait_for_reset = 1, + .num_bits_resol = 12, + .video_mode_bit = 18, + .pll_stable_bit = 24, + .esc_clken_bit = 16, + .byte_clken_bit = 17, + .tx_req_hsclk_bit = 20, + .lane_esc_clk_bit = 8, + .lane_esc_data_offset = 9, + .pll_p_offset = 13, + .pll_m_offset = 3, + .pll_s_offset = 0, + .main_vsa_offset = 16, + .reg_values = exynos7870_reg_values, + .pll_fin_min = 6, + .pll_fin_max = 12, + .m_min = 41, + .m_max = 125, + .min_freq = 500, +}; + static const struct samsung_dsim_driver_data imx8mm_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 2100, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, /* * Unlike Exynos, PLL_P(PMS_P) offset 14 is used in i.MX8M Mini/Nano/Plus * downstream driver - drivers/gpu/drm/bridge/sec-dsim.c */ .pll_p_offset = 14, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = imx8mm_dsim_reg_values, .pll_fin_min = 2, .pll_fin_max = 30, @@ -518,6 +685,7 @@ samsung_dsim_types[DSIM_TYPE_COUNT] = { [DSIM_TYPE_EXYNOS5410] = &exynos5_dsi_driver_data, [DSIM_TYPE_EXYNOS5422] = &exynos5422_dsi_driver_data, [DSIM_TYPE_EXYNOS5433] = &exynos5433_dsi_driver_data, + [DSIM_TYPE_EXYNOS7870] = &exynos7870_dsi_driver_data, [DSIM_TYPE_IMX8MM] = &imx8mm_dsi_driver_data, [DSIM_TYPE_IMX8MP] = &imx8mm_dsi_driver_data, }; @@ -653,8 +821,9 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, writel(driver_data->reg_values[PLL_TIMER], dsi->reg_base + driver_data->plltmr_reg); - reg = DSIM_PLL_EN | DSIM_PLL_P(p, driver_data->pll_p_offset) | - DSIM_PLL_M(m) | DSIM_PLL_S(s); + reg = DSIM_PLL_EN | DSIM_PLL(p, driver_data->pll_p_offset) + | DSIM_PLL(m, driver_data->pll_m_offset) + | DSIM_PLL(s, driver_data->pll_s_offset); if (driver_data->has_freqband) { static const unsigned long freq_bands[] = { @@ -682,14 +851,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, samsung_dsim_write(dsi, DSIM_PLLCTRL_REG, reg); - timeout = 1000; + timeout = 3000; do { if (timeout-- == 0) { dev_err(dsi->dev, "PLL failed to stabilize\n"); return 0; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); - } while ((reg & DSIM_PLL_STABLE) == 0); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_LINK_STATUS_REG); + } while ((reg & BIT(driver_data->pll_stable_bit)) == 0); dsi->hs_clock = fout; @@ -698,6 +870,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; unsigned long hs_clk, byte_clk, esc_clk, pix_clk; unsigned long esc_div; u32 reg; @@ -731,15 +904,17 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) hs_clk, byte_clk, esc_clk); reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS - | DSIM_BYTE_CLK_SRC_MASK); - reg |= DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN - | DSIM_ESC_PRESCALER(esc_div) - | DSIM_LANE_ESC_CLK_EN_CLK - | 
DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1) - | DSIM_BYTE_CLK_SRC(0) - | DSIM_TX_REQUEST_HSCLK; + reg &= ~(DSIM_ESC_PRESCALER_MASK | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | DSIM_PLL_BYPASS + | DSIM_BYTE_CLK_SRC_MASK); + reg |= BIT(driver_data->esc_clken_bit) | BIT(driver_data->byte_clken_bit) + | DSIM_ESC_PRESCALER(esc_div) + | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1, + driver_data->lane_esc_data_offset) + | DSIM_BYTE_CLK_SRC(0) + | BIT(driver_data->tx_req_hsclk_bit); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); return 0; @@ -843,11 +1018,14 @@ static void samsung_dsim_set_phy_ctrl(struct samsung_dsim *dsi) static void samsung_dsim_disable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK - | DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN); + reg &= ~(BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | BIT(driver_data->esc_clken_bit) + | BIT(driver_data->byte_clken_bit)); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); reg = samsung_dsim_read(dsi, DSIM_PLLCTRL_REG); @@ -891,7 +1069,7 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) * mode, otherwise it will support command mode. */ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { - reg |= DSIM_VIDEO_MODE; + reg |= BIT(driver_data->video_mode_bit); /* * The user manual describes that following bits are ignored in @@ -962,7 +1140,10 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) return -EFAULT; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_DPHY_STATUS_REG); if ((reg & DSIM_STOP_STATE_DAT(lanes_mask)) != DSIM_STOP_STATE_DAT(lanes_mask)) continue; @@ -983,6 +1164,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) { struct drm_display_mode *m = &dsi->mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; + unsigned int main_vsa_offset = dsi->driver_data->main_vsa_offset; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { @@ -1009,7 +1191,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) reg = DSIM_MAIN_HFP(hfp) | DSIM_MAIN_HBP(hbp); samsung_dsim_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start, main_vsa_offset) | DSIM_MAIN_HSA(hsa); samsung_dsim_write(dsi, DSIM_MSYNC_REG, reg); } @@ -1023,6 +1205,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enable) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_MDRESOL_REG); @@ -1031,6 +1214,15 @@ static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enabl else reg &= ~DSIM_MAIN_STAND_BY; samsung_dsim_write(dsi, DSIM_MDRESOL_REG, reg); + + if (driver_data->has_sfrctrl) { + reg = samsung_dsim_read(dsi, DSIM_SFRCTRL_REG); + if (enable) + reg |= DSIM_SFR_CTRL_STAND_BY; + else + reg &= ~DSIM_SFR_CTRL_STAND_BY; + samsung_dsim_write(dsi, DSIM_SFRCTRL_REG, reg); + } } static int samsung_dsim_wait_for_hdr_fifo(struct samsung_dsim *dsi) @@ -1087,6 +1279,7 @@ static void 
samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, { struct device *dev = dsi->dev; struct mipi_dsi_packet *pkt = &xfer->packet; + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; const u8 *payload = pkt->payload + xfer->tx_done; u16 length = pkt->payload_length - xfer->tx_done; bool first = !xfer->tx_done; @@ -1127,9 +1320,11 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, return; reg = get_unaligned_le32(pkt->header); - if (samsung_dsim_wait_for_hdr_fifo(dsi)) { - dev_err(dev, "waiting for header FIFO timed out\n"); - return; + if (driver_data->wait_for_hdr_fifo) { + if (samsung_dsim_wait_for_hdr_fifo(dsi)) { + dev_err(dev, "waiting for header FIFO timed out\n"); + return; + } } if (NEQV(xfer->flags & MIPI_DSI_MSG_USE_LPM, @@ -1922,7 +2117,7 @@ int samsung_dsim_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct samsung_dsim *dsi; - int ret, i; + int ret; dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs); if (IS_ERR(dsi)) @@ -1946,23 +2141,11 @@ int samsung_dsim_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to get regulators\n"); - dsi->clks = devm_kcalloc(dev, dsi->driver_data->num_clks, - sizeof(*dsi->clks), GFP_KERNEL); - if (!dsi->clks) - return -ENOMEM; - - for (i = 0; i < dsi->driver_data->num_clks; i++) { - dsi->clks[i] = devm_clk_get(dev, clk_names[i]); - if (IS_ERR(dsi->clks[i])) { - if (strcmp(clk_names[i], "sclk_mipi") == 0) { - dsi->clks[i] = devm_clk_get(dev, OLD_SCLK_MIPI_CLK_NAME); - if (!IS_ERR(dsi->clks[i])) - continue; - } - - dev_info(dev, "failed to get the clock: %s\n", clk_names[i]); - return PTR_ERR(dsi->clks[i]); - } + ret = devm_clk_bulk_get(dev, dsi->driver_data->num_clks, + dsi->driver_data->clk_data); + if (ret) { + dev_err(dev, "failed to get clocks in bulk (%d)\n", ret); + return ret; } dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); @@ -2035,7 +2218,7 @@ static int samsung_dsim_suspend(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; usleep_range(10000, 20000); @@ -2051,8 +2234,7 @@ static int samsung_dsim_suspend(struct device *dev) phy_power_off(dsi->phy); - for (i = driver_data->num_clks - 1; i > -1; i--) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) @@ -2065,7 +2247,7 @@ static int samsung_dsim_resume(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) { @@ -2073,11 +2255,9 @@ static int samsung_dsim_resume(struct device *dev) return ret; } - for (i = 0; i < driver_data->num_clks; i++) { - ret = clk_prepare_enable(dsi->clks[i]); - if (ret < 0) - goto err_clk; - } + ret = clk_bulk_prepare_enable(driver_data->num_clks, driver_data->clk_data); + if (ret < 0) + goto err_clk; ret = phy_power_on(dsi->phy); if (ret < 0) { @@ -2088,8 +2268,7 @@ static int samsung_dsim_resume(struct device *dev) return 0; err_clk: - while (--i > -1) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); return ret; diff --git 
a/drivers/gpu/drm/bridge/waveshare-dsi.c b/drivers/gpu/drm/bridge/waveshare-dsi.c index 01c70e7d3d3b..43f4e7412d72 100644 --- a/drivers/gpu/drm/bridge/waveshare-dsi.c +++ b/drivers/gpu/drm/bridge/waveshare-dsi.c @@ -147,8 +147,8 @@ static int ws_bridge_probe(struct i2c_client *i2c) int ret; ws = devm_drm_bridge_alloc(dev, struct ws_bridge, bridge, &ws_bridge_bridge_funcs); - if (!ws) - return -ENOMEM; + if (IS_ERR(ws)) + return PTR_ERR(ws); ws->dev = dev; diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 3b50d817c839..436bfe9f9081 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -42,7 +42,7 @@ * * https://hverkuil.home.xs4all.nl/cec-status.txt * - * Please mail me (hverkuil@xs4all.nl) if you find an adapter that works + * Please mail me (hverkuil@kernel.org) if you find an adapter that works * and is not yet listed there. * * Note that the current implementation does not support CEC over an MST hub. diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 0ac723a46a91..8e3cb08241c8 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -696,7 +696,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); - mutex_destroy(&dev->struct_mutex); } static int drm_dev_init(struct drm_device *dev, @@ -737,7 +736,6 @@ static int drm_dev_init(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); - mutex_init(&dev->struct_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index cbeb76b2124f..a1a9c828938b 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -626,7 +626,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) struct page **pages; struct folio *folio; struct folio_batch fbatch; - long i, j, npages; + unsigned long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); @@ -650,7 +650,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) i = 0; while (i < npages) { - long nr; + unsigned long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index e2a9a6ae1d54..cb906765897e 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -361,7 +361,6 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { * @name: Name of the GPU SVM. * @drm: Pointer to the DRM device structure. * @mm: Pointer to the mm_struct for the address space. - * @device_private_page_owner: Device private pages owner. * @mm_start: Start address of GPU SVM. * @mm_range: Range of the GPU SVM. * @notifier_size: Size of individual notifiers. @@ -373,23 +372,35 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { * * This function initializes the GPU SVM. * + * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free), + * then only @gpusvm, @name, and @drm are expected. However, the same base + * @gpusvm can also be used with both modes together in which case the full + * setup is needed, where the core drm_gpusvm_pages API will simply never use + * the other fields. + * * Return: 0 on success, a negative error code on failure. 
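The waveshare-dsi hunk above fixes an ERR_PTR mix-up: devm_drm_bridge_alloc() reports failure through an error-encoded pointer, never NULL, so probe must test IS_ERR() and propagate PTR_ERR(). A minimal stand-alone sketch of the idiom; the allocator below is a placeholder, not the DRM helper.

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Placeholder allocator standing in for an ERR_PTR-returning helper. */
static void *example_alloc(bool fail)
{
	static char object;

	return fail ? ERR_PTR(-ENOMEM) : &object;
}

static int example_probe(void)
{
	void *obj = example_alloc(true);

	/*
	 * ERR_PTR-style allocators never return NULL on failure, so a
	 * "!obj" test would silently accept an error pointer; IS_ERR()
	 * plus PTR_ERR() is the correct check-and-propagate pattern.
	 */
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	return 0;
}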
*/ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, const char *name, struct drm_device *drm, - struct mm_struct *mm, void *device_private_page_owner, + struct mm_struct *mm, unsigned long mm_start, unsigned long mm_range, unsigned long notifier_size, const struct drm_gpusvm_ops *ops, const unsigned long *chunk_sizes, int num_chunks) { - if (!ops->invalidate || !num_chunks) - return -EINVAL; + if (mm) { + if (!ops->invalidate || !num_chunks) + return -EINVAL; + mmgrab(mm); + } else { + /* No full SVM mode, only core drm_gpusvm_pages API. */ + if (ops || num_chunks || mm_range || notifier_size) + return -EINVAL; + } gpusvm->name = name; gpusvm->drm = drm; gpusvm->mm = mm; - gpusvm->device_private_page_owner = device_private_page_owner; gpusvm->mm_start = mm_start; gpusvm->mm_range = mm_range; gpusvm->notifier_size = notifier_size; @@ -397,7 +408,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, gpusvm->chunk_sizes = chunk_sizes; gpusvm->num_chunks = num_chunks; - mmgrab(mm); gpusvm->root = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpusvm->notifier_list); @@ -489,7 +499,8 @@ void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) drm_gpusvm_range_remove(gpusvm, range); } - mmdrop(gpusvm->mm); + if (gpusvm->mm) + mmdrop(gpusvm->mm); WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); } EXPORT_SYMBOL_GPL(drm_gpusvm_fini); @@ -629,18 +640,48 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; INIT_LIST_HEAD(&range->entry); - range->notifier_seq = LONG_MAX; - range->flags.migrate_devmem = migrate_devmem ? 1 : 0; + range->pages.notifier_seq = LONG_MAX; + range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0; return range; } /** + * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order. + * @hmm_pfn: The current hmm_pfn. + * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array. + * @npages: Number of pages within the pfn array i.e the hmm range size. + * + * To allow skipping PFNs with the same flags (like when they belong to + * the same huge PTE) when looping over the pfn array, take a given a hmm_pfn, + * and return the largest order that will fit inside the CPU PTE, but also + * crucially accounting for the original hmm range boundaries. + * + * Return: The largest order that will safely fit within the size of the hmm_pfn + * CPU PTE. + */ +static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, + unsigned long hmm_pfn_index, + unsigned long npages) +{ + unsigned long size; + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1); + hmm_pfn_index += size; + if (hmm_pfn_index > npages) + size -= (hmm_pfn_index - npages); + + return ilog2(size); +} + +/** * drm_gpusvm_check_pages() - Check pages * @gpusvm: Pointer to the GPU SVM structure * @notifier: Pointer to the GPU SVM notifier structure * @start: Start address * @end: End address + * @dev_private_owner: The device private page owner * * Check if pages between start and end have been faulted in on the CPU. Use to * prevent migration of pages without CPU backing store. 
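With device_private_page_owner dropped from the arguments and the new mm check in place, drm_gpusvm_init() now accepts either a full SVM setup or a trimmed-down, pages-only one. A hedged sketch of the two call shapes this allows; the ops table, chunk sizes and address ranges are placeholders, not values from this series.

#include <drm/drm_gpusvm.h>
#include <linux/kernel.h>
#include <linux/sizes.h>

/* Placeholder chunk sizes; a real driver picks these to match its GPU PTEs. */
static const unsigned long my_chunk_sizes[] = { SZ_4K, SZ_64K, SZ_2M };

static int my_svm_setup(struct drm_gpusvm *full, struct drm_gpusvm *pages_only,
			struct drm_device *drm, struct mm_struct *mm,
			const struct drm_gpusvm_ops *my_ops)
{
	int err;

	/* Full SVM: an mm is tracked, so invalidate ops and chunks are required. */
	err = drm_gpusvm_init(full, "my-svm", drm, mm,
			      0, SZ_1G,		/* CPU VA window mirrored   */
			      SZ_512M,		/* per-notifier granularity */
			      my_ops, my_chunk_sizes,
			      ARRAY_SIZE(my_chunk_sizes));
	if (err)
		return err;

	/*
	 * Pages-only mode: just drm_gpusvm_get/unmap/free_pages() will be used,
	 * so everything past @drm must be NULL/zero or the call fails with
	 * -EINVAL.
	 */
	return drm_gpusvm_init(pages_only, "my-pages", drm, NULL,
			       0, 0, 0, NULL, NULL, 0);
}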
@@ -649,14 +690,15 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, */ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + void *dev_private_owner) { struct hmm_range hmm_range = { .default_flags = 0, .notifier = ¬ifier->notifier, .start = start, .end = end, - .dev_private_owner = gpusvm->device_private_page_owner, + .dev_private_owner = dev_private_owner, }; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); @@ -693,7 +735,7 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, err = -EFAULT; goto err_free; } - i += 0x1 << hmm_pfn_to_map_order(pfns[i]); + i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); } err_free: @@ -710,6 +752,7 @@ err_free: * @gpuva_start: Start address of GPUVA which mirrors CPU * @gpuva_end: End address of GPUVA which mirrors CPU * @check_pages_threshold: Check CPU pages for present threshold + * @dev_private_owner: The device private page owner * * This function determines the chunk size for the GPU SVM range based on the * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual @@ -724,7 +767,8 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, unsigned long fault_addr, unsigned long gpuva_start, unsigned long gpuva_end, - unsigned long check_pages_threshold) + unsigned long check_pages_threshold, + void *dev_private_owner) { unsigned long start, end; int i = 0; @@ -771,7 +815,7 @@ retry: * process-many-malloc' mallocs at least 64k at a time. */ if (end - start <= check_pages_threshold && - !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { + !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { ++i; goto retry; } @@ -914,7 +958,8 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, fault_addr, gpuva_start, gpuva_end, - ctx->check_pages_threshold); + ctx->check_pages_threshold, + ctx->device_private_page_owner); if (chunk_size == LONG_MAX) { err = -EINVAL; goto err_notifier_remove; @@ -951,31 +996,31 @@ err_mmunlock: EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); /** - * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal) + * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal) * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * @npages: Number of pages to unmap * - * This function unmap pages associated with a GPU SVM range. Assumes and + * This function unmap pages associated with a GPU SVM pages struct. Assumes and * asserts correct locking is in place when called. 
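drm_gpusvm_hmm_pfn_to_order(), added above and used here instead of the raw hmm_pfn_to_map_order(), clamps the CPU mapping order both to the pfn's offset inside its huge PTE and to the end of the hmm range, then rounds down to a power of two. A small stand-alone numeric illustration of that clamping (plain C, hmm flag bits ignored):

#include <stdio.h>

static unsigned int ilog2u(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int clamp_order(unsigned long pfn, unsigned int map_order,
				unsigned long index, unsigned long npages)
{
	unsigned long size = 1UL << map_order;

	size -= pfn & (size - 1);		/* trim the part before this pfn    */
	if (index + size > npages)		/* trim the part past the hmm range */
		size -= (index + size) - npages;

	return ilog2u(size);			/* round down to a safe power of two */
}

int main(void)
{
	/*
	 * A 16-page huge PTE whose pfn sits 4 pages in, probed at index 10 of
	 * a 16-page range: 12 pages remain in the PTE but only 6 before the
	 * range ends, and ilog2 rounds that down to order 2 (4 pages).
	 */
	printf("%u\n", clamp_order(/*pfn=*/0x1004, /*map_order=*/4,
				   /*index=*/10, /*npages=*/16));
	return 0;
}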
*/ -static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - unsigned long npages) +static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages) { - unsigned long i, j; - struct drm_pagemap *dpagemap = range->dpagemap; + struct drm_pagemap *dpagemap = svm_pages->dpagemap; struct device *dev = gpusvm->drm->dev; + unsigned long i, j; lockdep_assert_held(&gpusvm->notifier_lock); - if (range->flags.has_dma_mapping) { - struct drm_gpusvm_range_flags flags = { - .__flags = range->flags.__flags, + if (svm_pages->flags.has_dma_mapping) { + struct drm_gpusvm_pages_flags flags = { + .__flags = svm_pages->flags.__flags, }; for (i = 0, j = 0; i < npages; j++) { - struct drm_pagemap_addr *addr = &range->dma_addr[j]; + struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j]; if (addr->proto == DRM_INTERCONNECT_SYSTEM) dma_unmap_page(dev, @@ -991,31 +1036,52 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ flags.has_devmem_pages = false; flags.has_dma_mapping = false; - WRITE_ONCE(range->flags.__flags, flags.__flags); + WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); - range->dpagemap = NULL; + svm_pages->dpagemap = NULL; } } /** - * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range + * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * * This function frees the dma address array associated with a GPU SVM range. */ -static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range) +static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) { lockdep_assert_held(&gpusvm->notifier_lock); - if (range->dma_addr) { - kvfree(range->dma_addr); - range->dma_addr = NULL; + if (svm_pages->dma_addr) { + kvfree(svm_pages->dma_addr); + svm_pages->dma_addr = NULL; } } /** + * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages + * struct + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * @npages: Number of mapped pages + * + * This function unmaps and frees the dma address array associated with a GPU + * SVM pages struct. 
+ */ +void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages) +{ + drm_gpusvm_notifier_lock(gpusvm); + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); + __drm_gpusvm_free_pages(gpusvm, svm_pages); + drm_gpusvm_notifier_unlock(gpusvm); +} +EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages); + +/** * drm_gpusvm_range_remove() - Remove GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range to be removed @@ -1040,8 +1106,8 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, return; drm_gpusvm_notifier_lock(gpusvm); - __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); - drm_gpusvm_range_free_pages(gpusvm, range); + __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages); + __drm_gpusvm_free_pages(gpusvm, &range->pages); __drm_gpusvm_range_remove(notifier, range); drm_gpusvm_notifier_unlock(gpusvm); @@ -1107,6 +1173,28 @@ void drm_gpusvm_range_put(struct drm_gpusvm_range *range) EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); /** + * drm_gpusvm_pages_valid() - GPU SVM range pages valid + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * + * This function determines if a GPU SVM range pages are valid. Expected be + * called holding gpusvm->notifier_lock and as the last step before committing a + * GPU binding. This is akin to a notifier seqno check in the HMM documentation + * but due to wider notifiers (i.e., notifiers which span multiple ranges) this + * function is required for finer grained checking (i.e., per range) if pages + * are valid. + * + * Return: True if GPU SVM range has valid pages, False otherwise + */ +static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) +{ + lockdep_assert_held(&gpusvm->notifier_lock); + + return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping; +} + +/** * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure @@ -1123,9 +1211,7 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range) { - lockdep_assert_held(&gpusvm->notifier_lock); - - return range->flags.has_devmem_pages || range->flags.has_dma_mapping; + return drm_gpusvm_pages_valid(gpusvm, &range->pages); } EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); @@ -1139,66 +1225,71 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); * * Return: True if GPU SVM range has valid pages, False otherwise */ -static bool -drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range) +static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) { bool pages_valid; - if (!range->dma_addr) + if (!svm_pages->dma_addr) return false; drm_gpusvm_notifier_lock(gpusvm); - pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range); + pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages); if (!pages_valid) - drm_gpusvm_range_free_pages(gpusvm, range); + __drm_gpusvm_free_pages(gpusvm, svm_pages); drm_gpusvm_notifier_unlock(gpusvm); return pages_valid; } /** - * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range + * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: The SVM pages to populate. 
This will contain the dma-addresses + * @mm: The mm corresponding to the CPU range + * @notifier: The corresponding notifier for the given CPU range + * @pages_start: Start CPU address for the pages + * @pages_end: End CPU address for the pages (exclusive) * @ctx: GPU SVM context * - * This function gets pages for a GPU SVM range and ensures they are mapped for - * DMA access. + * This function gets and maps pages for CPU range and ensures they are + * mapped for DMA access. * * Return: 0 on success, negative error code on failure. */ -int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - const struct drm_gpusvm_ctx *ctx) +int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + struct mm_struct *mm, + struct mmu_interval_notifier *notifier, + unsigned long pages_start, unsigned long pages_end, + const struct drm_gpusvm_ctx *ctx) { - struct mmu_interval_notifier *notifier = &range->notifier->notifier; struct hmm_range hmm_range = { .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 0 : HMM_PFN_REQ_WRITE), .notifier = notifier, - .start = drm_gpusvm_range_start(range), - .end = drm_gpusvm_range_end(range), - .dev_private_owner = gpusvm->device_private_page_owner, + .start = pages_start, + .end = pages_end, + .dev_private_owner = ctx->device_private_page_owner, }; - struct mm_struct *mm = gpusvm->mm; void *zdd; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); unsigned long i, j; - unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), - drm_gpusvm_range_end(range)); + unsigned long npages = npages_in_range(pages_start, pages_end); unsigned long num_dma_mapped; unsigned int order = 0; unsigned long *pfns; int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; - struct drm_gpusvm_range_flags flags; + struct drm_gpusvm_pages_flags flags; + enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); - if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range)) + if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) goto set_seqno; pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); @@ -1238,7 +1329,7 @@ map_pages: */ drm_gpusvm_notifier_lock(gpusvm); - flags.__flags = range->flags.__flags; + flags.__flags = svm_pages->flags.__flags; if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; @@ -1251,13 +1342,12 @@ map_pages: goto retry; } - if (!range->dma_addr) { + if (!svm_pages->dma_addr) { /* Unlock and restart mapping to allocate memory. 
*/ drm_gpusvm_notifier_unlock(gpusvm); - range->dma_addr = kvmalloc_array(npages, - sizeof(*range->dma_addr), - GFP_KERNEL); - if (!range->dma_addr) { + svm_pages->dma_addr = + kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL); + if (!svm_pages->dma_addr) { err = -ENOMEM; goto err_free; } @@ -1270,7 +1360,7 @@ map_pages: for (i = 0, j = 0; i < npages; ++j) { struct page *page = hmm_pfn_to_page(pfns[i]); - order = hmm_pfn_to_map_order(pfns[i]); + order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); if (is_device_private_page(page) || is_device_coherent_page(page)) { if (zdd != page->zone_device_data && i > 0) { @@ -1296,13 +1386,13 @@ map_pages: goto err_unmap; } } - range->dma_addr[j] = + svm_pages->dma_addr[j] = dpagemap->ops->device_map(dpagemap, gpusvm->drm->dev, page, order, - DMA_BIDIRECTIONAL); + dma_dir); if (dma_mapping_error(gpusvm->drm->dev, - range->dma_addr[j].addr)) { + svm_pages->dma_addr[j].addr)) { err = -EFAULT; goto err_unmap; } @@ -1322,15 +1412,15 @@ map_pages: addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, - DMA_BIDIRECTIONAL); + dma_dir); if (dma_mapping_error(gpusvm->drm->dev, addr)) { err = -EFAULT; goto err_unmap; } - range->dma_addr[j] = drm_pagemap_addr_encode + svm_pages->dma_addr[j] = drm_pagemap_addr_encode (addr, DRM_INTERCONNECT_SYSTEM, order, - DMA_BIDIRECTIONAL); + dma_dir); } i += 1 << order; num_dma_mapped = i; @@ -1339,21 +1429,21 @@ map_pages: if (pagemap) { flags.has_devmem_pages = true; - range->dpagemap = dpagemap; + svm_pages->dpagemap = dpagemap; } /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ - WRITE_ONCE(range->flags.__flags, flags.__flags); + WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: - range->notifier_seq = hmm_range.notifier_seq; + svm_pages->notifier_seq = hmm_range.notifier_seq; return 0; err_unmap: - __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped); + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); drm_gpusvm_notifier_unlock(gpusvm); err_free: kvfree(pfns); @@ -1361,11 +1451,62 @@ err_free: goto retry; return err; } +EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages); + +/** + * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range + * @gpusvm: Pointer to the GPU SVM structure + * @range: Pointer to the GPU SVM range structure + * @ctx: GPU SVM context + * + * This function gets pages for a GPU SVM range and ensures they are mapped for + * DMA access. + * + * Return: 0 on success, negative error code on failure. + */ +int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm, + &range->notifier->notifier, + drm_gpusvm_range_start(range), + drm_gpusvm_range_end(range), ctx); +} EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); /** + * drm_gpusvm_unmap_pages() - Unmap GPU svm pages + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * @npages: Number of pages in @svm_pages. + * @ctx: GPU SVM context + * + * This function unmaps pages associated with a GPU SVM pages struct. If + * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in + * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode. + * Must be called in the invalidate() callback of the corresponding notifier for + * IOMMU security model. 
+ */ +void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages, + const struct drm_gpusvm_ctx *ctx) +{ + if (ctx->in_notifier) + lockdep_assert_held_write(&gpusvm->notifier_lock); + else + drm_gpusvm_notifier_lock(gpusvm); + + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); + + if (!ctx->in_notifier) + drm_gpusvm_notifier_unlock(gpusvm); +} +EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages); + +/** * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range - * drm_gpusvm_range_evict() - Evict GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure * @ctx: GPU SVM context @@ -1383,15 +1524,7 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), drm_gpusvm_range_end(range)); - if (ctx->in_notifier) - lockdep_assert_held_write(&gpusvm->notifier_lock); - else - drm_gpusvm_notifier_lock(gpusvm); - - __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); - - if (!ctx->in_notifier) - drm_gpusvm_notifier_unlock(gpusvm); + return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx); } EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); @@ -1489,10 +1622,10 @@ void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, { lockdep_assert_held_write(&range->gpusvm->notifier_lock); - range->flags.unmapped = true; + range->pages.flags.unmapped = true; if (drm_gpusvm_range_start(range) < mmu_range->start || drm_gpusvm_range_end(range) > mmu_range->end) - range->flags.partial_unmap = true; + range->pages.flags.partial_unmap = true; } EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index a52e95555549..af63f4d00315 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -2533,8 +2533,6 @@ static const struct drm_gpuvm_ops lock_ops = { * * The expected usage is:: * - * .. code-block:: c - * * vm_bind { * struct drm_exec exec; * diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 50c286c5cee8..ac27e86c601c 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -968,7 +968,7 @@ pub unsafe extern "C" fn drm_panic_qr_generate( // nul-terminated string. 
let url_cstr: &CStr = unsafe { CStr::from_char_ptr(url) }; let segments = &[ - &Segment::Binary(url_cstr.as_bytes()), + &Segment::Binary(url_cstr.to_bytes()), &Segment::Numeric(&data_slice[0..data_len]), ]; match EncodedMsg::new(segments, tmp_slice) { diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 805aa28c1723..b8d9b7251319 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -69,7 +69,6 @@ struct decon_context { void __iomem *regs; unsigned long irq_flags; bool i80_if; - bool suspended; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; @@ -132,9 +131,6 @@ static void decon_shadow_protect_win(struct decon_context *ctx, static void decon_wait_for_vblank(struct decon_context *ctx) { - if (ctx->suspended) - return; - atomic_set(&ctx->wait_vsync_event, 1); /* @@ -210,9 +206,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; u32 val, clkdiv; - if (ctx->suspended) - return; - /* nothing to do if we haven't set the mode yet */ if (mode->htotal == 0 || mode->vtotal == 0) return; @@ -274,9 +267,6 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return -EPERM; - if (!test_and_set_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -299,9 +289,6 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return; - if (test_and_clear_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -404,9 +391,6 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, true); } @@ -427,9 +411,6 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, unsigned int pitch = fb->pitches[0]; unsigned int vidw_addr0_base = ctx->data->vidw_buf_start_base; - if (ctx->suspended) - return; - /* * SHADOWCON/PRTCON register is used for enabling timing. * @@ -517,9 +498,6 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, unsigned int win = plane->index; u32 val; - if (ctx->suspended) - return; - /* protect windows */ decon_shadow_protect_win(ctx, win, true); @@ -538,9 +516,6 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); exynos_crtc_handle_event(crtc); @@ -568,9 +543,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int ret; - if (!ctx->suspended) - return; - ret = pm_runtime_resume_and_get(ctx->dev); if (ret < 0) { DRM_DEV_ERROR(ctx->dev, "failed to enable DECON device.\n"); @@ -584,8 +556,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) decon_enable_vblank(ctx->crtc); decon_commit(ctx->crtc); - - ctx->suspended = false; } static void decon_atomic_disable(struct exynos_drm_crtc *crtc) @@ -593,9 +563,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - /* * We need to make sure that all windows are disabled before we * suspend that connector. 
Otherwise we might try to scan from @@ -605,8 +572,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) decon_disable_plane(crtc, &ctx->planes[i]); pm_runtime_put_sync(ctx->dev); - - ctx->suspended = true; } static const struct exynos_drm_crtc_ops decon_crtc_ops = { @@ -727,7 +692,6 @@ static int decon_probe(struct platform_device *pdev) return -ENOMEM; ctx->dev = dev; - ctx->suspended = true; ctx->data = of_device_get_match_data(dev); i80_if_timings = of_get_child_by_name(dev->of_node, "i80-if-timings"); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 896a03639e2d..c4d098ab7863 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -154,6 +154,11 @@ static const struct samsung_dsim_plat_data exynos5433_dsi_pdata = { .host_ops = &exynos_dsi_exynos_host_ops, }; +static const struct samsung_dsim_plat_data exynos7870_dsi_pdata = { + .hw_type = DSIM_TYPE_EXYNOS7870, + .host_ops = &exynos_dsi_exynos_host_ops, +}; + static const struct of_device_id exynos_dsi_of_match[] = { { .compatible = "samsung,exynos3250-mipi-dsi", @@ -175,6 +180,10 @@ static const struct of_device_id exynos_dsi_of_match[] = { .compatible = "samsung,exynos5433-mipi-dsi", .data = &exynos5433_dsi_pdata, }, + { + .compatible = "samsung,exynos7870-mipi-dsi", + .data = &exynos7870_dsi_pdata, + }, { /* sentinel. */ } }; MODULE_DEVICE_TABLE(of, exynos_dsi_of_match); diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 1cf394369127..c0feca58511d 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -726,8 +726,8 @@ void oaktrail_hdmi_teardown(struct drm_device *dev) if (hdmi_dev) { pdev = hdmi_dev->dev; - pci_set_drvdata(pdev, NULL); oaktrail_hdmi_i2c_exit(pdev); + pci_set_drvdata(pdev, NULL); iounmap(hdmi_dev->regs); kfree(hdmi_dev); pci_dev_put(pdev); diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 853543443072..e58c0c158b3a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -32,6 +32,7 @@ i915-y += \ i915_scatterlist.o \ i915_switcheroo.o \ i915_sysfs.o \ + i915_timer_util.o \ i915_utils.o \ intel_clock_gating.o \ intel_cpu_info.o \ @@ -280,6 +281,7 @@ i915-y += \ display/intel_modeset_setup.o \ display/intel_modeset_verify.o \ display/intel_overlay.o \ + display/intel_panic.o \ display/intel_pch.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 3eb96d8abba8..407deb5dfb57 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -15,7 +15,6 @@ #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "intel_atomic.h" -#include "intel_bo.h" #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_regs.h" @@ -23,6 +22,7 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_sprite.h" @@ -1178,7 +1178,7 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, drm_WARN_ON(display->drm, pipe != crtc->pipe); - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c index d29c1508ccb9..6ae1374d5c2b 
100644 --- a/drivers/gpu/drm/i915/display/intel_bo.c +++ b/drivers/gpu/drm/i915/display/intel_bo.c @@ -59,18 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { i915_debugfs_describe_obj(m, to_intel_bo(obj)); } - -struct intel_framebuffer *intel_bo_alloc_framebuffer(void) -{ - return i915_gem_object_alloc_framebuffer(); -} - -int intel_bo_panic_setup(struct drm_scanout_buffer *sb) -{ - return i915_gem_object_panic_setup(sb); -} - -void intel_bo_panic_finish(struct intel_framebuffer *fb) -{ - return i915_gem_object_panic_finish(fb); -} diff --git a/drivers/gpu/drm/i915/display/intel_bo.h b/drivers/gpu/drm/i915/display/intel_bo.h index 97087a64d23b..48d87019e48a 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.h +++ b/drivers/gpu/drm/i915/display/intel_bo.h @@ -25,8 +25,5 @@ struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj, struct intel_frontbuffer *front); void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj); -struct intel_framebuffer *intel_bo_alloc_framebuffer(void); -int intel_bo_panic_setup(struct drm_scanout_buffer *sb); -void intel_bo_panic_finish(struct intel_framebuffer *fb); #endif /* __INTEL_BO__ */ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 46017091bb0b..c09aa759f4d4 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -26,6 +26,7 @@ */ #include <linux/iopoll.h> +#include <linux/seq_buf.h> #include <linux/string_helpers.h> #include <drm/display/drm_dp_helper.h> @@ -596,8 +597,9 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, enum transcoder master; master = crtc_state->mst_master_transcoder; - drm_WARN_ON(display->drm, - master == INVALID_TRANSCODER); + if (drm_WARN_ON(display->drm, + master == INVALID_TRANSCODER)) + master = TRANSCODER_A; temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); } } else { @@ -5067,11 +5069,45 @@ static bool port_in_use(struct intel_display *display, enum port port) return false; } +static const char *intel_ddi_encoder_name(struct intel_display *display, + enum port port, enum phy phy, + struct seq_buf *s) +{ + if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { + seq_buf_printf(s, "DDI %c/PHY %c", + port_name(port - PORT_D_XELPD + PORT_D), + phy_name(phy)); + } else if (DISPLAY_VER(display) >= 12) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %s%c/PHY %s%c", + port >= PORT_TC1 ? "TC" : "", + port >= PORT_TC1 ? port_tc_name(port) : port_name(port), + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); + } else if (DISPLAY_VER(display) >= 11) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %c%s/PHY %s%c", + port_name(port), + port >= PORT_C ? " (TC)" : "", + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? 
tc_port_name(tc_port) : phy_name(phy)); + } else { + seq_buf_printf(s, "DDI %c/PHY %c", port_name(port), phy_name(phy)); + } + + drm_WARN_ON(display->drm, seq_buf_has_overflowed(s)); + + return seq_buf_str(s); +} + void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { struct intel_digital_port *dig_port; struct intel_encoder *encoder; + DECLARE_SEQ_BUF(encoder_name, 20); bool init_hdmi, init_dp; enum port port; enum phy phy; @@ -5156,37 +5192,9 @@ void intel_ddi_init(struct intel_display *display, encoder = &dig_port->base; encoder->devdata = devdata; - if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", - port_name(port - PORT_D_XELPD + PORT_D), - phy_name(phy)); - } else if (DISPLAY_VER(display) >= 12) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %s%c/PHY %s%c", - port >= PORT_TC1 ? "TC" : "", - port >= PORT_TC1 ? port_tc_name(port) : port_name(port), - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); - } else if (DISPLAY_VER(display) >= 11) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c%s/PHY %s%c", - port_name(port), - port >= PORT_C ? " (TC)" : "", - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); - } else { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", port_name(port), phy_name(phy)); - } + drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, + DRM_MODE_ENCODER_TMDS, "%s", + intel_ddi_encoder_name(display, port, phy, &encoder_name)); intel_encoder_link_check_init(encoder, intel_ddi_link_check); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c1a3a95c65f0..5dca7f96b425 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7271,6 +7271,9 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, intel_psr_trigger_frame_change_event(new_crtc_state->dsb_commit, state, crtc); + intel_psr_wait_for_idle_dsb(new_crtc_state->dsb_commit, + new_crtc_state); + if (new_crtc_state->use_dsb) intel_dsb_vblank_evade(state, new_crtc_state->dsb_commit); diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 65f0efc35bb7..a002bc6ce7b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1944,6 +1944,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } +bool intel_display_device_present(struct intel_display *display) +{ + return display && HAS_DISPLAY(display); +} + /* * Assuming the device has display hardware, should it be enabled? 
* diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 6e87b763fe7c..f329f1beafef 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -306,6 +306,7 @@ struct intel_display_device_info { } color; }; +bool intel_display_device_present(struct intel_display *display); bool intel_display_device_enabled(struct intel_display *display); struct intel_display *intel_display_device_probe(struct pci_dev *pdev); void intel_display_device_remove(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index fd9d2527889b..358ab922d7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -60,6 +60,7 @@ struct intel_ddi_buf_trans; struct intel_fbc; struct intel_global_objs_state; struct intel_hdcp_shim; +struct intel_panic; struct intel_tc_port; /* @@ -149,6 +150,7 @@ struct intel_framebuffer { unsigned int vtd_guard; unsigned int (*panic_tiling)(unsigned int x, unsigned int y, unsigned int width); + struct intel_panic *panic; }; enum intel_hotplug_state { diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 12084a542fc5..eb05ef4bd9f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -508,9 +508,6 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (panel->backlight.edp.vesa.luminance_control_support) - return; - drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); if (!panel->backlight.edp.vesa.info.aux_enable) @@ -533,7 +530,7 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, luminance_range->max_luminance, panel->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, ¤t_level, ¤t_mode, - false); + panel->backlight.edp.vesa.luminance_control_support); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index b210c3250501..22a4a1575d22 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -20,6 +20,7 @@ #include "intel_fb.h" #include "intel_fb_bo.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #define check_array_bounds(display, a, i) drm_WARN_ON((display)->drm, (i) >= ARRAY_SIZE(a)) @@ -2343,6 +2344,26 @@ intel_user_framebuffer_create(struct drm_device *dev, return fb; } +struct intel_framebuffer *intel_framebuffer_alloc(void) +{ + struct intel_framebuffer *intel_fb; + struct intel_panic *panic; + + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + if (!intel_fb) + return NULL; + + panic = intel_panic_alloc(); + if (!panic) { + kfree(intel_fb); + return NULL; + } + + intel_fb->panic = panic; + + return intel_fb; +} + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, const struct drm_format_info *info, @@ -2351,7 +2372,7 @@ intel_framebuffer_create(struct drm_gem_object *obj, struct intel_framebuffer *intel_fb; int ret; - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) return ERR_PTR(-ENOMEM); diff --git 
a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 403b8b63721a..22514d5f2bb6 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -104,6 +104,9 @@ int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_gem_object *obj, const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); + +struct intel_framebuffer *intel_framebuffer_alloc(void); + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, const struct drm_format_info *info, diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index d4c5deff9cbe..0d380c825791 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -98,11 +98,7 @@ struct intel_fbc { struct intel_display *display; const struct intel_fbc_funcs *funcs; - /* - * This is always the inner lock when overlapping with - * struct_mutex and it's the outer lock when overlapping - * with stolen_lock. - */ + /* This is always the outer lock when overlapping with stolen_lock */ struct mutex lock; unsigned int busy_bits; @@ -383,11 +379,11 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) struct drm_i915_private *i915 = to_i915(display->drm); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_fb), U32_MAX)); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, @@ -1550,14 +1546,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. 
*/ - if (DISPLAY_VER(display) >= 9 && + if (IS_DISPLAY_VER(display, 9, 12) && plane_state->view.color_plane[0].y & 3) { plane_state->no_fbc_reason = "plane start Y offset misaligned"; return 0; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ - if (DISPLAY_VER(display) >= 11 && + if (IS_DISPLAY_VER(display, 9, 12) && (plane_state->view.color_plane[0].y + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; diff --git a/drivers/gpu/drm/i915/display/intel_panic.c b/drivers/gpu/drm/i915/display/intel_panic.c new file mode 100644 index 000000000000..7311ce4e8b6c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_panic.h> + +#include "gem/i915_gem_object.h" +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" + +struct intel_panic *intel_panic_alloc(void) +{ + return i915_gem_object_alloc_panic(); +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = sb->private; + struct drm_gem_object *obj = intel_fb_bo(&fb->base); + + return i915_gem_object_panic_setup(panic, sb, obj, fb->panic_tiling); +} + +void intel_panic_finish(struct intel_panic *panic) +{ + return i915_gem_object_panic_finish(panic); +} diff --git a/drivers/gpu/drm/i915/display/intel_panic.h b/drivers/gpu/drm/i915/display/intel_panic.h new file mode 100644 index 000000000000..afb472e924aa --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_PANIC_H__ +#define __INTEL_PANIC_H__ + +struct drm_scanout_buffer; +struct intel_panic; + +struct intel_panic *intel_panic_alloc(void); +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb); +void intel_panic_finish(struct intel_panic *panic); + +#endif /* __INTEL_PANIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 81f05ee9a21a..2329f09d413d 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -47,7 +47,6 @@ #include "gem/i915_gem_object.h" #include "i915_scheduler_types.h" #include "i9xx_plane_regs.h" -#include "intel_bo.h" #include "intel_cdclk.h" #include "intel_cursor.h" #include "intel_display_rps.h" @@ -56,6 +55,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_fbdev.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_psr.h" #include "skl_scaler.h" @@ -1326,7 +1326,7 @@ static void intel_panic_flush(struct drm_plane *plane) struct drm_framebuffer *fb = plane_state->hw.fb; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - intel_bo_panic_finish(intel_fb); + intel_panic_finish(intel_fb->panic); if (crtc_state->enable_psr2_sel_fetch) { /* Force a full update for psr2 */ @@ -1409,7 +1409,7 @@ static int intel_get_scanout_buffer(struct drm_plane *plane, return -EOPNOTSUPP; } sb->private = intel_fb; - ret = intel_bo_panic_setup(sb); + ret = intel_panic_setup(intel_fb->panic, sb); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 22433fe2ee14..01bf304c705f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -42,6 +42,7 @@ #include "intel_dmc.h" #include "intel_dp.h" 
#include "intel_dp_aux.h" +#include "intel_dsb.h" #include "intel_frontbuffer.h" #include "intel_hdmi.h" #include "intel_psr.h" @@ -494,12 +495,14 @@ static u8 intel_dp_get_su_capability(struct intel_dp *intel_dp) { u8 su_capability = 0; - if (intel_dp->psr.sink_panel_replay_su_support) - drm_dp_dpcd_readb(&intel_dp->aux, - DP_PANEL_REPLAY_CAP_CAPABILITY, - &su_capability); - else + if (intel_dp->psr.sink_panel_replay_su_support) { + if (drm_dp_dpcd_read_byte(&intel_dp->aux, + DP_PANEL_REPLAY_CAP_CAPABILITY, + &su_capability) < 0) + return 0; + } else { su_capability = intel_dp->psr_dpcd[1]; + } return su_capability; } @@ -2997,35 +3000,57 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, } } -static int _psr2_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +/* + * From bspec: Panel Self Refresh (BDW+) + * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of + * exit training time + 1.5 ms of aux channel handshake. 50 ms is + * defensive enough to cover everything. + */ +#define PSR_IDLE_TIMEOUT_MS 50 + +static int +_psr2_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; /* * Any state lower than EDP_PSR2_STATUS_STATE_DEEP_SLEEP is enough. * As all higher states has bit 4 of PSR2 state set we can just wait for * EDP_PSR2_STATUS_STATE_DEEP_SLEEP to be cleared. */ + if (dsb) { + intel_dsb_poll(dsb, EDP_PSR2_STATUS(display, cpu_transcoder), + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } + return intel_de_wait_for_clear(display, EDP_PSR2_STATUS(display, cpu_transcoder), - EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 50); + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, + PSR_IDLE_TIMEOUT_MS); } -static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +static int +_psr1_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; + + if (dsb) { + intel_dsb_poll(dsb, psr_status_reg(display, cpu_transcoder), + EDP_PSR_STATUS_STATE_MASK, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } - /* - * From bspec: Panel Self Refresh (BDW+) - * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of - * exit training time + 1.5 ms of aux channel handshake. 50 ms is - * defensive enough to cover everything. 
- */ return intel_de_wait_for_clear(display, psr_status_reg(display, cpu_transcoder), - EDP_PSR_STATUS_STATE_MASK, 50); + EDP_PSR_STATUS_STATE_MASK, + PSR_IDLE_TIMEOUT_MS); } /** @@ -3054,9 +3079,11 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat continue; if (intel_dp->psr.sel_update_enabled) - ret = _psr2_ready_for_pipe_update_locked(intel_dp); + ret = _psr2_ready_for_pipe_update_locked(new_crtc_state, + NULL); else - ret = _psr1_ready_for_pipe_update_locked(intel_dp); + ret = _psr1_ready_for_pipe_update_locked(new_crtc_state, + NULL); if (ret) drm_err(display->drm, @@ -3064,6 +3091,18 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat } } +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->has_psr || new_crtc_state->has_panel_replay) + return; + + if (new_crtc_state->has_sel_update) + _psr2_ready_for_pipe_update_locked(new_crtc_state, dsb); + else + _psr1_ready_for_pipe_update_locked(new_crtc_state, dsb); +} + static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 9b061a22361f..077751aa599f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -52,6 +52,8 @@ void intel_psr_get_config(struct intel_encoder *encoder, void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_state); +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const struct intel_crtc_state *new_crtc_state); bool intel_psr_enabled(struct intel_dp *intel_dp); int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 950dc79dbdd4..e13fb781e7b2 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -20,6 +20,7 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" #include "intel_plane.h" #include "intel_psr.h" #include "intel_psr_regs.h" @@ -3028,7 +3029,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, return; } - intel_fb = intel_bo_alloc_framebuffer(); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index f243f8a5215d..39c7c32e1e74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -182,7 +182,7 @@ enum { * the object. Simple! ... The relocation entries are stored in user memory * and so to access them we have to copy them into a local buffer. That copy * has to avoid taking any pagefaults as they may lead back to a GEM object - * requiring the struct_mutex (i.e. recursive deadlock). So once again we split + * requiring the vm->mutex (i.e. recursive deadlock). So once again we split * the relocation into multiple passes. First we try to do everything within an * atomic context (avoid the pagefaults) which requires that we never wait. 
If * we detect that we may wait, or if we need to fault, then we have to fallback diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1f38e367c60b..478011e5ecb3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -459,8 +459,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * Since we require blocking on struct_mutex to unbind the freed - * object from the GPU before releasing resources back to the + * Since we require blocking on drm_i915_gem_object->vma.lock to unbind + * the freed object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may * be a softirq context), but must instead then defer that work onto a * kthread. We use the RCU callback rather than move the freed object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 565f8fa330db..148034ef504d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -16,9 +16,9 @@ #include "i915_gem_ww.h" #include "i915_vma_types.h" -struct drm_scanout_buffer; enum intel_region_id; -struct intel_framebuffer; +struct drm_scanout_buffer; +struct intel_panic; #define obj_to_i915(obj__) to_i915((obj__)->base.dev) @@ -693,9 +693,10 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); int i915_gem_object_truncate(struct drm_i915_gem_object *obj); -struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void); -int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb); -void i915_gem_object_panic_finish(struct intel_framebuffer *fb); +struct intel_panic *i915_gem_object_alloc_panic(void); +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling); +void i915_gem_object_panic_finish(struct intel_panic *panic); /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index c16a57160b26..3f09cbce05bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -357,23 +357,13 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, return vaddr ?: ERR_PTR(-ENOMEM); } -struct i915_panic_data { +struct intel_panic { struct page **pages; int page; void *vaddr; }; -struct i915_framebuffer { - struct intel_framebuffer base; - struct i915_panic_data panic; -}; - -static inline struct i915_panic_data *to_i915_panic_data(struct intel_framebuffer *fb) -{ - return &container_of_const(fb, struct i915_framebuffer, base)->panic; -} - -static void i915_panic_kunmap(struct i915_panic_data *panic) +static void i915_panic_kunmap(struct intel_panic *panic) { if (panic->vaddr) { drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); @@ -420,7 +410,7 @@ static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int new_page; unsigned int offset; struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct i915_panic_data *panic = to_i915_panic_data(fb); + struct intel_panic *panic = fb->panic; if (fb->panic_tiling) offset = fb->panic_tiling(sb->width, x, y); @@ -441,14 +431,13 @@ static void i915_gem_object_panic_page_set_pixel(struct 
drm_scanout_buffer *sb, } } -struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void) +struct intel_panic *i915_gem_object_alloc_panic(void) { - struct i915_framebuffer *i915_fb; + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); - i915_fb = kzalloc(sizeof(*i915_fb), GFP_KERNEL); - if (i915_fb) - return &i915_fb->base; - return NULL; + return panic; } /* @@ -456,12 +445,11 @@ struct intel_framebuffer *i915_gem_object_alloc_framebuffer(void) * Use current vaddr if it exists, or setup a list of pages. * pfn is not supported yet. */ -int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling) { enum i915_map_type has_type; - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct i915_panic_data *panic = to_i915_panic_data(fb); - struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); + struct drm_i915_gem_object *obj = to_intel_bo(_obj); void *ptr; ptr = page_unpack_bits(obj->mm.mapping, &has_type); @@ -471,7 +459,7 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) else iosys_map_set_vaddr(&sb->map[0], ptr); - if (fb->panic_tiling) + if (panic_tiling) sb->set_pixel = i915_gem_object_panic_map_set_pixel; return 0; } @@ -486,10 +474,8 @@ int i915_gem_object_panic_setup(struct drm_scanout_buffer *sb) return -EOPNOTSUPP; } -void i915_gem_object_panic_finish(struct intel_framebuffer *fb) +void i915_gem_object_panic_finish(struct intel_panic *panic) { - struct i915_panic_data *panic = to_i915_panic_data(fb); - i915_panic_kunmap(panic); panic->page = -1; kfree(panic->pages); @@ -779,7 +765,7 @@ __i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); sg = i915_gem_object_get_sg(obj, n, &offset); - return nth_page(sg_page(sg), offset); + return sg_page(sg) + offset; } /* Like i915_gem_object_get_page(), but mark the returned page dirty */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index e3d188455f67..b9dae15c1d16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -514,6 +514,13 @@ static int __create_shmem(struct drm_i915_private *i915, if (IS_ERR(filp)) return PTR_ERR(filp); + /* + * Prevent -EFBIG by allowing large writes beyond MAX_NON_LFS on shmem + * objects by setting O_LARGEFILE. + */ + if (force_o_largefile()) + filp->f_flags |= O_LARGEFILE; + obj->filp = filp; return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b81e67504bbe..7a3e74a6676e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -170,7 +170,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, * Also note that although these lists do not hold a reference to * the object we can safely grab one here: The final object * unreferencing and the bound_list are both protected by the - * dev->struct_mutex and so we won't ever be able to observe an + * i915->mm.obj_lock and so we won't ever be able to observe an * object on the bound_list with a reference count equals 0. */ for (phase = phases; phase->list; phase++) { @@ -185,7 +185,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, /* * We serialize our access to unreferenced objects through - * the use of the struct_mutex. 
While the objects are not + * the use of the obj_lock. While the objects are not * yet freed (due to RCU then a workqueue) we still want * to be able to shrink their pages, so they remain on * the unbound/bound list until actually freed. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 991666fd9f85..54829801d3f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -217,10 +217,10 @@ static unsigned long to_wait_timeout(s64 timeout_ns) * * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any * non-zero timeout parameter the wait ioctl will wait for the given number of - * nanoseconds on an object becoming unbusy. Since the wait itself does so - * without holding struct_mutex the object may become re-busied before this - * function completes. A similar but shorter * race condition exists in the busy - * ioctl + * nanoseconds on an object becoming unbusy. Since the wait occurs without + * holding a global or exclusive lock the object may become re-busied before + * this function completes. A similar but shorter * race condition exists + * in the busy ioctl */ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index a09e2eb47175..8f13ec4ff0d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -11,11 +11,6 @@ #include "i915_gemfs.h" #include "i915_utils.h" -static int add_param(struct fs_context *fc, const char *key, const char *val) -{ - return vfs_parse_fs_string(fc, key, val, strlen(val)); -} - void i915_gemfs_init(struct drm_i915_private *i915) { struct file_system_type *type; @@ -48,9 +43,9 @@ void i915_gemfs_init(struct drm_i915_private *i915) fc = fs_context_for_mount(type, SB_KERNMOUNT); if (IS_ERR(fc)) goto err; - ret = add_param(fc, "source", "tmpfs"); + ret = vfs_parse_fs_string(fc, "source", "tmpfs"); if (!ret) - ret = add_param(fc, "huge", "within_size"); + ret = vfs_parse_fs_string(fc, "huge", "within_size"); if (!ret) gemfs = fc_mount_longterm(fc); put_fs_context(fc); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index e747f5ed195e..539c620364e3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,7 +5,7 @@ #include "i915_selftest.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_engine_user.h" @@ -122,7 +122,7 @@ static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) return false; - return HAS_DISPLAY(display); + return intel_display_device_present(display); } static bool fast_blit_ok(const struct blit_buffer *buf) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 98c7f6052069..10070ee4d74c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -14,7 +14,6 @@ #include "i915_active_types.h" #include "i915_sw_fence.h" -#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" #include "intel_wakeref.h" diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 03baa7fa0a27..7f389cb0bde4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -106,14 +106,18 @@ * preemption, but just sampling the new tail pointer). * */ + #include <linux/interrupt.h> #include <linux/string_helpers.h> +#include "gen8_engine_cs.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "i915_reg.h" +#include "i915_timer_util.h" #include "i915_trace.h" #include "i915_vgpu.h" -#include "gen8_engine_cs.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_heartbeat.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 86b5a9ba323d..c7befc5c20d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_object.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "intel_engine_pm.h" #include "intel_gt_buffer_pool.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index a60822e2b5d4..c3afa321fe30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9ca42589da4d..bf38cc5fe872 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -341,7 +341,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) return PTR_ERR(pctx); } - GEM_BUG_ON(range_overflows_end_t(u64, + GEM_BUG_ON(range_end_overflows_t(u64, i915->dsm.stolen.start, pctx->stolen->start, U32_MAX)); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4a1675dea1c7..41b5036dc538 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,18 +9,17 @@ #include "display/intel_display_reset.h" #include "display/intel_overlay.h" - #include "gem/i915_gem_context.h" - #include "gt/intel_gt_regs.h" - #include "gt/uc/intel_gsc_fw.h" +#include "uc/intel_guc.h" #include "i915_drv.h" #include "i915_file_private.h" #include "i915_gpu_error.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_engine_pm.h" #include "intel_engine_regs.h" @@ -32,8 +31,6 @@ #include "intel_pci_config.h" #include "intel_reset.h" -#include "uc/intel_guc.h" - #define RESET_MAX_RETRIES 3 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 4f5fd393af6f..ee4eb574a219 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -20,7 +20,7 @@ struct intel_reset { * FENCE registers). * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit + * acquire a global lock to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. * As the number of engines continues to grow, allocate the flags from * the most significant bits. 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2a6d79abf25b..8314a4b0505e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -15,18 +15,19 @@ #include "i915_irq.h" #include "i915_mitigations.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_gt.h" #include "intel_gt_irq.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" -#include "intel_engine_heartbeat.h" -#include "intel_engine_pm.h" -#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 006042e0b229..4da94098bd3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -10,9 +10,11 @@ #include "display/intel_display.h" #include "display/intel_display_rps.h" #include "soc/intel_dram.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 57308c4d664a..85b43f9b9d95 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -9,6 +9,7 @@ #include <linux/lockdep.h> #include "i915_active.h" +#include "i915_list_util.h" #include "i915_syncmap.h" #include "intel_timeline_types.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 69ed946a39e5..a5184f09d1de 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -3,17 +3,17 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_selftest.h" - #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gen8_engine_cs.h" #include "i915_gem_ww.h" +#include "i915_selftest.h" +#include "i915_wait_util.h" +#include "intel_context.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" -#include "intel_context.h" #include "intel_gt.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index aab2759067d2..4a81bc396b21 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include "i915_drv.h" +#include "i915_timer_util.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "sysfs_engines.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index d8edd7c054c8..e7444ebc373e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -10,11 +10,13 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" + +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_gsc_proxy.h" #include "intel_gsc_uc.h" #include "intel_gsc_uc_heci_cmd_submit.h" -#include "i915_drv.h" -#include "i915_reg.h" /* * GSC proxy: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c 
b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 2fde5c360cff..9bd29be7656f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -8,6 +8,8 @@ #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_ring.h" + +#include "i915_wait_util.h" #include "intel_gsc_uc_heci_cmd_submit.h" struct gsc_heci_pkt { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f360f020d8f1..52ec4421a211 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -8,15 +8,17 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "gt/intel_gt_regs.h" + +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" #include "intel_guc_print.h" #include "intel_guc_slpc.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_irq.h" -#include "i915_reg.h" /** * DOC: GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 380a11c92d63..3e7e5badcc2b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -5,11 +5,12 @@ #include <linux/circ_buf.h> #include <linux/ktime.h> -#include <linux/time64.h> #include <linux/string_helpers.h> +#include <linux/time64.h> #include <linux/timekeeping.h> #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_ct.h" #include "intel_guc_print.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 384d1400134d..b1bda1b84f0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -13,9 +13,11 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" + +#include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" -#include "i915_drv.h" static void guc_prepare_xfer(struct intel_gt *gt) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 09a64f224c49..cdff48920ee6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -6,6 +6,8 @@ #include <linux/debugfs.h> #include <linux/string_helpers.h> +#include <drm/drm_managed.h> + #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_irq.h" @@ -511,7 +513,11 @@ static void guc_log_relay_unmap(struct intel_guc_log *log) void intel_guc_log_init_early(struct intel_guc_log *log) { - mutex_init(&log->relay.lock); + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_i915(guc); + + drmm_mutex_init(&i915->drm, &log->relay.lock); + drmm_mutex_init(&i915->drm, &log->guc_lock); INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -677,7 +683,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX) return -EINVAL; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&log->guc_lock); if (log->level == level) goto out_unlock; @@ -695,7 +701,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) log->level = level; out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&log->guc_lock); return ret; } diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 02127703be80..13cb93ad0710 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -42,6 +42,14 @@ enum { struct intel_guc_log { u32 level; + /* + * Protects concurrent access and modification of intel_guc_log->level. + * + * This lock replaces the legacy struct_mutex usage in + * intel_guc_log system. + */ + struct mutex guc_lock; + /* Allocation settings */ struct { s32 bytes; /* Size in bytes */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index d5ee6e5e1443..fa9af08f9708 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -3,17 +3,20 @@ * Copyright © 2021 Intel Corporation */ -#include <drm/drm_cache.h> #include <linux/string_helpers.h> +#include <drm/drm_cache.h> + +#include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_reg.h" -#include "intel_guc_slpc.h" +#include "i915_wait_util.h" #include "intel_guc_print.h" +#include "intel_guc_slpc.h" #include "intel_mchbar_regs.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_regs.h" -#include "gt/intel_rps.h" /** * DOC: SLPC - Dynamic Frequency management diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 127316d2c8aa..68f2b8d363ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -25,16 +25,16 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_trace.h" +#include "i915_wait_util.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" #include "intel_guc_print.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_reg.h" -#include "i915_irq.h" -#include "i915_trace.h" - /** * DOC: GuC-based command submission * diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a91e23c22ea1..d432fdd69833 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1921,7 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; - bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? 
false : true; + bb->ppgtt = s->buf_addr_type != GTT_BUFFER; /* * The start_offset stores the batch buffer's start gma's diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 2f7208843367..0b810baad20a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -33,14 +33,16 @@ * */ -#include "i915_drv.h" -#include "i915_reg.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_ring.h" + #include "gvt.h" +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "trace.h" #define GEN9_MOCS_SIZE 64 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 23fa098c4479..c2e38d4bcd01 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -26,11 +26,11 @@ * */ +#include <linux/debugfs.h> #include <linux/sched/mm.h> #include <linux/sort.h> #include <linux/string_helpers.h> -#include <linux/debugfs.h> #include <drm/drm_debugfs.h> #include "gem/i915_gem_context.h" @@ -54,6 +54,7 @@ #include "i915_irq.h" #include "i915_reg.h" #include "i915_scheduler.h" +#include "i915_wait_util.h" #include "intel_mchbar_regs.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 70f042ce8705..a28c3710c4d5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -51,13 +51,15 @@ #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_crtc.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "display/intel_display_driver.h" +#include "display/intel_display_power.h" #include "display/intel_dmc.h" #include "display/intel_dp.h" #include "display/intel_dpt.h" #include "display/intel_encoder.h" #include "display/intel_fbdev.h" +#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_opregion.h" #include "display/intel_overlay.h" @@ -977,7 +979,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_disable(display); drm_client_dev_suspend(&i915->drm, false); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&i915->drm); intel_display_driver_disable_user_access(display); @@ -989,7 +991,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_irq_suspend(i915); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1060,7 +1062,7 @@ static int i915_drm_suspend(struct drm_device *dev) * properly. 
*/ intel_power_domains_disable(display); drm_client_dev_suspend(dev, false); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(dev); intel_display_driver_disable_user_access(display); } @@ -1072,7 +1074,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_irq_suspend(dev_priv); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1219,7 +1221,7 @@ static int i915_drm_resume(struct drm_device *dev) */ intel_irq_resume(dev_priv); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) drm_mode_config_reset(dev); i915_gem_resume(dev_priv); @@ -1228,14 +1230,14 @@ static int i915_drm_resume(struct drm_device *dev) intel_clock_gating_init(dev_priv); - if (HAS_DISPLAY(display)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_display_driver_resume(display); - if (HAS_DISPLAY(display)) { + if (intel_display_device_present(display)) { intel_display_driver_enable_user_access(display); drm_kms_helper_poll_enable(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2f3965feada1..6a768aad8edd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -114,8 +114,7 @@ struct i915_gem_mm { struct intel_memory_region *stolen_region; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; - /** Protects the usage of the GTT stolen memory allocator. This is - * always the inner lock when overlapping with struct_mutex. */ + /** Protects the usage of the GTT stolen memory allocator */ struct mutex stolen_lock; /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ @@ -222,6 +221,9 @@ struct drm_i915_private { bool irqs_enabled; + /* LPT/WPT IOSF sideband protection */ + struct mutex sbi_lock; + /* VLV/CHV IOSF sideband */ struct { struct mutex lock; /* protect sideband access */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8c8d43451f35..e14a0c3db999 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -847,8 +847,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) /* * Only called during RPM suspend. All users of the userfault_list * must be holding an RPM wakeref to ensure that this can not - * run concurrently with themselves (and use the struct_mutex for - * protection between themselves). + * run concurrently with themselves. */ list_for_each_entry_safe(obj, on, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a5fa40ab5de2..8d5da222a187 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -163,11 +163,6 @@ static void ivb_parity_work(struct work_struct *work) u32 misccpctl; u8 slice = 0; - /* We must turn off DOP level clock gating to access the L3 registers. - * In order to prevent a get/put style interface, acquire struct mutex - * any time we access those registers. 
- */ - mutex_lock(&dev_priv->drm.struct_mutex); /* If we've screwed up tracking, just let the interrupt fire again */ if (drm_WARN_ON(&dev_priv->drm, !dev_priv->l3_parity.which_slice)) @@ -225,7 +220,6 @@ out: gen5_gt_enable_irq(gt, GT_PARITY_ERROR(dev_priv)); spin_unlock_irq(gt->irq_lock); - mutex_unlock(&dev_priv->drm.struct_mutex); } static irqreturn_t valleyview_irq_handler(int irq, void *arg) diff --git a/drivers/gpu/drm/i915/i915_list_util.h b/drivers/gpu/drm/i915/i915_list_util.h new file mode 100644 index 000000000000..4e515dc8a3e0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_list_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_LIST_UTIL_H__ +#define __I915_LIST_UTIL_H__ + +#include <linux/list.h> +#include <asm/rwonce.h> + +static inline void __list_del_many(struct list_head *head, + struct list_head *first) +{ + first->prev = head; + WRITE_ONCE(head->next, first); +} + +static inline int list_is_last_rcu(const struct list_head *list, + const struct list_head *head) +{ + return READ_ONCE(list->next) == head; +} + +#endif /* __I915_LIST_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_ptr_util.h b/drivers/gpu/drm/i915/i915_ptr_util.h new file mode 100644 index 000000000000..9f8931d7d99b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ptr_util.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_PTR_UTIL_H__ +#define __I915_PTR_UTIL_H__ + +#include <linux/types.h> + +#define ptr_mask_bits(ptr, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) + +#define ptr_unpack_bits(ptr, bits, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + *(bits) = __v & (BIT(n) - 1); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_pack_bits(ptr, bits, n) ({ \ + unsigned long __bits = (bits); \ + GEM_BUG_ON(__bits & -BIT(n)); \ + ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ +}) + +#define ptr_dec(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v - 1); \ +}) + +#define ptr_inc(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v + 1); \ +}) + +#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) +#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) +#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) +#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) + +static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) +{ + return a - b; +} + +#define u64_to_ptr(T, x) ({ \ + typecheck(u64, x); \ + (T *)(uintptr_t)(x); \ +}) + +/* + * container_of_user: Extract the superclass from a pointer to a member. + * + * Exactly like container_of() with the exception that it plays nicely + * with sparse for __user @ptr. 
+ */ +#define container_of_user(ptr, type, member) ({ \ + void __user *__mptr = (void __user *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type __user *)(__mptr - offsetof(type, member))); }) + +#endif /* __I915_PTR_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 5f7e8138ec14..b09135301f39 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -31,19 +31,20 @@ #include <linux/llist.h> #include <linux/lockdep.h> +#include <uapi/drm/i915_drm.h> + #include "gem/i915_gem_context_types.h" #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "gt/intel_timeline_types.h" #include "i915_gem.h" +#include "i915_ptr_util.h" #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" #include "i915_vma_resource.h" -#include <uapi/drm/i915_drm.h> - struct drm_file; struct drm_i915_gem_object; struct drm_printer; diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c index 3a95a55b2e87..d5b6d8ab31a2 100644 --- a/drivers/gpu/drm/i915/i915_switcheroo.c +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -5,7 +5,7 @@ #include <linux/vga_switcheroo.h> -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "i915_driver.h" #include "i915_drv.h" @@ -22,7 +22,7 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); return; } - if (!HAS_DISPLAY(display)) { + if (!intel_display_device_present(display)) { dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); return; } @@ -52,7 +52,8 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev) * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. */ - return i915 && HAS_DISPLAY(display) && atomic_read(&i915->drm.open_count) == 0; + return i915 && intel_display_device_present(display) && + atomic_read(&i915->drm.open_count) == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/i915/i915_timer_util.c b/drivers/gpu/drm/i915/i915_timer_util.c new file mode 100644 index 000000000000..ee4cfd8b3c07 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <linux/jiffies.h> + +#include "i915_timer_util.h" + +void cancel_timer(struct timer_list *t) +{ + if (!timer_active(t)) + return; + + timer_delete(t); + WRITE_ONCE(t->expires, 0); +} + +void set_timer_ms(struct timer_list *t, unsigned long timeout) +{ + if (!timeout) { + cancel_timer(t); + return; + } + + timeout = msecs_to_jiffies(timeout); + + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffy. + */ + barrier(); + + /* Keep t->expires = 0 reserved to indicate a canceled timer. 
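A standalone C sketch (userspace model, not kernel code and not part of the patch above) of the convention the new i915_timer_util.c enforces: expires == 0 is reserved to mean "not armed", so a deadline that happens to compute to 0 (e.g. on jiffies wraparound) is bumped to 1 — which is what the `jiffies + timeout ?: 1` expression guards against. The fake_timer type and helper names are assumptions for the demo only.

#include <stdio.h>

struct fake_timer { unsigned long expires; };   /* stand-in for struct timer_list */

static void fake_cancel(struct fake_timer *t) { t->expires = 0; }

static void fake_set(struct fake_timer *t, unsigned long now, unsigned long timeout)
{
        if (!timeout) {                          /* a timeout of 0 means "cancel" */
                fake_cancel(t);
                return;
        }
        /* keep expires == 0 reserved for "cancelled"; mirrors the `?: 1` fallback */
        t->expires = (now + timeout) ? (now + timeout) : 1;
}

static int fake_active(const struct fake_timer *t) { return t->expires != 0; }

int main(void)
{
        struct fake_timer t = { 0 };

        fake_set(&t, -5UL, 5);                   /* deadline wraps to 0 ... */
        printf("active=%d expires=%lu\n", fake_active(&t), t.expires); /* ... bumped to 1 */

        fake_set(&t, 100, 0);                    /* timeout 0 disarms the timer */
        printf("active=%d\n", fake_active(&t));
        return 0;
}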
*/ + mod_timer(t, jiffies + timeout ?: 1); +} diff --git a/drivers/gpu/drm/i915/i915_timer_util.h b/drivers/gpu/drm/i915/i915_timer_util.h new file mode 100644 index 000000000000..f35ad730820c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_TIMER_UTIL_H__ +#define __I915_TIMER_UTIL_H__ + +#include <linux/timer.h> +#include <asm/rwonce.h> + +void cancel_timer(struct timer_list *t); +void set_timer_ms(struct timer_list *t, unsigned long timeout); + +static inline bool timer_active(const struct timer_list *t) +{ + return READ_ONCE(t->expires); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return timer_active(t) && !timer_pending(t); +} + +#endif /* __I915_TIMER_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index b60c28fbd207..49f7ed413132 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -47,36 +47,6 @@ bool i915_error_injected(void) #endif -void cancel_timer(struct timer_list *t) -{ - if (!timer_active(t)) - return; - - timer_delete(t); - WRITE_ONCE(t->expires, 0); -} - -void set_timer_ms(struct timer_list *t, unsigned long timeout) -{ - if (!timeout) { - cancel_timer(t); - return; - } - - timeout = msecs_to_jiffies(timeout); - - /* - * Paranoia to make sure the compiler computes the timeout before - * loading 'jiffies' as jiffies is volatile and may be updated in - * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffy. - */ - barrier(); - - /* Keep t->expires = 0 reserved to indicate a canceled timer. */ - mod_timer(t, jiffies + timeout ?: 1); -} - bool i915_vtd_active(struct drm_i915_private *i915) { if (device_iommu_mapped(i915->drm.dev)) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 9cb40c2c4b12..a0c892e4c40d 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,7 +25,6 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H -#include <linux/list.h> #include <linux/overflow.h> #include <linux/sched.h> #include <linux/string_helpers.h> @@ -38,7 +37,6 @@ #endif struct drm_i915_private; -struct timer_list; #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) @@ -67,88 +65,12 @@ bool i915_error_injected(void); drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \ }) -#define range_overflows(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ >= max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_t(type, start, size, max) \ - range_overflows((type)(start), (type)(size), (type)(max)) - -#define range_overflows_end(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ > max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_end_t(type, start, size, max) \ - range_overflows_end((type)(start), (type)(size), (type)(max)) - -#define ptr_mask_bits(ptr, n) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) - -#define ptr_unpack_bits(ptr, bits, n) ({ \ - unsigned long __v 
= (unsigned long)(ptr); \ - *(bits) = __v & (BIT(n) - 1); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_pack_bits(ptr, bits, n) ({ \ - unsigned long __bits = (bits); \ - GEM_BUG_ON(__bits & -BIT(n)); \ - ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ -}) - -#define ptr_dec(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v - 1); \ -}) - -#define ptr_inc(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v + 1); \ -}) - -#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) -#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) -#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) -#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) - #define fetch_and_zero(ptr) ({ \ typeof(*ptr) __T = *(ptr); \ *(ptr) = (typeof(*ptr))0; \ __T; \ }) -static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) -{ - return a - b; -} - -/* - * container_of_user: Extract the superclass from a pointer to a member. - * - * Exactly like container_of() with the exception that it plays nicely - * with sparse for __user @ptr. - */ -#define container_of_user(ptr, type, member) ({ \ - void __user *__mptr = (void __user *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - ((type __user *)(__mptr - offsetof(type, member))); }) - /* * check_user_mbz: Check that a user value exists and is zero * @@ -167,11 +89,6 @@ static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0; \ }) -#define u64_to_ptr(T, x) ({ \ - typecheck(u64, x); \ - (T *)(uintptr_t)(x); \ -}) - #define __mask_next_bit(mask) ({ \ int __idx = ffs(mask) - 1; \ mask &= ~BIT(__idx); \ @@ -183,19 +100,6 @@ static inline bool is_power_of_2_u64(u64 n) return (n != 0 && ((n & (n - 1)) == 0)); } -static inline void __list_del_many(struct list_head *head, - struct list_head *first) -{ - first->prev = head; - WRITE_ONCE(head->next, first); -} - -static inline int list_is_last_rcu(const struct list_head *list, - const struct list_head *head) -{ - return READ_ONCE(list->next) == head; -} - static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) { unsigned long j = msecs_to_jiffies(m); @@ -230,112 +134,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -/* - * __wait_for - magic wait macro - * - * Macro to help avoid open coding check/wait/timeout patterns. Note that it's - * important that we check the condition again after having timed out, since the - * timeout could be due to preemption or similar and we've never had a chance to - * check the condition before the timeout. 
- */ -#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ - long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ - int ret__; \ - might_sleep(); \ - for (;;) { \ - const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret__ = 0; \ - break; \ - } \ - if (expired__) { \ - ret__ = -ETIMEDOUT; \ - break; \ - } \ - usleep_range(wait__, wait__ * 2); \ - if (wait__ < (Wmax)) \ - wait__ <<= 1; \ - } \ - ret__; \ -}) - -#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ - (Wmax)) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) - -/* - * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. - * On PREEMPT_RT the context isn't becoming atomic because it is used in an - * interrupt handler or because a spinlock_t is acquired. This leads to - * warnings which don't occur otherwise and therefore the check is disabled. - */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) -#else -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -#endif - -#define _wait_for_atomic(COND, US, ATOMIC) \ -({ \ - int cpu, ret, timeout = (US) * 1000; \ - u64 base; \ - _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - cpu = smp_processor_id(); \ - } \ - base = local_clock(); \ - for (;;) { \ - u64 now = local_clock(); \ - if (!(ATOMIC)) \ - preempt_enable(); \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret = 0; \ - break; \ - } \ - if (now - base >= timeout) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - cpu_relax(); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - if (unlikely(cpu != smp_processor_id())) { \ - timeout -= now - base; \ - cpu = smp_processor_id(); \ - base = local_clock(); \ - } \ - } \ - } \ - ret; \ -}) - -#define wait_for_us(COND, US) \ -({ \ - int ret__; \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10, 10); \ - else \ - ret__ = _wait_for_atomic((COND), (US), 0); \ - ret__; \ -}) - -#define wait_for_atomic_us(COND, US) \ -({ \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - BUILD_BUG_ON((US) > 50000); \ - _wait_for_atomic((COND), (US), 1); \ -}) - -#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) - #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) @@ -351,19 +149,6 @@ static inline void __add_taint_for_CI(unsigned int taint) add_taint(taint, LOCKDEP_STILL_OK); } -void cancel_timer(struct timer_list *t); -void set_timer_ms(struct timer_list *t, unsigned long timeout); - -static inline bool timer_active(const struct timer_list *t) -{ - return READ_ONCE(t->expires); -} - -static inline bool timer_expired(const struct timer_list *t) -{ - return timer_active(t) && !timer_pending(t); -} - static inline bool i915_run_as_guest(void) { #if IS_ENABLED(CONFIG_X86) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 0f9eee6d18d2..8054047840aa 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -30,12 +30,12 @@ #include <drm/drm_mm.h> -#include "gt/intel_ggtt_fencing.h" #include "gem/i915_gem_object.h" - -#include "i915_gem_gtt.h" +#include "gt/intel_ggtt_fencing.h" #include "i915_active.h" +#include "i915_gem_gtt.h" 
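For reference, a standalone model (not the kernel macros, and not part of the patch) of what the ptr_pack_bits()/ptr_unpack_bits() helpers moved into i915_ptr_util.h do: the spare low bits of a sufficiently aligned pointer carry a small tag. The function names and the 16-byte-aligned example object are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

/* Pack a tag into the n spare low bits of an aligned pointer. */
static void *pack_bits(void *ptr, unsigned long bits, unsigned int n)
{
        uintptr_t mask = (1UL << n) - 1;

        return (void *)(((uintptr_t)ptr & ~mask) | (bits & mask));
}

/* Recover the tag and the original pointer. */
static void *unpack_bits(void *ptr, unsigned long *bits, unsigned int n)
{
        uintptr_t v = (uintptr_t)ptr;
        uintptr_t mask = (1UL << n) - 1;

        *bits = v & mask;
        return (void *)(v & ~mask);
}

int main(void)
{
        static _Alignas(16) int object;          /* 16-byte alignment frees 4 low bits */
        unsigned long tag;
        void *packed = pack_bits(&object, 0x3, 4);
        void *plain = unpack_bits(packed, &tag, 4);

        printf("tag=%lu same_pointer=%d\n", tag, plain == (void *)&object);
        return 0;
}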
+#include "i915_ptr_util.h" #include "i915_request.h" #include "i915_vma_resource.h" #include "i915_vma_types.h" diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h new file mode 100644 index 000000000000..7376898e3bf8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_wait_util.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_WAIT_UTIL_H__ +#define __I915_WAIT_UTIL_H__ + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/ktime.h> +#include <linux/sched/clock.h> +#include <linux/smp.h> + +/* + * __wait_for - magic wait macro + * + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's + * important that we check the condition again after having timed out, since the + * timeout could be due to preemption or similar and we've never had a chance to + * check the condition before the timeout. + */ +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ + int ret__; \ + might_sleep(); \ + for (;;) { \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ + OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret__ = 0; \ + break; \ + } \ + if (expired__) { \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ + } \ + ret__; \ +}) + +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + +/* + * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. + * On PREEMPT_RT the context isn't becoming atomic because it is used in an + * interrupt handler or because a spinlock_t is acquired. This leads to + * warnings which don't occur otherwise and therefore the check is disabled. 
+ */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) +#else +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +#endif + +#define _wait_for_atomic(COND, US, ATOMIC) \ +({ \ + int cpu, ret, timeout = (US) * 1000; \ + u64 base; \ + _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + cpu = smp_processor_id(); \ + } \ + base = local_clock(); \ + for (;;) { \ + u64 now = local_clock(); \ + if (!(ATOMIC)) \ + preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret = 0; \ + break; \ + } \ + if (now - base >= timeout) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + cpu_relax(); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + if (unlikely(cpu != smp_processor_id())) { \ + timeout -= now - base; \ + cpu = smp_processor_id(); \ + base = local_clock(); \ + } \ + } \ + } \ + ret; \ +}) + +#define wait_for_us(COND, US) \ +({ \ + int ret__; \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + if ((US) > 10) \ + ret__ = _wait_for((COND), (US), 10, 10); \ + else \ + ret__ = _wait_for_atomic((COND), (US), 0); \ + ret__; \ +}) + +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) + +#endif /* __I915_WAIT_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pcode.c b/drivers/gpu/drm/i915/intel_pcode.c index 81da75108c60..55ffedad2490 100644 --- a/drivers/gpu/drm/i915/intel_pcode.c +++ b/drivers/gpu/drm/i915/intel_pcode.c @@ -5,6 +5,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_pcode.h" static int gen6_check_mailbox_status(u32 mbox) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4ccba7c8ffb3..8cb59f8d1f4c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -21,19 +21,20 @@ * IN THE SOFTWARE. */ -#include <drm/drm_managed.h> #include <linux/pm_runtime.h> -#include "display/intel_display_core.h" +#include <drm/drm_managed.h> -#include "gt/intel_gt.h" +#include "display/intel_display_core.h" #include "gt/intel_engine_regs.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_regs.h" #include "i915_drv.h" #include "i915_iosf_mbi.h" #include "i915_reg.h" #include "i915_vgpu.h" +#include "i915_wait_util.h" #include "intel_uncore_trace.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f8da693ad3ce..27d545c4e6a5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -2,15 +2,15 @@ /* * Copyright(c) 2020 Intel Corporation. 
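A userspace model of the poll-with-backoff pattern that __wait_for()/wait_for() (now hosted in i915_wait_util.h) implement, assuming POSIX clock_gettime() and usleep(); as in the kernel macro, the condition is checked once more after the deadline so a late wakeup is not misreported as a timeout. This is a sketch under those assumptions, not the kernel implementation.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Poll cond() with exponential backoff; note the deadline is sampled before
 * the condition so a success seen after the deadline still returns 0. */
static int wait_for_cond(bool (*cond)(void), long long timeout_us,
                         long wait_min_us, long wait_max_us)
{
        const long long end = now_ns() + timeout_us * 1000;
        long wait_us = wait_min_us;

        for (;;) {
                bool expired = now_ns() > end;

                if (cond())
                        return 0;
                if (expired)
                        return -ETIMEDOUT;

                usleep(wait_us);
                if (wait_us < wait_max_us)
                        wait_us <<= 1;
        }
}

static bool always_false(void) { return false; }

int main(void)
{
        printf("ret=%d\n", wait_for_cond(always_false, 2000, 10, 1000));
        return 0;
}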
*/ + #include <linux/workqueue.h> #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_gt.h" #include "i915_drv.h" - +#include "i915_wait_util.h" #include "intel_pxp.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 2fb7a9e7efec..48cd617247d1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -22,14 +22,13 @@ * */ -#include <linux/prime_numbers.h> #include <linux/pm_qos.h> +#include <linux/prime_numbers.h> #include <linux/sort.h> #include "gem/i915_gem_internal.h" #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" - #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" @@ -40,11 +39,11 @@ #include "i915_random.h" #include "i915_selftest.h" +#include "i915_wait_util.h" #include "igt_flush_test.h" #include "igt_live_test.h" #include "igt_spinner.h" #include "lib_sw_fence.h" - #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 889281819c5b..9c276c9d0a75 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -31,7 +31,7 @@ #include "i915_driver.h" #include "i915_drv.h" #include "i915_selftest.h" - +#include "i915_wait_util.h" #include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 8c3e1f20e5a1..820364171ebe 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,12 +3,13 @@ * * Copyright © 2018 Intel Corporation */ -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" #include "gem/i915_gem_internal.h" #include "gem/selftests/igt_gem_utils.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt.h" +#include "i915_wait_util.h" #include "igt_spinner.h" int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 149527827624..edffaed8f9a7 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -732,7 +732,7 @@ int intel_dram_detect(struct drm_i915_private *i915) struct dram_info *dram_info; int ret; - if (IS_DG2(i915) || !HAS_DISPLAY(display)) + if (IS_DG2(i915) || !intel_display_device_present(display)) return 0; dram_info = drmm_kzalloc(&i915->drm, sizeof(*dram_info), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/vlv_suspend.c b/drivers/gpu/drm/i915/vlv_suspend.c index fc9f311ea1db..221e4c0b2c58 100644 --- a/drivers/gpu/drm/i915/vlv_suspend.c +++ b/drivers/gpu/drm/i915/vlv_suspend.c @@ -8,16 +8,17 @@ #include <drm/drm_print.h> +#include "gt/intel_gt_regs.h" + #include "i915_drv.h" #include "i915_reg.h" #include "i915_trace.h" #include "i915_utils.h" +#include "i915_wait_util.h" #include "intel_clock_gating.h" #include "intel_uncore_trace.h" #include "vlv_suspend.h" -#include "gt/intel_gt_regs.h" - struct vlv_s0ix_state { /* GAM */ u32 wr_watermark; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c index 00e1afd46b81..44df6410bce1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_catalog.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_catalog.c @@ -913,6 
+913,11 @@ static const struct adreno_info a6xx_gpus[] = { { /* sentinel */ }, }, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 185, 0 }, + { 127, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS( 0x06030001, @@ -1024,6 +1029,11 @@ static const struct adreno_info a6xx_gpus[] = { .gmu_cgc_mode = 0x00020200, .prim_fifo_threshold = 0x00300200, }, + .speedbins = ADRENO_SPEEDBINS( + { 0, 0 }, + { 169, 0 }, + { 113, 1 }, + ), }, { .chip_ids = ADRENO_CHIP_IDS(0x06030500), .family = ADRENO_6XX_GEN4, @@ -1343,6 +1353,69 @@ static const uint32_t a7xx_pwrup_reglist_regs[] = { DECLARE_ADRENO_REGLIST_LIST(a7xx_pwrup_reglist); +/* Applicable for X185, A750 */ +static const u32 a750_ifpc_reglist_regs[] = { + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), + REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), + REG_A6XX_TPL1_NC_MODE_CNTL, + REG_A6XX_SP_NC_MODE_CNTL, + REG_A6XX_CP_DBG_ECO_CNTL, + REG_A6XX_CP_PROTECT_CNTL, + REG_A6XX_CP_PROTECT(0), + REG_A6XX_CP_PROTECT(1), + REG_A6XX_CP_PROTECT(2), + REG_A6XX_CP_PROTECT(3), + REG_A6XX_CP_PROTECT(4), + REG_A6XX_CP_PROTECT(5), + REG_A6XX_CP_PROTECT(6), + REG_A6XX_CP_PROTECT(7), + REG_A6XX_CP_PROTECT(8), + REG_A6XX_CP_PROTECT(9), + REG_A6XX_CP_PROTECT(10), + REG_A6XX_CP_PROTECT(11), + REG_A6XX_CP_PROTECT(12), + REG_A6XX_CP_PROTECT(13), + REG_A6XX_CP_PROTECT(14), + REG_A6XX_CP_PROTECT(15), + REG_A6XX_CP_PROTECT(16), + REG_A6XX_CP_PROTECT(17), + REG_A6XX_CP_PROTECT(18), + REG_A6XX_CP_PROTECT(19), + REG_A6XX_CP_PROTECT(20), + REG_A6XX_CP_PROTECT(21), + REG_A6XX_CP_PROTECT(22), + REG_A6XX_CP_PROTECT(23), + REG_A6XX_CP_PROTECT(24), + REG_A6XX_CP_PROTECT(25), + REG_A6XX_CP_PROTECT(26), + REG_A6XX_CP_PROTECT(27), + REG_A6XX_CP_PROTECT(28), + REG_A6XX_CP_PROTECT(29), + REG_A6XX_CP_PROTECT(30), + REG_A6XX_CP_PROTECT(31), + REG_A6XX_CP_PROTECT(32), + REG_A6XX_CP_PROTECT(33), + REG_A6XX_CP_PROTECT(34), + REG_A6XX_CP_PROTECT(35), + REG_A6XX_CP_PROTECT(36), + REG_A6XX_CP_PROTECT(37), + REG_A6XX_CP_PROTECT(38), + REG_A6XX_CP_PROTECT(39), + REG_A6XX_CP_PROTECT(40), + REG_A6XX_CP_PROTECT(41), + REG_A6XX_CP_PROTECT(42), + REG_A6XX_CP_PROTECT(43), + REG_A6XX_CP_PROTECT(44), + REG_A6XX_CP_PROTECT(45), + REG_A6XX_CP_PROTECT(46), + REG_A6XX_CP_PROTECT(47), +}; + +DECLARE_ADRENO_REGLIST_LIST(a750_ifpc_reglist); + static const struct adreno_info a7xx_gpus[] = { { .chip_ids = ADRENO_CHIP_IDS(0x07000200), @@ -1432,14 +1505,27 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .a6xx = &(const struct a6xx_info) { .hwcg = a740_hwcg, .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7050001, .gmu_cgc_mode = 0x00020202, + .bcms = (const struct a6xx_bcm[]) { + { .name = "SH0", .buswidth = 16 }, + { .name = "MC0", .buswidth = 4 }, + { + .name = "ACV", + .fixed = true, + .perfmode = BIT(3), + .perfmode_bw = 16500000, + }, + { /* sentinel */ }, + }, }, .preempt_record_size = 4192 * SZ_1K, .speedbins = ADRENO_SPEEDBINS( @@ -1460,12 +1546,14 @@ static const struct adreno_info a7xx_gpus[] = { .inactive_period = DRM_MSM_INACTIVE_PERIOD, .quirks = ADRENO_QUIRK_HAS_CACHED_COHERENT | ADRENO_QUIRK_HAS_HW_APRIV | - ADRENO_QUIRK_PREEMPTION, + ADRENO_QUIRK_PREEMPTION | + ADRENO_QUIRK_IFPC, .init = a6xx_gpu_init, .zapfw = 
"gen70900_zap.mbn", .a6xx = &(const struct a6xx_info) { .protect = &a730_protect, .pwrup_reglist = &a7xx_pwrup_reglist, + .ifpc_reglist = &a750_ifpc_reglist, .gmu_chipid = 0x7090100, .gmu_cgc_mode = 0x00020202, .bcms = (const struct a6xx_bcm[]) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 28e6705c6da6..fc62fef2fed8 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -93,14 +93,25 @@ bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu) /* Check to see if the GX rail is still powered */ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) { + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; u32 val; /* This can be called from gpu state code so make sure GMU is valid */ if (!gmu->initialized) return false; + /* If GMU is absent, then GX power domain is ON as long as GPU is in active state */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return true; + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + if (adreno_is_a7xx(adreno_gpu)) + return !(val & + (A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | + A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); + return !(val & (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); @@ -272,6 +283,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu) if (ret) DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n"); + set_bit(GMU_STATUS_FW_START, &gmu->status); + return ret; } @@ -403,7 +416,10 @@ int a6xx_sptprac_enable(struct a6xx_gmu *gmu) int ret; u32 val; - if (!gmu->legacy) + WARN_ON(!gmu->legacy); + + /* Nothing to do if GMU does the power management */ + if (gmu->idle_level > GMU_IDLE_STATE_ACTIVE) return 0; gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000); @@ -518,6 +534,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu) int ret; u32 val; + if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status)) + return 0; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1)); ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val, @@ -545,6 +564,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) int ret; u32 val; + if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status)) + return; + gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1); ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, @@ -553,6 +575,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu) DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n"); gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0); + + set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status); } static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value) @@ -681,8 +705,6 @@ setup_pdc: /* ensure no writes happen before the uCode is fully written */ wmb(); - a6xx_rpmh_stop(gmu); - err: if (!IS_ERR_OR_NULL(pdcptr)) iounmap(pdcptr); @@ -842,19 +864,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) else gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1); - if (state == GMU_WARM_BOOT) { - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - } else { + ret = a6xx_rpmh_start(gmu); + if (ret) + return ret; + + if (state == GMU_COLD_BOOT) { if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU], "GMU firmware is not loaded\n")) return -ENOENT; - ret = a6xx_rpmh_start(gmu); - if (ret) - return ret; - ret = a6xx_gmu_fw_load(gmu); if (ret) return ret; @@ -925,10 +943,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) ret = 
a6xx_gmu_gfx_rail_on(gmu); if (ret) return ret; - } - /* Enable SPTP_PC if the CPU is responsible for it */ - if (gmu->idle_level < GMU_IDLE_STATE_SPTP) { ret = a6xx_sptprac_enable(gmu); if (ret) return ret; @@ -980,6 +995,22 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) val, (val & 1), 100, 10000); gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS + seqmem_off, val, (val & 1), 100, 1000); + + if (!adreno_is_a740_family(adreno_gpu)) + return; + + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS4_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS5_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS6_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS7_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS8_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 10000); + gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS9_DRV0_STATUS + seqmem_off, + val, (val & 1), 100, 1000); } /* Force the GMU off in case it isn't responsive */ @@ -1023,6 +1054,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) /* Reset GPU core blocks */ a6xx_gpu_sw_reset(gpu, true); + + a6xx_rpmh_stop(gmu); } static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) @@ -1128,6 +1161,11 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Set the GPU to the current freq */ a6xx_gmu_set_initial_freq(gpu, gmu); + if (refcount_read(&gpu->sysprof_active) > 1) { + ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + if (!ret) + set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status); + } out: /* On failure, shut down the GMU to leave it in a good state */ if (ret) { @@ -1175,6 +1213,9 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); } + if (test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + ret = a6xx_gmu_wait_for_idle(gmu); /* If the GMU isn't responding assume it is hung */ @@ -1318,8 +1359,6 @@ static int a6xx_gmu_memory_probe(struct drm_device *drm, struct a6xx_gmu *gmu) struct msm_mmu *mmu; mmu = msm_iommu_new(gmu->dev, 0); - if (!mmu) - return -ENODEV; if (IS_ERR(mmu)) return PTR_ERR(mmu); @@ -1692,6 +1731,7 @@ static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu) u32 val; freq = gmu->gpu_freqs[i]; + /* This is unlikely to fail because we are passing back a known freq */ opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true); np = dev_pm_opp_get_of_node(opp); @@ -1790,6 +1830,35 @@ static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev, return irq; } +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + unsigned int sysprof_active; + + /* Nothing to do if GPU is suspended. We will handle this during GMU resume */ + if (!pm_runtime_get_if_active(&gpu->pdev->dev)) + return; + + mutex_lock(&gmu->lock); + + sysprof_active = refcount_read(&gpu->sysprof_active); + + /* + * 'Perfcounter select' register values are lost during IFPC collapse. 
To avoid that, + * use the currently unused perfcounter oob vote to block IFPC when sysprof is active + */ + if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status)) + a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET); + + mutex_unlock(&gmu->lock); + + pm_runtime_put(&gpu->pdev->dev); +} + void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; @@ -1932,8 +2001,9 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (ret) return ret; - /* Fow now, don't do anything fancy until we get our feet under us */ - gmu->idle_level = GMU_IDLE_STATE_ACTIVE; + /* Set GMU idle level */ + gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ? + GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE; pm_runtime_enable(gmu->dev); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index d1ce11131ba6..06cfc294016f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -50,6 +50,9 @@ struct a6xx_bcm { /* The GMU does not do any idle state management */ #define GMU_IDLE_STATE_ACTIVE 0 +/* Unknown power state. Not exposed by the firmware. For documentation purpose only */ +#define GMU_IDLE_STATE_RESERVED 1 + /* The GMU manages SPTP power collapse */ #define GMU_IDLE_STATE_SPTP 2 @@ -117,6 +120,14 @@ struct a6xx_gmu { struct qmp *qmp; struct a6xx_hfi_msg_bw_table *bw_table; + +/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */ +#define GMU_STATUS_FW_START 0 +/* To track if PDC sleep seq was done */ +#define GMU_STATUS_PDC_SLEEP 1 +/* To track Perfcounter OOB set status */ +#define GMU_STATUS_OOB_PERF_SET 2 + unsigned long status; }; static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset) @@ -158,6 +169,9 @@ static inline u64 gmu_read64(struct a6xx_gmu *gmu, u32 lo, u32 hi) #define gmu_poll_timeout(gmu, addr, val, cond, interval, timeout) \ readl_poll_timeout((gmu)->mmio + ((addr) << 2), val, cond, \ interval, timeout) +#define gmu_poll_timeout_atomic(gmu, addr, val, cond, interval, timeout) \ + readl_poll_timeout_atomic((gmu)->mmio + ((addr) << 2), val, cond, \ + interval, timeout) static inline u32 gmu_read_rscc(struct a6xx_gmu *gmu, u32 offset) { diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 45dd5fd1c2bf..b8f8ae940b55 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -16,6 +16,97 @@ #define GPU_PAS_ID 13 +static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu) +{ + u64 count_hi, count_lo, temp; + + do { + count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L); + temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + } while (unlikely(count_hi != temp)); + + return (count_hi << 32) | count_lo; +} + +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask) +{ + /* Success if !writedropped0/1 */ + if (!(status & mask)) + return true; + + udelay(10); + + /* Try to update fenced register again */ + gpu_write(gpu, offset, value); + + /* We can't do a posted write here because the power domain could be + * in collapse state. 
So use the heaviest barrier instead + */ + mb(); + return false; +} + +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + gpu_write(gpu, offset, value); + + /* Nothing else to be done in the case of no-GMU */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) + return 0; + + /* Try again for another 1ms before failing */ + gpu_write(gpu, offset, value); + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { + /* + * The 'delay' warning is here because the pause to print this + * warning will allow gpu to move to power collapse which + * defeats the purpose of continuous polling for 2 ms + */ + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", + offset); + return 0; + } + + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", + offset); + + return -ETIMEDOUT; +} + +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b) +{ + int ret; + + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); + if (ret) + return ret; + + if (!is_64b) + return 0; + + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); + + return ret; +} + static inline bool _a6xx_check_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring) - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else ring->restore_wptr = true; } else { @@ -173,8 +264,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * Needed for preemption */ OUT_PKT7(ring, CP_MEM_WRITE, 5); - OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); - OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); OUT_RING(ring, upper_32_bits(ttbr)); OUT_RING(ring, ctx->seqno); @@ -204,9 +295,9 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, */ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); - OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); - OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); @@ -298,8 +389,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + 
trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); } @@ -499,8 +589,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) } - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); @@ -739,11 +828,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) u32 *dest = (u32 *)&lock->regs[0]; int i; - reglist = adreno_gpu->info->a6xx->pwrup_reglist; - lock->gpu_req = lock->cpu_req = lock->turn = 0; - lock->ifpc_list_len = 0; - lock->preemption_list_len = reglist->count; + + reglist = adreno_gpu->info->a6xx->ifpc_reglist; + lock->ifpc_list_len = reglist->count; /* * For each entry in each of the lists, write the offset and the current @@ -754,6 +842,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) *dest++ = gpu_read(gpu, reglist->regs[i]); } + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + lock->preemption_list_len = reglist->count; + + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + /* * The overall register list is composed of * 1. Static IFPC-only registers @@ -1241,14 +1337,14 @@ static int hw_init(struct msm_gpu *gpu) /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); } @@ -1448,21 +1544,25 @@ static void a6xx_recover(struct msm_gpu *gpu) adreno_dump_info(gpu); - for (i = 0; i < 8; i++) - DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { + /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + + for (i = 0; i < 8; i++) + DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, + gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); - if (hang_debug) - a6xx_dump(gpu); + if (hang_debug) + a6xx_dump(gpu); + + } /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ - a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + a6xx_gpu->hung = true; pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1693,8 +1793,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) static void a6xx_fault_detect_irq(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); /* @@ -1706,13 +1804,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return; - /* - * Force the GPU to stay on until after we finish - * collecting information - */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) - 
gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); - DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0, @@ -1727,6 +1818,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); + /* Turn off interrupts to avoid triggering recovery again */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); + kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1751,9 +1845,49 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) } } +static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); +} + +static int irq_poll_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { + u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", + status, rbbm_unmasked); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; + + /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ + a6xx_gpu_keepalive_vote(gpu, true); + + if (irq_poll_fence(gpu)) + goto done; + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); @@ -1790,6 +1924,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_CP_SW) a6xx_preempt_irq(gpu); +done: + a6xx_gpu_keepalive_vote(gpu, false); + return IRQ_HANDLED; } @@ -2179,16 +2316,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - mutex_lock(&a6xx_gpu->gmu.lock); - - /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); - - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - mutex_unlock(&a6xx_gpu->gmu.lock); + *value = read_gmu_ao_counter(a6xx_gpu); return 0; } @@ -2298,18 +2426,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) return a6xx_gpu->shadow[ring->id]; + /* + * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware + * without 'whereami' support + */ + WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), + "Can't read CP_RB_RPTR register reliably\n"); + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { - struct msm_cp_state cp_state = { + struct msm_cp_state cp_state; + bool progress; + + /* + * With IFPC, KMD doesn't know whether GX power domain is collapsed + * or not. 
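A minimal standalone sketch (assumed names, not driver code) of the torn-read-safe 64-bit read used by read_gmu_ao_counter() above: read the high word, the low word, then the high word again, and retry until the two high-word reads agree so a carry between the halves cannot produce a mixed value.

#include <stdint.h>
#include <stdio.h>

/* Fake 32-bit "registers" standing in for the GMU always-on counter halves. */
static uint32_t reg_hi, reg_lo;

static uint32_t read_reg(const uint32_t *r) { return *r; }

static uint64_t read_counter64(void)
{
        uint32_t hi, lo, check;

        do {
                hi = read_reg(&reg_hi);
                lo = read_reg(&reg_lo);
                check = read_reg(&reg_hi);
        } while (hi != check);          /* high word changed: low word may be stale, retry */

        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        reg_hi = 0x1;
        reg_lo = 0xfffffff0;
        printf("counter=0x%llx\n", (unsigned long long)read_counter64());
        return 0;
}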
So, we can't blindly read the below registers in GX domain. + * Lets trust the hang detection in HW and lie to the caller that + * there was progress. + */ + if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) + return true; + + cp_state = (struct msm_cp_state) { .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), }; - bool progress; /* * Adjust the remaining data to account for what has already been @@ -2408,6 +2554,7 @@ static const struct adreno_gpu_funcs funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; @@ -2468,6 +2615,7 @@ static const struct adreno_gpu_funcs funcs_a7xx = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .get_timestamp = a6xx_gmu_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 6e71f617fc3d..0b17d36c36a9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -45,6 +45,7 @@ struct a6xx_info { const struct adreno_reglist *hwcg; const struct adreno_protect *protect; const struct adreno_reglist_list *pwrup_reglist; + const struct adreno_reglist_list *ifpc_reglist; u32 gmu_chipid; u32 gmu_cgc_mode; u32 prim_fifo_threshold; @@ -254,6 +255,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); +void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu); void a6xx_preempt_init(struct msm_gpu *gpu); void a6xx_preempt_hw_init(struct msm_gpu *gpu); @@ -295,5 +297,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state); void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off); void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b); #endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index d5d1271fce61..4c7f3c642f6a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -1586,8 +1586,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), GFP_KERNEL); - bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & - A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); + bool stalled; if (!a6xx_state) return ERR_PTR(-ENOMEM); @@ -1608,15 +1607,20 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) } /* If GX isn't on the rest of the data isn't going to be accessible */ - if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) return &a6xx_state->base; + /* Halt SQE first */ + gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + /* Get the banks of indexed registers */ if (adreno_is_a7xx(adreno_gpu)) a7xx_get_indexed_registers(gpu, a6xx_state); else a6xx_get_indexed_registers(gpu, a6xx_state); + stalled = !!(gpu_read(gpu, 
REG_A6XX_RBBM_STATUS3) & + A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT); /* * Try to initialize the crashdumper, if we are not dumping state * with the SMMU stalled. The crashdumper needs memory access to diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 8e69b1e84657..550de6ad68ef 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -21,6 +21,7 @@ static const char * const a6xx_hfi_msg_id[] = { HFI_MSG_ID(HFI_H2F_MSG_PERF_TABLE), HFI_MSG_ID(HFI_H2F_MSG_TEST), HFI_MSG_ID(HFI_H2F_MSG_START), + HFI_MSG_ID(HFI_H2F_FEATURE_CTRL), HFI_MSG_ID(HFI_H2F_MSG_CORE_FW_START), HFI_MSG_ID(HFI_H2F_MSG_GX_BW_PERF_VOTE), HFI_MSG_ID(HFI_H2F_MSG_PREPARE_SLUMBER), @@ -765,23 +766,40 @@ send: NULL, 0); } +static int a6xx_hfi_feature_ctrl_msg(struct a6xx_gmu *gmu, u32 feature, u32 enable, u32 data) +{ + struct a6xx_hfi_msg_feature_ctrl msg = { + .feature = feature, + .enable = enable, + .data = data, + }; + + return a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); +} + +#define HFI_FEATURE_IFPC 9 +#define IFPC_LONG_HYST 0x1680 + +static int a6xx_hfi_enable_ifpc(struct a6xx_gmu *gmu) +{ + if (gmu->idle_level != GMU_IDLE_STATE_IFPC) + return 0; + + return a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_IFPC, 1, IFPC_LONG_HYST); +} + #define HFI_FEATURE_ACD 12 static int a6xx_hfi_enable_acd(struct a6xx_gmu *gmu) { struct a6xx_hfi_acd_table *acd_table = &gmu->acd_table; - struct a6xx_hfi_msg_feature_ctrl msg = { - .feature = HFI_FEATURE_ACD, - .enable = 1, - .data = 0, - }; int ret; if (!acd_table->enable_by_level) return 0; /* Enable ACD feature at GMU */ - ret = a6xx_hfi_send_msg(gmu, HFI_H2F_FEATURE_CTRL, &msg, sizeof(msg), NULL, 0); + ret = a6xx_hfi_feature_ctrl_msg(gmu, HFI_FEATURE_ACD, 1, 0); if (ret) { DRM_DEV_ERROR(gmu->dev, "Unable to enable ACD (%d)\n", ret); return ret; @@ -898,6 +916,10 @@ int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state) if (ret) return ret; + ret = a6xx_hfi_enable_ifpc(gmu); + if (ret) + return ret; + ret = a6xx_hfi_send_core_fw_start(gmu); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c index 6a12a35dabff..afc5f4aa3b17 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu, } /* Write the most recent wptr for the given ring into the hardware */ -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring) { unsigned long flags; uint32_t wptr; @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (ring->restore_wptr) { wptr = get_wptr(ring); - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); ring->restore_wptr = false; } @@ -111,9 +111,9 @@ static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu) postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6); postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ); - postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO( + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO( REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS); - postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0); + postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0); postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1); postamble[count++] = 
CP_WAIT_REG_MEM_4_MASK(0x1); postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0); @@ -136,6 +136,21 @@ static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu) a6xx_gpu->postamble_enabled = false; } +/* + * Set preemption keepalive vote. Please note that this vote is different from the one used in + * a6xx_irq() + */ +static void a6xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on); +} + void a6xx_preempt_irq(struct msm_gpu *gpu) { uint32_t status; @@ -172,10 +187,12 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); + a6xx_preempt_keepalive_vote(gpu, false); + trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id); /* @@ -268,7 +285,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) */ if (!ring || (a6xx_gpu->cur_ring == ring)) { set_preempt_state(a6xx_gpu, PREEMPT_FINISH); - update_wptr(gpu, a6xx_gpu->cur_ring); + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); set_preempt_state(a6xx_gpu, PREEMPT_NONE); spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); return; @@ -302,13 +319,16 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) spin_unlock_irqrestore(&ring->preempt_lock, flags); - gpu_write64(gpu, - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, - a6xx_gpu->preempt_smmu_iova[ring->id]); + /* Set the keepalive bit to keep the GPU ON until preemption is complete */ + a6xx_preempt_keepalive_vote(gpu, true); + + a6xx_fenced_write(a6xx_gpu, + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id], + BIT(1), true); - gpu_write64(gpu, + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, - a6xx_gpu->preempt_iova[ring->id]); + a6xx_gpu->preempt_iova[ring->id], BIT(1), true); a6xx_gpu->next_ring = ring; @@ -328,7 +348,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); /* Trigger the preemption */ - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false); } static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 50945bfe9b49..28f744f3caf7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -24,6 +24,10 @@ bool disable_acd; MODULE_PARM_DESC(disable_acd, "Forcefully disable GPU ACD"); module_param_unsafe(disable_acd, bool, 0400); +static bool skip_gpu; +MODULE_PARM_DESC(no_gpu, "Disable GPU driver register (0=enable GPU driver register (default), 1=skip GPU driver register"); +module_param(skip_gpu, bool, 0400); + extern const struct adreno_gpulist a2xx_gpulist; extern const struct adreno_gpulist a3xx_gpulist; extern const struct adreno_gpulist a4xx_gpulist; @@ -184,6 +188,9 @@ bool adreno_has_gpu(struct device_node *node) uint32_t chip_id; int ret; + if (skip_gpu) + return false; + ret = find_chipid(node, &chip_id); if (ret) return false; @@ -404,10 +411,16 @@ static struct platform_driver adreno_driver = { void __init adreno_register(void) { + if (skip_gpu) + return; + platform_driver_register(&adreno_driver); } void __exit 
adreno_unregister(void) { + if (skip_gpu) + return; + platform_driver_unregister(&adreno_driver); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f1230465bf0d..afaa3cfefd35 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -10,7 +10,7 @@ #include <linux/interconnect.h> #include <linux/firmware/qcom/qcom_scm.h> #include <linux/kernel.h> -#include <linux/of_address.h> +#include <linux/of_reserved_mem.h> #include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/soc/qcom/mdt_loader.h> @@ -33,7 +33,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, struct device *dev = &gpu->pdev->dev; const struct firmware *fw; const char *signed_fwname = NULL; - struct device_node *np, *mem_np; + struct device_node *np; struct resource r; phys_addr_t mem_phys; ssize_t mem_size; @@ -51,18 +51,11 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, return -ENODEV; } - mem_np = of_parse_phandle(np, "memory-region", 0); - of_node_put(np); - if (!mem_np) { + ret = of_reserved_mem_region_to_resource(np, 0, &r); + if (ret) { zap_available = false; - return -EINVAL; - } - - ret = of_address_to_resource(mem_np, 0, &r); - of_node_put(mem_np); - if (ret) return ret; - + } mem_phys = r.start; /* @@ -209,9 +202,7 @@ adreno_iommu_create_vm(struct msm_gpu *gpu, u64 start, size; mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks); - if (!mmu) - return ERR_PTR(-ENODEV); - else if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return ERR_CAST(mmu); geometry = msm_iommu_get_geometry(mmu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 9dc93c247196..390fa6720d9b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -59,6 +59,7 @@ enum adreno_family { #define ADRENO_QUIRK_HAS_CACHED_COHERENT BIT(4) #define ADRENO_QUIRK_PREEMPTION BIT(5) #define ADRENO_QUIRK_4GB_VA BIT(6) +#define ADRENO_QUIRK_IFPC BIT(7) /* Helper for formating the chip_id in the way that userspace tools like * crashdec expect. diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 0fb5789c60d0..13cc658065c5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -32,6 +32,26 @@ enum dpu_perf_mode { }; /** + * dpu_core_perf_adjusted_mode_clk - Adjust given mode clock rate according to + * the perf clock factor. 
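To make the new dpu_core_perf_adjusted_mode_clk() helper introduced below concrete: the clock-inefficiency factor is a percentage, so a factor of 105 inflates the raw mode clock by 5% before it is compared against the core clock limit. A small standalone sketch of that arithmetic follows; the factor value 105 and the 4K@60 timing are illustrative, not taken from any real catalog entry.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the math in dpu_core_perf_adjusted_mode_clk(): rate * factor / 100. */
static uint64_t adjusted_mode_clk(uint64_t mode_clk_rate, uint32_t clk_factor)
{
        if (clk_factor)
                mode_clk_rate = mode_clk_rate * clk_factor / 100;
        return mode_clk_rate;
}

int main(void)
{
        /* Illustrative 4K@60 timing: hdisplay * vtotal * vrefresh. */
        uint64_t raw = 3840ULL * 2250 * 60;          /* ~518.4 MHz   */
        uint64_t adj = adjusted_mode_clk(raw, 105);  /* hypothetical 105% factor */

        printf("raw=%llu Hz adjusted=%llu Hz\n",
               (unsigned long long)raw, (unsigned long long)adj);
        return 0;
}

With those numbers the adjusted rate comes out to roughly 544.3 MHz, which is what dpu_crtc_mode_valid() now compares against max_core_clk_rate.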
+ * @crtc_clk_rate - Unadjusted mode clock rate + * @perf_cfg: performance configuration + */ +u64 dpu_core_perf_adjusted_mode_clk(u64 mode_clk_rate, + const struct dpu_perf_cfg *perf_cfg) +{ + u32 clk_factor; + + clk_factor = perf_cfg->clk_inefficiency_factor; + if (clk_factor) { + mode_clk_rate *= clk_factor; + do_div(mode_clk_rate, 100); + } + + return mode_clk_rate; +} + +/** * _dpu_core_perf_calc_bw() - to calculate BW per crtc * @perf_cfg: performance configuration * @crtc: pointer to a crtc @@ -75,28 +95,21 @@ static u64 _dpu_core_perf_calc_clk(const struct dpu_perf_cfg *perf_cfg, struct drm_plane *plane; struct dpu_plane_state *pstate; struct drm_display_mode *mode; - u64 crtc_clk; - u32 clk_factor; + u64 mode_clk; mode = &state->adjusted_mode; - crtc_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); + mode_clk = (u64)mode->vtotal * mode->hdisplay * drm_mode_vrefresh(mode); drm_atomic_crtc_for_each_plane(plane, crtc) { pstate = to_dpu_plane_state(plane->state); if (!pstate) continue; - crtc_clk = max(pstate->plane_clk, crtc_clk); - } - - clk_factor = perf_cfg->clk_inefficiency_factor; - if (clk_factor) { - crtc_clk *= clk_factor; - do_div(crtc_clk, 100); + mode_clk = max(pstate->plane_clk, mode_clk); } - return crtc_clk; + return dpu_core_perf_adjusted_mode_clk(mode_clk, perf_cfg); } static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h index d2f21d34e501..3740bc97422c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h @@ -54,6 +54,9 @@ struct dpu_core_perf { u32 fix_core_ab_vote; }; +u64 dpu_core_perf_adjusted_mode_clk(u64 clk_rate, + const struct dpu_perf_cfg *perf_cfg); + int dpu_core_perf_crtc_check(struct drm_crtc *crtc, struct drm_crtc_state *state); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 94912b4708fb..4b970a59deaf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -377,11 +377,10 @@ static void _dpu_crtc_setup_blend_cfg(struct dpu_crtc_mixer *mixer, static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) { struct dpu_crtc_state *crtc_state; - int lm_idx, lm_horiz_position; + int lm_idx; crtc_state = to_dpu_crtc_state(crtc->state); - lm_horiz_position = 0; for (lm_idx = 0; lm_idx < crtc_state->num_mixers; lm_idx++) { const struct drm_rect *lm_roi = &crtc_state->lm_bounds[lm_idx]; struct dpu_hw_mixer *hw_lm = crtc_state->mixers[lm_idx].hw_lm; @@ -392,7 +391,7 @@ static void _dpu_crtc_program_lm_output_roi(struct drm_crtc *crtc) cfg.out_width = drm_rect_width(lm_roi); cfg.out_height = drm_rect_height(lm_roi); - cfg.right_mixer = lm_horiz_position++; + cfg.right_mixer = lm_idx & 0x1; cfg.flags = 0; hw_lm->ops.setup_mixer_out(hw_lm, &cfg); } @@ -1534,6 +1533,7 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) { struct dpu_kms *dpu_kms = _dpu_crtc_get_kms(crtc); + u64 adjusted_mode_clk; /* if there is no 3d_mux block we cannot merge LMs so we cannot * split the large layer into 2 LMs, filter out such modes @@ -1541,6 +1541,17 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, if (!dpu_kms->catalog->caps->has_3d_merge && mode->hdisplay > dpu_kms->catalog->caps->max_mixer_width) return MODE_BAD_HVALUE; + + adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock, + 
dpu_kms->perf.perf_cfg); + + /* + * The given mode, adjusted for the perf clock factor, should not exceed + * the max core clock rate + */ + if (dpu_kms->perf.max_core_clk_rate < adjusted_mode_clk * 1000) + return MODE_CLOCK_HIGH; + /* * max crtc width is equal to the max mixer width * 2 and max height is 4K */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 56a5b596554d..46f348972a97 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -446,7 +446,7 @@ static void _dpu_encoder_phys_wb_handle_wbdone_timeout( static int dpu_encoder_phys_wb_wait_for_commit_done( struct dpu_encoder_phys *phys_enc) { - unsigned long ret; + int ret; struct dpu_encoder_wait_info wait_info; struct dpu_encoder_phys_wb *wb_enc = to_dpu_encoder_phys_wb(phys_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index e824cd64fd3f..6641455c4ec6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -338,7 +338,6 @@ static const struct dpu_sspp_sub_blks dpu_dma_sblk = _DMA_SBLK(); *************************************************************/ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x50, 0x80, 0xb0, 0x230, @@ -347,7 +346,6 @@ static const struct dpu_lm_sub_blks msm8998_lm_sblk = { }; static const struct dpu_lm_sub_blks sdm845_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, @@ -356,7 +354,6 @@ static const struct dpu_lm_sub_blks sdm845_lm_sblk = { }; static const struct dpu_lm_sub_blks sc7180_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 7, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68, 0x80, 0x98, 0xb0 @@ -364,7 +361,6 @@ static const struct dpu_lm_sub_blks sc7180_lm_sblk = { }; static const struct dpu_lm_sub_blks sm8750_lm_sblk = { - .maxwidth = DEFAULT_DPU_OUTPUT_LINE_WIDTH, .maxblendstages = 11, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ /* 0x40 + n*0x30 */ @@ -374,7 +370,6 @@ static const struct dpu_lm_sub_blks sm8750_lm_sblk = { }; static const struct dpu_lm_sub_blks qcm2290_lm_sblk = { - .maxwidth = DEFAULT_DPU_LINE_WIDTH, .maxblendstages = 4, /* excluding base layer */ .blendstage_base = { /* offsets relative to mixer base */ 0x20, 0x38, 0x50, 0x68 diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index a78bb2c334e3..f0768f54e9b3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -307,7 +307,6 @@ struct dpu_sspp_sub_blks { * @blendstage_base: Blend-stage register base offset */ struct dpu_lm_sub_blks { - u32 maxwidth; u32 maxblendstages; u32 blendstage_base[MAX_BLOCKS]; }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index a306077647c3..4e5a8ecd31f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -1110,7 +1110,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) { 
struct drm_gpuvm *vm; - vm = msm_kms_init_vm(dpu_kms->dev); + vm = msm_kms_init_vm(dpu_kms->dev, dpu_kms->dev->dev->parent); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 6859e8ef6b05..f54cf0faa1c7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -922,6 +922,9 @@ static int dpu_plane_is_multirect_capable(struct dpu_hw_sspp *sspp, if (MSM_FORMAT_IS_YUV(fmt)) return false; + if (!sspp) + return true; + if (!test_bit(DPU_SSPP_SMART_DMA_V1, &sspp->cap->features) && !test_bit(DPU_SSPP_SMART_DMA_V2, &sspp->cap->features)) return false; @@ -1028,6 +1031,7 @@ static int dpu_plane_try_multirect_shared(struct dpu_plane_state *pstate, prev_pipe->multirect_mode != DPU_SSPP_MULTIRECT_NONE) return false; + /* Do not validate SSPP of current plane when it is not ready */ if (!dpu_plane_is_multirect_capable(pipe->sspp, pipe_cfg, fmt) || !dpu_plane_is_multirect_capable(prev_pipe->sspp, prev_pipe_cfg, prev_fmt)) return false; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 25382120cb1a..2c77c74fac0f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -865,6 +865,21 @@ void dpu_rm_release_all_sspp(struct dpu_global_state *global_state, ARRAY_SIZE(global_state->sspp_to_crtc_id), crtc_id); } +static char *dpu_hw_blk_type_name[] = { + [DPU_HW_BLK_TOP] = "TOP", + [DPU_HW_BLK_SSPP] = "SSPP", + [DPU_HW_BLK_LM] = "LM", + [DPU_HW_BLK_CTL] = "CTL", + [DPU_HW_BLK_PINGPONG] = "pingpong", + [DPU_HW_BLK_INTF] = "INTF", + [DPU_HW_BLK_WB] = "WB", + [DPU_HW_BLK_DSPP] = "DSPP", + [DPU_HW_BLK_MERGE_3D] = "merge_3d", + [DPU_HW_BLK_DSC] = "DSC", + [DPU_HW_BLK_CDM] = "CDM", + [DPU_HW_BLK_MAX] = "unknown", +}; + /** * dpu_rm_get_assigned_resources - Get hw resources of the given type that are * assigned to this encoder @@ -946,13 +961,13 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, } if (num_blks == blks_size) { - DPU_ERROR("More than %d resources assigned to crtc %d\n", - blks_size, crtc_id); + DPU_ERROR("More than %d %s assigned to crtc %d\n", + blks_size, dpu_hw_blk_type_name[type], crtc_id); break; } if (!hw_blks[i]) { - DPU_ERROR("Allocated resource %d unavailable to assign to crtc %d\n", - type, crtc_id); + DPU_ERROR("%s unavailable to assign to crtc %d\n", + dpu_hw_blk_type_name[type], crtc_id); break; } blks[num_blks++] = hw_blks[i]; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 8ff496082902..cd73468e369a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -80,7 +80,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, static const struct drm_connector_funcs dpu_wb_conn_funcs = { .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; @@ -131,12 +130,9 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, drm_connector_helper_add(&dpu_wb_conn->base.base, &dpu_wb_conn_helper_funcs); - /* DPU initializes the encoder and sets it up completely for writeback - * cases and hence should use the new API drm_writeback_connector_init_with_encoder - * to initialize the writeback connector - */ 
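The dpu_rm change above swaps numeric block-type codes in error messages for readable names via a designated-initializer lookup table. A minimal sketch of that pattern is below; the enum values and names here are placeholders, not the full DPU block list.

#include <stdio.h>

enum blk_type { BLK_TOP, BLK_SSPP, BLK_LM, BLK_CTL, BLK_MAX };

/* Designated initializers keep the table in sync with the enum. */
static const char * const blk_type_name[] = {
        [BLK_TOP]  = "TOP",
        [BLK_SSPP] = "SSPP",
        [BLK_LM]   = "LM",
        [BLK_CTL]  = "CTL",
        [BLK_MAX]  = "unknown",
};

static const char *blk_name(enum blk_type t)
{
        /* Clamp out-of-range values to the "unknown" slot. */
        if ((int)t < 0 || t > BLK_MAX)
                t = BLK_MAX;
        return blk_type_name[t];
}

int main(void)
{
        printf("More than 4 %s assigned to crtc 1\n", blk_name(BLK_LM));
        printf("%s unavailable to assign to crtc 1\n", blk_name((enum blk_type)42));
        return 0;
}

The table costs a few pointers but turns "resource 5 unavailable" style logs into something that can be read without the header open in another window.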
- rc = drm_writeback_connector_init_with_encoder(dev, &dpu_wb_conn->base, enc, - &dpu_wb_conn_funcs, format_list, num_formats); + rc = drmm_writeback_connector_init(dev, &dpu_wb_conn->base, + &dpu_wb_conn_funcs, enc, + format_list, num_formats); if (!rc) dpu_wb_conn->wb_enc = enc; diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 0952c7f18abd..809ca191e9de 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -391,11 +391,9 @@ static void read_mdp_hw_revision(struct mdp4_kms *mdp4_kms, static int mdp4_kms_init(struct drm_device *dev) { - struct platform_device *pdev = to_platform_device(dev->dev); struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(priv->kms)); struct msm_kms *kms = NULL; - struct msm_mmu *mmu; struct drm_gpuvm *vm; int ret; u32 major, minor; @@ -458,29 +456,14 @@ static int mdp4_kms_init(struct drm_device *dev) mdp4_disable(mdp4_kms); mdelay(16); - mmu = msm_iommu_new(&pdev->dev, 0); - if (IS_ERR(mmu)) { - ret = PTR_ERR(mmu); + vm = msm_kms_init_vm(mdp4_kms->dev, NULL); + if (IS_ERR(vm)) { + ret = PTR_ERR(vm); goto fail; - } else if (!mmu) { - DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " - "contig buffers for scanout\n"); - vm = NULL; - } else { - vm = msm_gem_vm_create(dev, mmu, "mdp4", - 0x1000, 0x100000000 - 0x1000, - true); - - if (IS_ERR(vm)) { - if (!IS_ERR(mmu)) - mmu->funcs->destroy(mmu); - ret = PTR_ERR(vm); - goto fail; - } - - kms->vm = vm; } + kms->vm = vm; + ret = modeset_init(mdp4_kms); if (ret) { DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret); @@ -529,7 +512,7 @@ static int mdp4_probe(struct platform_device *pdev) mdp4_kms = devm_kzalloc(dev, sizeof(*mdp4_kms), GFP_KERNEL); if (!mdp4_kms) - return dev_err_probe(dev, -ENOMEM, "failed to allocate kms\n"); + return -ENOMEM; mdp4_kms->mmio = msm_ioremap(pdev, NULL); if (IS_ERR(mdp4_kms->mmio)) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h index fb348583dc84..06458d4ee48c 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h @@ -202,6 +202,6 @@ static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) } #endif -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev); +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev); #endif /* __MDP4_KMS_H__ */ diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c index 06a307c1272d..1051873057f6 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c @@ -375,7 +375,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev) drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); - mdp4_lcdc_encoder->lcdc_clk = mpd4_get_lcdc_clock(dev); + mdp4_lcdc_encoder->lcdc_clk = mdp4_get_lcdc_clock(dev); if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get lvds_clk\n"); return ERR_CAST(mdp4_lcdc_encoder->lcdc_clk); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c index fa2c29470510..04c49bf3d854 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c @@ -54,7 +54,7 @@ static const struct pll_rate *find_rate(unsigned long rate) return &freqtbl[i-1]; } -static int mpd4_lvds_pll_enable(struct clk_hw *hw) +static int 
mdp4_lvds_pll_enable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -80,7 +80,7 @@ static int mpd4_lvds_pll_enable(struct clk_hw *hw) return 0; } -static void mpd4_lvds_pll_disable(struct clk_hw *hw) +static void mdp4_lvds_pll_disable(struct clk_hw *hw) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); struct mdp4_kms *mdp4_kms = get_kms(lvds_pll); @@ -91,21 +91,24 @@ static void mpd4_lvds_pll_disable(struct clk_hw *hw) mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x0); } -static unsigned long mpd4_lvds_pll_recalc_rate(struct clk_hw *hw, +static unsigned long mdp4_lvds_pll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); return lvds_pll->pixclk; } -static long mpd4_lvds_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int mdp4_lvds_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); - return pll_rate->rate; + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; + + return 0; } -static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, +static int mdp4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw); @@ -114,26 +117,26 @@ static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate, } -static const struct clk_ops mpd4_lvds_pll_ops = { - .enable = mpd4_lvds_pll_enable, - .disable = mpd4_lvds_pll_disable, - .recalc_rate = mpd4_lvds_pll_recalc_rate, - .round_rate = mpd4_lvds_pll_round_rate, - .set_rate = mpd4_lvds_pll_set_rate, +static const struct clk_ops mdp4_lvds_pll_ops = { + .enable = mdp4_lvds_pll_enable, + .disable = mdp4_lvds_pll_disable, + .recalc_rate = mdp4_lvds_pll_recalc_rate, + .determine_rate = mdp4_lvds_pll_determine_rate, + .set_rate = mdp4_lvds_pll_set_rate, }; -static const struct clk_parent_data mpd4_lvds_pll_parents[] = { +static const struct clk_parent_data mdp4_lvds_pll_parents[] = { { .fw_name = "pxo", .name = "pxo", }, }; static struct clk_init_data pll_init = { - .name = "mpd4_lvds_pll", - .ops = &mpd4_lvds_pll_ops, - .parent_data = mpd4_lvds_pll_parents, - .num_parents = ARRAY_SIZE(mpd4_lvds_pll_parents), + .name = "mdp4_lvds_pll", + .ops = &mdp4_lvds_pll_ops, + .parent_data = mdp4_lvds_pll_parents, + .num_parents = ARRAY_SIZE(mdp4_lvds_pll_parents), }; -static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) +static struct clk_hw *mdp4_lvds_pll_init(struct drm_device *dev) { struct mdp4_lvds_pll *lvds_pll; int ret; @@ -156,14 +159,14 @@ static struct clk_hw *mpd4_lvds_pll_init(struct drm_device *dev) return &lvds_pll->pll_hw; } -struct clk *mpd4_get_lcdc_clock(struct drm_device *dev) +struct clk *mdp4_get_lcdc_clock(struct drm_device *dev) { struct clk_hw *hw; struct clk *clk; /* TODO: do we need different pll in other cases? 
*/ - hw = mpd4_lvds_pll_init(dev); + hw = mdp4_lvds_pll_init(dev); if (IS_ERR(hw)) { DRM_DEV_ERROR(dev->dev, "failed to register LVDS PLL\n"); return ERR_CAST(hw); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 5b6ca8dd929e..61edf6864092 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -534,7 +534,7 @@ static int mdp5_kms_init(struct drm_device *dev) } mdelay(16); - vm = msm_kms_init_vm(mdp5_kms->dev); + vm = msm_kms_init_vm(mdp5_kms->dev, pdev->dev.parent); if (IS_ERR(vm)) { ret = PTR_ERR(vm); goto fail; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 3cbf08231492..e391505fdaf0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -109,6 +109,7 @@ struct msm_dsi_phy { struct msm_dsi_dphy_timing timing; const struct msm_dsi_phy_cfg *cfg; void *tuning_cfg; + void *pll_data; enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index af2e30f3f842..ec486ff02c9b 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -444,21 +444,19 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw, return (unsigned long)vco_rate; } -static long dsi_pll_10nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_10nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_10nm *pll_10nm = to_pll_10nm(hw); - if (rate < pll_10nm->phy->cfg->min_pll_rate) - return pll_10nm->phy->cfg->min_pll_rate; - else if (rate > pll_10nm->phy->cfg->max_pll_rate) - return pll_10nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_10nm->phy->cfg->min_pll_rate, pll_10nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_10nm_vco = { - .round_rate = dsi_pll_10nm_clk_round_rate, + .determine_rate = dsi_pll_10nm_clk_determine_rate, .set_rate = dsi_pll_10nm_vco_set_rate, .recalc_rate = dsi_pll_10nm_vco_recalc_rate, .prepare = dsi_pll_10nm_vco_prepare, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index 3a1c8ece6657..fdefcbd9c284 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -578,21 +578,19 @@ static void dsi_pll_14nm_vco_unprepare(struct clk_hw *hw) pll_14nm->phy->pll_on = false; } -static long dsi_pll_14nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_14nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm *pll_14nm = to_pll_14nm(hw); - if (rate < pll_14nm->phy->cfg->min_pll_rate) - return pll_14nm->phy->cfg->min_pll_rate; - else if (rate > pll_14nm->phy->cfg->max_pll_rate) - return pll_14nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_14nm->phy->cfg->min_pll_rate, pll_14nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_14nm_vco = { - .round_rate = dsi_pll_14nm_clk_round_rate, + .determine_rate = dsi_pll_14nm_clk_determine_rate, .set_rate = dsi_pll_14nm_vco_set_rate, .recalc_rate = dsi_pll_14nm_vco_recalc_rate, .prepare = dsi_pll_14nm_vco_prepare, @@ -622,18 +620,20 @@ static unsigned long 
dsi_pll_14nm_postdiv_recalc_rate(struct clk_hw *hw, postdiv->flags, width); } -static long dsi_pll_14nm_postdiv_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *prate) +static int dsi_pll_14nm_postdiv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw); struct dsi_pll_14nm *pll_14nm = postdiv->pll; - DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, rate); + DBG("DSI%d PLL parent rate=%lu", pll_14nm->phy->id, req->rate); - return divider_round_rate(hw, rate, prate, NULL, - postdiv->width, - postdiv->flags); + req->rate = divider_round_rate(hw, req->rate, &req->best_parent_rate, + NULL, + postdiv->width, + postdiv->flags); + + return 0; } static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -680,7 +680,7 @@ static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops clk_ops_dsi_pll_14nm_postdiv = { .recalc_rate = dsi_pll_14nm_postdiv_recalc_rate, - .round_rate = dsi_pll_14nm_postdiv_round_rate, + .determine_rate = dsi_pll_14nm_postdiv_determine_rate, .set_rate = dsi_pll_14nm_postdiv_set_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c index 90348a2af3e9..d00e415b9a99 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c @@ -533,21 +533,20 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, + pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_hpm, @@ -556,7 +555,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_hpm = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_lp, @@ -565,7 +564,7 @@ static const struct clk_ops clk_ops_dsi_pll_28nm_vco_lp = { }; static const struct clk_ops clk_ops_dsi_pll_28nm_vco_8226 = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare_8226, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index f3643320ff2f..8dcce9581dc3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -231,21 +231,19 @@ static void dsi_pll_28nm_vco_unprepare(struct clk_hw *hw) pll_28nm->phy->pll_on = false; } -static long 
dsi_pll_28nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_28nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_28nm *pll_28nm = to_pll_28nm(hw); - if (rate < pll_28nm->phy->cfg->min_pll_rate) - return pll_28nm->phy->cfg->min_pll_rate; - else if (rate > pll_28nm->phy->cfg->max_pll_rate) - return pll_28nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_28nm->phy->cfg->min_pll_rate, pll_28nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_28nm_vco = { - .round_rate = dsi_pll_28nm_clk_round_rate, + .determine_rate = dsi_pll_28nm_clk_determine_rate, .set_rate = dsi_pll_28nm_clk_set_rate, .recalc_rate = dsi_pll_28nm_clk_recalc_rate, .prepare = dsi_pll_28nm_vco_prepare, @@ -296,18 +294,20 @@ static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate) return 8; } -static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) +static int clk_bytediv_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { unsigned long best_parent; unsigned int factor; - factor = get_vco_mul_factor(rate); + factor = get_vco_mul_factor(req->rate); + + best_parent = req->rate * factor; + req->best_parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); - best_parent = rate * factor; - *prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); + req->rate = req->best_parent_rate / factor; - return *prate / factor; + return 0; } static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, @@ -328,7 +328,7 @@ static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, /* Our special byte clock divider ops */ static const struct clk_ops clk_bytediv_ops = { - .round_rate = clk_bytediv_round_rate, + .determine_rate = clk_bytediv_determine_rate, .set_rate = clk_bytediv_set_rate, .recalc_rate = clk_bytediv_recalc_rate, }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 8c98f91a5930..32f06edd21a9 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -90,6 +90,13 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ spinlock_t pclk_mux_lock; + /* + * protects REG_DSI_7nm_PHY_CMN_CTRL_0 register and pll_enable_cnt + * member + */ + spinlock_t pll_enable_lock; + int pll_enable_cnt; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -103,6 +110,9 @@ struct dsi_pll_7nm { */ static struct dsi_pll_7nm *pll_7nm_list[DSI_MAX]; +static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll); +static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll); + static void dsi_pll_setup_config(struct dsi_pll_config *config) { config->ssc_freq = 31500; @@ -340,6 +350,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); struct dsi_pll_config config; + dsi_pll_enable_pll_bias(pll_7nm); DBG("DSI PLL%d rate=%lu, parent's=%lu", pll_7nm->phy->id, rate, parent_rate); @@ -357,6 +368,7 @@ static int dsi_pll_7nm_vco_set_rate(struct clk_hw *hw, unsigned long rate, dsi_pll_ssc_commit(pll_7nm, &config); + dsi_pll_disable_pll_bias(pll_7nm); /* flush, ensure all register writes are done*/ wmb(); @@ -385,19 +397,47 @@ static int dsi_pll_7nm_lock_status(struct dsi_pll_7nm *pll) static void dsi_pll_disable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = 
readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + spin_lock_irqsave(&pll->pll_enable_lock, flags); + --pll->pll_enable_cnt; + if (pll->pll_enable_cnt < 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + DRM_DEV_ERROR_RATELIMITED(&pll->phy->pdev->dev, + "bug: imbalance in disabling PLL bias\n"); + return; + } else if (pll->pll_enable_cnt > 0) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + return; + } /* else: == 0 */ + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data &= ~DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) { - u32 data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + unsigned long flags; + u32 data; + + spin_lock_irqsave(&pll->pll_enable_lock, flags); + if (pll->pll_enable_cnt++) { + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + WARN_ON(pll->pll_enable_cnt == INT_MAX); + return; + } + + data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); + data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); - writel(data | BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0xc0, pll->phy->pll_base + REG_DSI_7nm_PHY_PLL_SYSTEM_MUXES); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); ndelay(250); } @@ -491,6 +531,10 @@ static int dsi_pll_7nm_vco_prepare(struct clk_hw *hw) if (pll_7nm->slave) dsi_pll_enable_global_clk(pll_7nm->slave); + writel(0x1, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + if (pll_7nm->slave) + writel(0x1, pll_7nm->slave->phy->base + REG_DSI_7nm_PHY_CMN_RBUF_CTRL); + error: return rc; } @@ -534,6 +578,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, u32 dec; u64 pll_freq, tmp64; + dsi_pll_enable_pll_bias(pll_7nm); dec = readl(base + REG_DSI_7nm_PHY_PLL_DECIMAL_DIV_START_1); dec &= 0xff; @@ -558,24 +603,24 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw, DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x", pll_7nm->phy->id, (unsigned long)vco_rate, dec, frac); + dsi_pll_disable_pll_bias(pll_7nm); + return (unsigned long)vco_rate; } -static long dsi_pll_7nm_clk_round_rate(struct clk_hw *hw, - unsigned long rate, unsigned long *parent_rate) +static int dsi_pll_7nm_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(hw); - if (rate < pll_7nm->phy->cfg->min_pll_rate) - return pll_7nm->phy->cfg->min_pll_rate; - else if (rate > pll_7nm->phy->cfg->max_pll_rate) - return pll_7nm->phy->cfg->max_pll_rate; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, + pll_7nm->phy->cfg->min_pll_rate, pll_7nm->phy->cfg->max_pll_rate); + + return 0; } static const struct clk_ops clk_ops_dsi_pll_7nm_vco = { - .round_rate = dsi_pll_7nm_clk_round_rate, + .determine_rate = dsi_pll_7nm_clk_determine_rate, .set_rate = dsi_pll_7nm_vco_set_rate, .recalc_rate = dsi_pll_7nm_vco_recalc_rate, .prepare = dsi_pll_7nm_vco_prepare, @@ -593,6 +638,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) void __iomem *phy_base = pll_7nm->phy->base; u32 cmn_clk_cfg0, cmn_clk_cfg1; + dsi_pll_enable_pll_bias(pll_7nm); cached->pll_out_div = readl(pll_7nm->phy->pll_base + 
REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); cached->pll_out_div &= 0x3; @@ -604,6 +650,7 @@ static void dsi_7nm_pll_save_state(struct msm_dsi_phy *phy) cmn_clk_cfg1 = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); cached->pll_mux = FIELD_GET(DSI_7nm_PHY_CMN_CLK_CFG1_DSICLK_SEL__MASK, cmn_clk_cfg1); + dsi_pll_disable_pll_bias(pll_7nm); DBG("DSI PLL%d outdiv %x bit_clk_div %x pix_clk_div %x pll_mux %x", pll_7nm->phy->id, cached->pll_out_div, cached->bit_clk_div, cached->pix_clk_div, cached->pll_mux); @@ -826,8 +873,10 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) spin_lock_init(&pll_7nm->postdiv_lock); spin_lock_init(&pll_7nm->pclk_mux_lock); + spin_lock_init(&pll_7nm->pll_enable_lock); pll_7nm->phy = phy; + phy->pll_data = pll_7nm; ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws); if (ret) { @@ -839,6 +888,12 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_7nm_pll_restore_state(). + */ + if (!dsi_pll_7nm_vco_recalc_rate(&pll_7nm->clk_hw, VCO_REF_CLK_RATE)) + pll_7nm->vco_current_rate = pll_7nm->phy->cfg->min_pll_rate; return 0; } @@ -910,8 +965,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, u32 const delay_us = 5; u32 const timeout_us = 1000; struct msm_dsi_dphy_timing *timing = &phy->timing; + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; bool less_than_1500_mhz; + unsigned long flags; u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; u32 glbl_pemph_ctrl_0; u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; @@ -1033,9 +1090,13 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 1; /* de-assert digital and pll power down */ - data = BIT(6) | BIT(5); + data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B | + DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* Assert PLL core reset */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL); @@ -1148,7 +1209,9 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { + struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; + unsigned long flags; u32 data; DBG(""); @@ -1175,8 +1238,12 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0); + spin_lock_irqsave(&pll->pll_enable_lock, flags); + pll->pll_enable_cnt = 0; /* Turn off all PHY blocks */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0); + spin_unlock_irqrestore(&pll->pll_enable_lock, flags); + /* make sure phy is turned off */ wmb(); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 8c8d80b59573..36e928b0fd5a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -629,16 +629,12 @@ static int hdmi_8996_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8996_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8996_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return 
HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw, @@ -684,7 +680,7 @@ static int hdmi_8996_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8996_pll_ops = { .set_rate = hdmi_8996_pll_set_clk_rate, - .round_rate = hdmi_8996_pll_round_rate, + .determine_rate = hdmi_8996_pll_determine_rate, .recalc_rate = hdmi_8996_pll_recalc_rate, .prepare = hdmi_8996_pll_prepare, .unprepare = hdmi_8996_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c index 33bb48ae58a2..a86ff3706369 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c @@ -646,16 +646,12 @@ static int hdmi_8998_pll_prepare(struct clk_hw *hw) return 0; } -static long hdmi_8998_pll_round_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long *parent_rate) +static int hdmi_8998_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - if (rate < HDMI_PCLK_MIN_FREQ) - return HDMI_PCLK_MIN_FREQ; - else if (rate > HDMI_PCLK_MAX_FREQ) - return HDMI_PCLK_MAX_FREQ; - else - return rate; + req->rate = clamp_t(unsigned long, req->rate, HDMI_PCLK_MIN_FREQ, HDMI_PCLK_MAX_FREQ); + + return 0; } static unsigned long hdmi_8998_pll_recalc_rate(struct clk_hw *hw, @@ -688,7 +684,7 @@ static int hdmi_8998_pll_is_enabled(struct clk_hw *hw) static const struct clk_ops hdmi_8998_pll_ops = { .set_rate = hdmi_8998_pll_set_clk_rate, - .round_rate = hdmi_8998_pll_round_rate, + .determine_rate = hdmi_8998_pll_determine_rate, .recalc_rate = hdmi_8998_pll_recalc_rate, .prepare = hdmi_8998_pll_prepare, .unprepare = hdmi_8998_pll_unprepare, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c index 83c8781fcc3f..6ba6bbdb7e05 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c @@ -373,12 +373,14 @@ static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw, return pll->pixclk; } -static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static int hdmi_pll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - const struct pll_rate *pll_rate = find_rate(rate); + const struct pll_rate *pll_rate = find_rate(req->rate); + + req->rate = pll_rate->rate; - return pll_rate->rate; + return 0; } static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, @@ -402,7 +404,7 @@ static const struct clk_ops hdmi_pll_ops = { .enable = hdmi_pll_enable, .disable = hdmi_pll_disable, .recalc_rate = hdmi_pll_recalc_rate, - .round_rate = hdmi_pll_round_rate, + .determine_rate = hdmi_pll_determine_rate, .set_rate = hdmi_pll_set_rate, }; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9dcc7a596a11..7e977fec4100 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -826,6 +826,7 @@ static const struct file_operations fops = { #define DRIVER_FEATURES_KMS ( \ DRIVER_GEM | \ + DRIVER_GEM_GPUVA | \ DRIVER_ATOMIC | \ DRIVER_MODESET | \ 0 ) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 985db9febd98..6d847d593f1a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -229,7 +229,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc); int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); void 
msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu); -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev); +struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev); bool msm_use_mmu(struct drm_device *dev); int msm_ioctl_gem_submit(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a6635ba426d4..688705a871cf 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -193,7 +193,7 @@ static struct page **get_pages(struct drm_gem_object *obj) if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; p = drm_gem_get_pages(obj); @@ -1171,7 +1171,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct /* convenience method to construct a GEM buffer object, and userspace handle */ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, + size_t size, uint32_t flags, uint32_t *handle, char *name) { struct drm_gem_object *obj; @@ -1237,9 +1237,8 @@ static const struct drm_gem_object_funcs msm_gem_object_funcs = { .vm_ops = &vm_ops, }; -static int msm_gem_new_impl(struct drm_device *dev, - uint32_t size, uint32_t flags, - struct drm_gem_object **obj) +static int msm_gem_new_impl(struct drm_device *dev, uint32_t flags, + struct drm_gem_object **obj) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1273,7 +1272,7 @@ static int msm_gem_new_impl(struct drm_device *dev, return 0; } -struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) +struct drm_gem_object *msm_gem_new(struct drm_device *dev, size_t size, uint32_t flags) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1288,7 +1287,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32 if (size == 0) return ERR_PTR(-EINVAL); - ret = msm_gem_new_impl(dev, size, flags, &obj); + ret = msm_gem_new_impl(dev, flags, &obj); if (ret) return ERR_PTR(ret); @@ -1328,12 +1327,12 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; struct drm_gem_object *obj; - uint32_t size; - int ret, npages; + size_t size, npages; + int ret; size = PAGE_ALIGN(dmabuf->size); - ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); + ret = msm_gem_new_impl(dev, MSM_BO_WC, &obj); if (ret) return ERR_PTR(ret); @@ -1376,7 +1375,7 @@ fail: return ERR_PTR(ret); } -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova) { diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 751c3b4965bc..a4cf31853c50 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -297,10 +297,10 @@ bool msm_gem_active(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_fini(struct drm_gem_object *obj); int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, char *name); + size_t size, uint32_t flags, uint32_t *handle, char *name); struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t 
size, uint32_t flags); -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, + size_t size, uint32_t flags); +void *msm_gem_kernel_new(struct drm_device *dev, size_t size, uint32_t flags, struct drm_gpuvm *vm, struct drm_gem_object **bo, uint64_t *iova); void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gpuvm *vm); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index c0a33ac839cb..036d34c674d9 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -15,7 +15,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - int npages = obj->size >> PAGE_SHIFT; + size_t npages = obj->size >> PAGE_SHIFT; if (msm_obj->flags & MSM_BO_NO_SHARE) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 6df6b7c0984d..8316af1723c2 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -1030,6 +1030,7 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args struct drm_device *dev = job->vm->drm; int ret = 0; int cnt = 0; + int i = -1; if (args->nr_ops == 1) { /* Single op case, the op is inlined: */ @@ -1063,11 +1064,12 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args spin_lock(&file->table_lock); - for (unsigned i = 0; i < args->nr_ops; i++) { + for (i = 0; i < args->nr_ops; i++) { + struct msm_vm_bind_op *op = &job->ops[i]; struct drm_gem_object *obj; - if (!job->ops[i].handle) { - job->ops[i].obj = NULL; + if (!op->handle) { + op->obj = NULL; continue; } @@ -1075,16 +1077,22 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args * normally use drm_gem_object_lookup(), but for bulk lookup * all under single table_lock just hit object_idr directly: */ - obj = idr_find(&file->object_idr, job->ops[i].handle); + obj = idr_find(&file->object_idr, op->handle); if (!obj) { - ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", job->ops[i].handle, i); + ret = UERR(EINVAL, dev, "invalid handle %u at index %u\n", op->handle, i); goto out_unlock; } drm_gem_object_get(obj); - job->ops[i].obj = obj; + op->obj = obj; cnt++; + + if ((op->range + op->obj_offset) > obj->size) { + ret = UERR(EINVAL, dev, "invalid range: %016llx + %016llx > %016zx\n", + op->range, op->obj_offset, obj->size); + goto out_unlock; + } } *nr_bos = cnt; @@ -1092,6 +1100,17 @@ vm_bind_job_lookup_ops(struct msm_vm_bind_job *job, struct drm_msm_vm_bind *args out_unlock: spin_unlock(&file->table_lock); + if (ret) { + for (; i >= 0; i--) { + struct msm_vm_bind_op *op = &job->ops[i]; + + if (!op->obj) + continue; + + drm_gem_object_put(op->obj); + op->obj = NULL; + } + } out: return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 26c5ce897cbb..17759abc46d7 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -304,7 +304,7 @@ static void crashstate_get_bos(struct msm_gpu_state *state, struct msm_gem_submi sizeof(struct msm_gpu_state_bo), GFP_KERNEL); for (int i = 0; state->bos && i < submit->nr_bos; i++) { - struct drm_gem_object *obj = submit->bos[i].obj;; + struct drm_gem_object *obj = submit->bos[i].obj; bool dump = rd_full || (submit->bos[i].flags & MSM_SUBMIT_BO_DUMP); msm_gem_lock(obj); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index b2a96544f92a..a597f2bee30b 100644 --- 
a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -16,6 +16,7 @@ #include "msm_drv.h" #include "msm_fence.h" +#include "msm_gpu_trace.h" #include "msm_ringbuffer.h" #include "msm_gem.h" @@ -91,6 +92,7 @@ struct msm_gpu_funcs { * for cmdstream that is buffered in this FIFO upstream of the CP fw. */ bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); + void (*sysprof_setup)(struct msm_gpu *gpu); }; /* Additional state for iommu faults: */ @@ -613,16 +615,19 @@ struct msm_gpu_state { static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { + trace_msm_gpu_regaccess(reg); writel(data, gpu->mmio + (reg << 2)); } static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) { + trace_msm_gpu_regaccess(reg); return readl(gpu->mmio + (reg << 2)); } static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or) { + trace_msm_gpu_regaccess(reg); msm_rmw(gpu->mmio + (reg << 2), mask, or); } @@ -644,7 +649,9 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) * when the lo is read, so make sure to read the lo first to trigger * that */ + trace_msm_gpu_regaccess(reg); val = (u64) readl(gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); val |= ((u64) readl(gpu->mmio + ((reg + 1) << 2)) << 32); return val; @@ -652,8 +659,10 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg) static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val) { + trace_msm_gpu_regaccess(reg); /* Why not a writeq here? Read the screed above */ writel(lower_32_bits(val), gpu->mmio + (reg << 2)); + trace_msm_gpu_regaccess(reg+1); writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2)); } diff --git a/drivers/gpu/drm/msm/msm_gpu_trace.h b/drivers/gpu/drm/msm/msm_gpu_trace.h index 781bbe5540bd..5417f8d389a3 100644 --- a/drivers/gpu/drm/msm/msm_gpu_trace.h +++ b/drivers/gpu/drm/msm/msm_gpu_trace.h @@ -219,6 +219,18 @@ TRACE_EVENT(msm_mmu_prealloc_cleanup, TP_printk("count=%u, remaining=%u", __entry->count, __entry->remaining) ); +TRACE_EVENT(msm_gpu_regaccess, + TP_PROTO(u32 offset), + TP_ARGS(offset), + TP_STRUCT__entry( + __field(u32, offset) + ), + TP_fast_assign( + __entry->offset = offset; + ), + TP_printk("offset=0x%x", __entry->offset) +); + #endif #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 76cdd5ea06a0..0e18619f96cb 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -721,7 +721,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) int ret; if (!device_iommu_mapped(dev)) - return NULL; + return ERR_PTR(-ENODEV); domain = iommu_paging_domain_alloc(dev); if (IS_ERR(domain)) @@ -756,7 +756,7 @@ struct msm_mmu *msm_iommu_disp_new(struct device *dev, unsigned long quirks) struct msm_mmu *mmu; mmu = msm_iommu_new(dev, quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); @@ -772,11 +772,11 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig struct msm_mmu *mmu; mmu = msm_iommu_new(dev, quirks); - if (IS_ERR_OR_NULL(mmu)) + if (IS_ERR(mmu)) return mmu; iommu = to_msm_iommu(mmu); - if (adreno_smmu && adreno_smmu->cookie) { + if (adreno_smmu->cookie) { const struct io_pgtable_cfg *cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie); size_t tblsz = get_tblsz(cfg); diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c index 56828d218e88..6e5e94f5c9a7 100644 --- a/drivers/gpu/drm/msm/msm_kms.c +++ b/drivers/gpu/drm/msm/msm_kms.c 
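The msm_iommu.c hunk above switches msm_iommu_new() from returning NULL on "no IOMMU" to ERR_PTR(-ENODEV), so callers can use plain IS_ERR() instead of IS_ERR_OR_NULL(). A compact userspace sketch of that convention follows; the err_ptr()/is_err()/ptr_err() helpers below are simplified stand-ins for the kernel's <linux/err.h>, kept only so the example compiles on its own.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for ERR_PTR()/IS_ERR()/PTR_ERR(). */
#define MAX_ERRNO 4095
static inline void *err_ptr(long err)      { return (void *)(intptr_t)err; }
static inline int   is_err(const void *p)  { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }
static inline long  ptr_err(const void *p) { return (long)(intptr_t)p; }

struct mmu { int dummy; };

static struct mmu *mmu_new(int have_iommu)
{
        static struct mmu m;

        if (!have_iommu)
                return err_ptr(-ENODEV);   /* was: return NULL */
        return &m;
}

int main(void)
{
        struct mmu *mmu = mmu_new(0);

        /* Callers only need one error check, no separate NULL case. */
        if (is_err(mmu))
                printf("mmu_new failed: %ld\n", ptr_err(mmu));
        return 0;
}

Collapsing the NULL case into an errno-carrying pointer is what lets msm_kms_init_vm() above drop its "fallback to phys contig buffers" branch entirely.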
@@ -177,12 +177,11 @@ static int msm_kms_fault_handler(void *arg, unsigned long iova, int flags, void return -ENOSYS; } -struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) +struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev, struct device *mdss_dev) { struct drm_gpuvm *vm; struct msm_mmu *mmu; struct device *mdp_dev = dev->dev; - struct device *mdss_dev = mdp_dev->parent; struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct device *iommu_dev; @@ -193,18 +192,17 @@ struct drm_gpuvm *msm_kms_init_vm(struct drm_device *dev) */ if (device_iommu_mapped(mdp_dev)) iommu_dev = mdp_dev; - else + else if (mdss_dev && device_iommu_mapped(mdss_dev)) iommu_dev = mdss_dev; + else { + drm_info(dev, "no IOMMU, bailing out\n"); + return ERR_PTR(-ENODEV); + } mmu = msm_iommu_disp_new(iommu_dev, 0); if (IS_ERR(mmu)) return ERR_CAST(mmu); - if (!mmu) { - drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n"); - return NULL; - } - vm = msm_gem_vm_create(dev, mmu, "mdp_kms", 0x1000, 0x100000000 - 0x1000, true); if (IS_ERR(vm)) { diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 39885b333910..2d0e3e784c04 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -154,8 +154,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss) dev = msm_mdss->dev; - domain = irq_domain_create_linear(of_fwnode_handle(dev->of_node), 32, - &msm_mdss_irqdomain_ops, msm_mdss); + domain = irq_domain_create_linear(dev_fwnode(dev), 32, &msm_mdss_irqdomain_ops, msm_mdss); if (!domain) { dev_err(dev, "failed to add irq_domain\n"); return -EINVAL; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 8617a82cd6b3..d53dfad16bde 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -40,6 +40,10 @@ int msm_context_set_sysprof(struct msm_context *ctx, struct msm_gpu *gpu, int sy break; } + /* Some gpu families require additional setup for sysprof */ + if (gpu->funcs->sysprof_setup) + gpu->funcs->sysprof_setup(gpu); + ctx->sysprof = sysprof; return 0; diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml index 86fab2750ba7..9459b6038217 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml @@ -814,7 +814,7 @@ by a particular renderpass/blit. <bitfield name="Y" low="16" high="29" type="uint"/> </bitset> - <reg32 offset="0x8000" name="GRAS_CL_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_cntl" inline="yes"> <bitfield name="CLIP_DISABLE" pos="0" type="boolean"/> <bitfield name="ZNEAR_CLIP_DISABLE" pos="1" type="boolean"/> <bitfield name="ZFAR_CLIP_DISABLE" pos="2" type="boolean"/> @@ -826,18 +826,20 @@ by a particular renderpass/blit. 
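The sysprof_setup hook added to msm_gpu_funcs above is optional; msm_context_set_sysprof() simply invokes it when a backend provides one. A purely illustrative wiring of the hook (this patch adds only the function pointer and the call site, not any implementation; names below are made up):

#include "msm_gpu.h"

/* Hypothetical per-family setup run before system profiling starts,
 * e.g. programming always-on performance counters. */
static void example_sysprof_setup(struct msm_gpu *gpu)
{
	/* family-specific profiling setup would go here */
}

static const struct msm_gpu_funcs example_funcs = {
	/* ...other hooks elided for brevity... */
	.sysprof_setup = example_sysprof_setup,
};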
<bitfield name="VP_CLIP_CODE_IGNORE" pos="7" type="boolean"/> <bitfield name="VP_XFORM_DISABLE" pos="8" type="boolean"/> <bitfield name="PERSP_DIVISION_DISABLE" pos="9" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x8000" name="GRAS_CL_CNTL" type="a6xx_gras_cl_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_gras_xs_clip_cull_distance" inline="yes"> <bitfield name="CLIP_MASK" low="0" high="7"/> <bitfield name="CULL_MASK" low="8" high="15"/> </bitset> - <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit"/> - <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit"/> + <reg32 offset="0x8001" name="GRAS_CL_VS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8002" name="GRAS_CL_DS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8003" name="GRAS_CL_GS_CLIP_CULL_DISTANCE" type="a6xx_gras_xs_clip_cull_distance" usage="rp_blit" variants="A6XX-A7XX" /> + <reg32 offset="0x8004" name="GRAS_CL_ARRAY_SIZE" low="0" high="10" type="uint" usage="rp_blit" variants="A6XX-A7XX" /> - <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_cl_interp_cntl" inline="yes"> <!-- see also RB_INTERP_CNTL --> <bitfield name="IJ_PERSP_PIXEL" pos="0" type="boolean"/> <bitfield name="IJ_PERSP_CENTROID" pos="1" type="boolean"/> @@ -848,26 +850,69 @@ by a particular renderpass/blit. 
<bitfield name="COORD_MASK" low="6" high="9" type="hex"/> <bitfield name="UNK10" pos="10" type="boolean" variants="A7XX-"/> <bitfield name="UNK11" pos="11" type="boolean" variants="A7XX-"/> - </reg32> - <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8005" name="GRAS_CL_INTERP_CNTL" type="a6xx_gras_cl_interp_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_cl_guardband_clip_adj" inline="true"> <bitfield name="HORZ" low="0" high="8" type="uint"/> <bitfield name="VERT" low="10" high="18" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8006" name="GRAS_CL_GUARDBAND_CLIP_ADJ" type="a6xx_gras_cl_guardband_clip_adj" variants="A6XX-A7XX" usage="rp_blit"/> <!-- Something connected to depth-stencil attachment size --> <reg32 offset="0x8007" name="GRAS_UNKNOWN_8007" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x8008" name="GRAS_UNKNOWN_8008" variants="A7XX-" usage="cmd"/> + <!-- the scale/offset is per view, with up to 6 views --> + <bitset name="a6xx_gras_bin_foveat" inline="yes"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + <enum name="a7xx_bin_scale"> + <value value="0" name="NOSCALE"/> + <value value="1" name="SCALE2X"/> + <value value="2" name="SCALE4X"/> + </enum> + <bitfield name="XSCALE_0" low="8" high="9" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_0" low="10" high="11" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_1" low="12" high="13" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_1" low="14" high="15" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_2" low="16" high="17" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_2" low="18" high="19" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_3" low="20" high="21" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_3" low="22" high="23" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_4" low="24" high="25" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_4" low="26" high="27" type="a7xx_bin_scale"/> + <bitfield name="XSCALE_5" low="28" high="29" type="a7xx_bin_scale"/> + <bitfield name="YSCALE_5" low="30" high="31" type="a7xx_bin_scale"/> + </bitset> - <reg32 offset="0x8009" name="GRAS_UNKNOWN_8009" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800a" name="GRAS_UNKNOWN_800A" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800b" name="GRAS_UNKNOWN_800B" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x800c" name="GRAS_UNKNOWN_800C" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x8008" name="GRAS_BIN_FOVEAT" type="a6xx_gras_bin_foveat" variants="A7XX" usage="cmd"/> + + <reg32 offset="0x8009" name="GRAS_BIN_FOVEAT_OFFSET_0" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800a" name="GRAS_BIN_FOVEAT_OFFSET_1" variants="A7XX-" usage="cmd"> + <bitfield name="XOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="XOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="XOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800b" name="GRAS_BIN_FOVEAT_OFFSET_2" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_0" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_1" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_2" low="20" high="29" shr="2" type="uint"/> + </reg32> + <reg32 offset="0x800c" 
name="GRAS_BIN_FOVEAT_OFFSET_3" variants="A7XX-" usage="cmd"> + <bitfield name="YOFFSET_3" low="0" high="9" shr="2" type="uint"/> + <bitfield name="YOFFSET_4" low="10" high="19" shr="2" type="uint"/> + <bitfield name="YOFFSET_5" low="20" high="29" shr="2" type="uint"/> + </reg32> <!-- <reg32 offset="0x80f0" name="GRAS_UNKNOWN_80F0" type="a6xx_reg_xy"/> --> <!-- 0x8006-0x800f invalid --> - <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" usage="rp_blit"> + <array offset="0x8010" name="GRAS_CL_VIEWPORT" stride="6" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="XOFFSET" type="float"/> <reg32 offset="1" name="XSCALE" type="float"/> <reg32 offset="2" name="YOFFSET" type="float"/> @@ -875,12 +920,13 @@ by a particular renderpass/blit. <reg32 offset="4" name="ZOFFSET" type="float"/> <reg32 offset="5" name="ZSCALE" type="float"/> </array> - <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" usage="rp_blit"> + + <array offset="0x8070" name="GRAS_CL_VIEWPORT_ZCLAMP" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="MIN" type="float"/> <reg32 offset="1" name="MAX" type="float"/> </array> - <reg32 offset="0x8090" name="GRAS_SU_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_su_cntl" varset="chip"> <bitfield name="CULL_FRONT" pos="0" type="boolean"/> <bitfield name="CULL_BACK" pos="1" type="boolean"/> <bitfield name="FRONT_CW" pos="2" type="boolean"/> @@ -890,39 +936,66 @@ by a particular renderpass/blit. <bitfield name="LINE_MODE" pos="13" type="a5xx_line_mode"/> <bitfield name="UNK15" low="15" high="16"/> <!-- - On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have - the ability to add the view index to either the RT array - index or the viewport index, and it seems that - MULTIVIEW_ENABLE doesn't do anything, instead we need to - set at least one of RENDERTARGETINDEXINCR or - VIEWPORTINDEXINCR to enable multiview. The blob still - sets MULTIVIEW_ENABLE regardless. - TODO: what about gen2 (a640)? + On gen1 only MULTIVIEW_ENABLE exists. On gen3 we have + the ability to add the view index to either the RT array + index or the viewport index, and it seems that + MULTIVIEW_ENABLE doesn't do anything, instead we need to + set at least one of RENDERTARGETINDEXINCR or + VIEWPORTINDEXINCR to enable multiview. The blob still + sets MULTIVIEW_ENABLE regardless. + TODO: what about gen2 (a640)? 
--> <bitfield name="MULTIVIEW_ENABLE" pos="17" type="boolean"/> - <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean"/> - <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean"/> - <bitfield name="UNK20" low="20" high="22"/> - </reg32> - <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" usage="rp_blit"> + <bitfield name="RENDERTARGETINDEXINCR" pos="18" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="VIEWPORTINDEXINCR" pos="19" type="boolean" variants="A6XX-A7XX"/> + <bitfield name="UNK20" low="20" high="22" variants="A6XX-A7XX"/> + </bitset> + <reg32 offset="0x8090" name="GRAS_SU_CNTL" type="a6xx_gras_su_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_point_minmax" inline="yes"> <bitfield name="MIN" low="0" high="15" type="ufixed" radix="4"/> <bitfield name="MAX" low="16" high="31" type="ufixed" radix="4"/> - </reg32> - <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" usage="rp_blit"/> + </bitset> + + <reg32 offset="0x8091" name="GRAS_SU_POINT_MINMAX" type="a6xx_gras_su_point_minmax" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8092" name="GRAS_SU_POINT_SIZE" low="0" high="15" type="fixed" radix="4" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_depth_cntl" inline="yes"> + <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" variants="A6XX-A7XX" type="a6xx_gras_su_depth_cntl" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_stencil_cntl" inline="yes"> + <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" type="a6xx_gras_su_stencil_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_su_render_cntl" inline="yes"> + <bitfield name="FS_DISABLE" pos="7" type="boolean"/> + </bitset> + + <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" type="a6xx_gras_su_render_cntl" variants="A7XX" usage="rp_blit"/> + <!-- 0x8093 invalid --> - <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" usage="rp_blit"> + <bitset name="a6xx_depth_plane_cntl" inline="yes"> <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> - <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" usage="rp_blit"/> - <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" usage="rp_blit"/> - <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" usage="rp_blit"/> - <!-- duplicates RB_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8094" name="GRAS_SU_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8095" name="GRAS_SU_POLY_OFFSET_SCALE" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8096" name="GRAS_SU_POLY_OFFSET_OFFSET" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8097" name="GRAS_SU_POLY_OFFSET_OFFSET_CLAMP" type="float" variants="A6XX-A7XX" usage="rp_blit"/> + <bitset name="a6xx_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> - <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" usage="cmd"> + <!-- duplicates RB_DEPTH_BUFFER_INFO: --> + <reg32 offset="0x8098" name="GRAS_SU_DEPTH_BUFFER_INFO" type="a6xx_depth_buffer_info" variants="A6XX-A7XX" usage="rp_blit"/> + + 
<bitset name="a6xx_gras_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> <enum name="a6xx_shift_amount"> <value value="0" name="NO_SHIFT"/> @@ -932,7 +1005,10 @@ by a particular renderpass/blit. <bitfield name="SHIFTAMOUNT" low="1" high="2" type="a6xx_shift_amount"/> <bitfield name="INNERCONSERVATIVERASEN" pos="3" type="boolean"/> <bitfield name="UNK4" low="4" high="5"/> - </reg32> + </bitset> + + <reg32 offset="0x8099" name="GRAS_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_gras_su_conservative_ras_cntl" variants="A6XX-A7XX" usage="cmd"/> + <reg32 offset="0x809a" name="GRAS_SU_PATH_RENDERING_CNTL"> <bitfield name="UNK0" pos="0" type="boolean"/> <bitfield name="LINELENGTHEN" pos="1" type="boolean"/> @@ -942,10 +1018,13 @@ by a particular renderpass/blit. <bitfield name="WRITES_LAYER" pos="0" type="boolean"/> <bitfield name="WRITES_VIEW" pos="1" type="boolean"/> </bitset> - <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" usage="rp_blit"/> - <!-- 0x809e/0x809f invalid --> + <reg32 offset="0x809b" name="GRAS_SU_VS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809c" name="GRAS_SU_GS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x809d" name="GRAS_SU_DS_SIV_CNTL" type="a6xx_gras_us_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_rast_cntl" inline="yes"> + <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> + </bitset> <enum name="a6xx_sequenced_thread_dist"> <value value="0x0" name="DIST_SCREEN_COORD"/> @@ -993,7 +1072,7 @@ by a particular renderpass/blit. <value value="0x3" name="RB_BT"/> </enum> - <reg32 offset="0x80a0" name="GRAS_SC_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_cntl" inline="yes"> <bitfield name="CCUSINGLECACHELINESIZE" low="0" high="2"/> <bitfield name="SINGLE_PRIM_MODE" low="3" high="4" type="a6xx_single_prim_mode"/> <bitfield name="RASTER_MODE" pos="5" type="a6xx_raster_mode"/> @@ -1003,7 +1082,9 @@ by a particular renderpass/blit. <bitfield name="UNK9" pos="9" type="boolean"/> <bitfield name="ROTATION" low="10" high="11" type="uint"/> <bitfield name="EARLYVIZOUTEN" pos="12" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a0" name="GRAS_SC_CNTL" type="a6xx_gras_sc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_render_mode"> <value value="0x0" name="RENDERING_PASS"/> @@ -1024,7 +1105,7 @@ by a particular renderpass/blit. <value value="0x4" name="LRZ_FEEDBACK_LATE_Z"/> </enum> - <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" usage="rp_blit"> + <bitset name="a6xx_bin_cntl" inline="yes"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> @@ -1037,18 +1118,25 @@ by a particular renderpass/blit. In sysmem mode GRAS_LRZ_CNTL.LRZ_WRITE is not considered. 
</doc> <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - <bitfield name="UNK27" pos="27"/> - </reg32> + <bitfield name="FORCE_LRZ_DIS" pos="27" type="boolean"/> + </bitset> + + <reg32 offset="0x80a1" name="GRAS_SC_BIN_CNTL" type="a6xx_bin_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_sc_ras_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="UNK2" pos="2"/> <bitfield name="UNK3" pos="3"/> - </reg32> - <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80a2" name="GRAS_SC_RAS_MSAA_CNTL" type="a6xx_gras_sc_ras_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_sc_dest_msaa_cntl" inline="yes"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> <bitfield name="MSAA_DISABLE" pos="2" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x80a3" name="GRAS_SC_DEST_MSAA_CNTL" type="a6xx_gras_sc_dest_msaa_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_msaa_sample_pos_cntl" inline="yes"> <bitfield name="UNK0" pos="0"/> @@ -1066,30 +1154,35 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/> </bitset> - <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> - <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> - <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> + <reg32 offset="0x80a4" name="GRAS_SC_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a5" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80a6" name="GRAS_SC_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x80a7" name="GRAS_UNKNOWN_80A7" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x80a7" name="GRAS_ROTATION_CNTL" variants="A7XX" usage="cmd"/> - <!-- 0x80a7-0x80ae invalid --> - <reg32 offset="0x80af" name="GRAS_UNKNOWN_80AF" pos="0" usage="cmd"/> + <bitset name="a6xx_screen_scissor_cntl" inline="yes"> + <bitfield name="SCISSOR_DISABLE" pos="0" type="boolean"/> + </bitset> + + <reg32 offset="0x80af" name="GRAS_SC_SCREEN_SCISSOR_CNTL" type="a6xx_screen_scissor_cntl" variants="A6XX-A7XX" pos="0" usage="cmd"/> <bitset name="a6xx_scissor_xy" inline="yes"> <bitfield name="X" low="0" high="15" type="uint"/> <bitfield name="Y" low="16" high="31" type="uint"/> </bitset> - <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80b0" name="GRAS_SC_SCREEN_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" usage="rp_blit"> + + <array offset="0x80d0" name="GRAS_SC_VIEWPORT_SCISSOR" stride="2" length="16" variants="A6XX-A7XX" usage="rp_blit"> <reg32 offset="0" name="TL" type="a6xx_scissor_xy"/> <reg32 offset="1" name="BR" type="a6xx_scissor_xy"/> </array> - <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" 
type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x80f0" name="GRAS_SC_WINDOW_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x80f1" name="GRAS_SC_WINDOW_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <enum name="a6xx_fsr_combiner"> <value value="0" name="FSR_COMBINER_OP_KEEP"/> @@ -1099,7 +1192,7 @@ by a particular renderpass/blit. <value value="4" name="FSR_COMBINER_OP_MUL"/> </enum> - <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_vrs_config"> <bitfield name="PIPELINE_FSR_ENABLE" pos="0" type="boolean"/> <bitfield name="FRAG_SIZE_X" low="1" high="2" type="uint"/> <bitfield name="FRAG_SIZE_Y" low="3" high="4" type="uint"/> @@ -1107,20 +1200,32 @@ by a particular renderpass/blit. <bitfield name="COMBINER_OP_2" low="8" high="10" type="a6xx_fsr_combiner"/> <bitfield name="ATTACHMENT_FSR_ENABLE" pos="13" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="20" type="boolean"/> - </reg32> - <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f4" name="GRAS_VRS_CONFIG" type="a6xx_gras_vrs_config" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_info" inline="yes"> <bitfield name="LAYERED" pos="0" type="boolean"/> <bitfield name="TILE_MODE" low="1" high="2" type="a6xx_tile_mode"/> - </reg32> - <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f5" name="GRAS_QUALITY_BUFFER_INFO" type="a6xx_gras_quality_buffer_info" variants="A7XX" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_dimension" inline="yes"> <bitfield name="WIDTH" low="0" high="15" type="uint"/> <bitfield name="HEIGHT" low="16" high="31" type="uint"/> - </reg32> - <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX-" type="waddress" usage="rp_blit"/> - <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" variants="A7XX-" usage="rp_blit"> + </bitset> + + <reg32 offset="0x80f6" name="GRAS_QUALITY_BUFFER_DIMENSION" type="a6xx_gras_quality_buffer_dimension" variants="A7XX" usage="rp_blit"/> + + <reg64 offset="0x80f8" name="GRAS_QUALITY_BUFFER_BASE" variants="A7XX" type="waddress" usage="rp_blit"/> + + <bitset name="a6xx_gras_quality_buffer_pitch" inline="yes"> <bitfield name="PITCH" shr="6" low="0" high="7" type="uint"/> <bitfield name="ARRAY_PITCH" shr="6" low="10" high="28" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x80fa" name="GRAS_QUALITY_BUFFER_PITCH" type="a6xx_gras_quality_buffer_pitch" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_lrz_dir_status"> <value value="0x1" name="LRZ_DIR_LE"/> @@ -1128,7 +1233,7 @@ by a particular renderpass/blit. <value value="0x3" name="LRZ_DIR_INVALID"/> </enum> - <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_cntl" inline="yes"> <bitfield name="ENABLE" pos="0" type="boolean"/> <doc>LRZ write also disabled for blend/etc.</doc> <bitfield name="LRZ_WRITE" pos="1" type="boolean"/> @@ -1155,26 +1260,36 @@ by a particular renderpass/blit. 
</doc> <bitfield name="DISABLE_ON_WRONG_DIR" pos="9" type="boolean" variants="A6XX"/> <bitfield name="Z_FUNC" low="11" high="13" type="adreno_compare_func" variants="A7XX-"/> - </reg32> + </bitset> + + <reg32 offset="0x8100" name="GRAS_LRZ_CNTL" type="a6xx_gras_lrz_cntl" usage="rp_blit" variants="A6XX-A7XX"/> <enum name="a6xx_fragcoord_sample_mode"> <value value="0" name="FRAGCOORD_CENTER"/> <value value="3" name="FRAGCOORD_SAMPLE"/> </enum> - <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" low="0" high="2" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_ps_input_cntl" inline="yes"> <bitfield name="SAMPLEID" pos="0" type="boolean"/> <bitfield name="FRAGCOORDSAMPLEMODE" low="1" high="2" type="a6xx_fragcoord_sample_mode"/> - </reg32> + </bitset> + + <reg32 offset="0x8101" name="GRAS_LRZ_PS_INPUT_CNTL" type="a6xx_gras_lrz_ps_input_cntl" usage="rp_blit" variants="A6XX-A7XX"/> - <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_mrt_buffer_info_0" inline="yes"> <bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/> - </reg32> - <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit"/> - <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" usage="rp_blit"> + </bitset> + + <reg32 offset="0x8102" name="GRAS_LRZ_MRT_BUFFER_INFO_0" type="a6xx_gras_lrz_mrt_buffer_info_0" usage="rp_blit" variants="A6XX-A7XX"/> + + <reg64 offset="0x8103" name="GRAS_LRZ_BUFFER_BASE" align="256" type="waddress" usage="rp_blit" variants="A6XX-A7XX"/> + + <bitset name="a6xx_gras_lrz_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="7" shr="5" type="uint"/> <bitfield name="ARRAY_PITCH" low="10" high="28" shr="8" type="uint"/> - </reg32> + </bitset> + + <reg32 offset="0x8105" name="GRAS_LRZ_BUFFER_PITCH" type="a6xx_gras_lrz_buffer_pitch" usage="rp_blit" variants="A6XX-A7XX"/> <!-- The LRZ "fast clear" buffer is initialized to zero's by blob, and @@ -1207,7 +1322,6 @@ by a particular renderpass/blit. not. --> <reg64 offset="0x8106" name="GRAS_LRZ_FAST_CLEAR_BUFFER_BASE" align="64" type="waddress" usage="rp_blit"/> - <!-- 0x8108 invalid --> <reg32 offset="0x8109" name="GRAS_LRZ_PS_SAMPLEFREQ_CNTL" usage="rp_blit"> <bitfield name="PER_SAMP_MODE" pos="0" type="boolean"/> </reg32> @@ -1232,19 +1346,20 @@ by a particular renderpass/blit. <!-- 0x810c-0x810f invalid --> - <reg32 offset="0x8110" name="GRAS_UNKNOWN_8110" low="0" high="1" usage="cmd"/> + <reg32 offset="0x8110" name="GRAS_MODE_CNTL" low="0" high="1" variants="A6XX-A7XX" usage="cmd"/> <!-- A bit tentative but it's a color and it is followed by LRZ_CLEAR --> - <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX-"/> + <reg32 offset="0x8111" name="GRAS_LRZ_DEPTH_CLEAR" type="float" variants="A7XX"/> - <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> + <bitset name="a6xx_gras_lrz_depth_buffer_info" inline="yes"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> <bitfield name="UNK3" pos="3"/> - </reg32> + </bitset> + + <reg32 offset="0x8113" name="GRAS_LRZ_DEPTH_BUFFER_INFO" type="a6xx_gras_lrz_depth_buffer_info" variants="A7XX" usage="rp_blit"/> - <!-- Always written together and always equal 09510840 00000a62 --> - <reg32 offset="0x8120" name="GRAS_UNKNOWN_8120" variants="A7XX-" usage="cmd"/> - <reg32 offset="0x8121" name="GRAS_UNKNOWN_8121" variants="A7XX-" usage="cmd"/> + <doc>LUT used to convert quality buffer values to HW shading rate values. 
An array of 4-bit values.</doc> + <array offset="0x8120" name="GRAS_LRZ_QUALITY_LOOKUP_TABLE" variants="A7XX-" stride="1" length="2"/> <!-- 0x8112-0x83ff invalid --> @@ -1269,28 +1384,29 @@ by a particular renderpass/blit. <bitfield name="D24S8" pos="19" type="boolean"/> <!-- some sort of channel mask, disabled channels are set to zero ? --> <bitfield name="MASK" low="20" high="23"/> - <bitfield name="IFMT" low="24" high="28" type="a6xx_2d_ifmt"/> + <bitfield name="IFMT" low="24" high="26" type="a6xx_2d_ifmt"/> + <bitfield name="UNK27" pos="27" type="boolean"/> + <bitfield name="UNK28" pos="28" type="boolean"/> <bitfield name="RASTER_MODE" pos="29" type="a6xx_raster_mode"/> - <bitfield name="UNK30" pos="30" type="boolean" variants="A7XX-"/> + <bitfield name="COPY" pos="30" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" usage="rp_blit"/> + <reg32 offset="0x8400" name="GRAS_A2D_BLT_CNTL" type="a6xx_a2d_bit_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- note: the low 8 bits for src coords are valid, probably fixed point it would be a bit weird though, since we subtract 1 from BR coords apparently signed, gallium driver uses negative coords and it works? --> - <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" usage="rp_blit"/> - <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" usage="rp_blit"/> + <reg32 offset="0x8401" name="GRAS_A2D_SRC_XMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8402" name="GRAS_A2D_SRC_XMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8403" name="GRAS_A2D_SRC_YMIN" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8404" name="GRAS_A2D_SRC_YMAX" low="8" high="24" type="int" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8405" name="GRAS_A2D_DEST_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x8406" name="GRAS_A2D_DEST_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x8407" name="GRAS_2D_UNKNOWN_8407" low="0" high="31"/> <reg32 offset="0x8408" name="GRAS_2D_UNKNOWN_8408" low="0" high="31"/> <reg32 offset="0x8409" name="GRAS_2D_UNKNOWN_8409" low="0" high="31"/> - <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" usage="rp_blit"/> - <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" usage="rp_blit"/> - <!-- 0x840c-0x85ff invalid --> + <reg32 offset="0x840a" name="GRAS_A2D_SCISSOR_TL" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x840b" name="GRAS_A2D_SCISSOR_BR" type="a6xx_reg_xy" variants="A6XX-A7XX" usage="rp_blit"/> <!-- always 0x880 ? (and 0 in a640/a650 traces?) --> <reg32 offset="0x8600" name="GRAS_DBG_ECO_CNTL" usage="cmd"> @@ -1308,22 +1424,7 @@ by a particular renderpass/blit. 
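GRAS_LRZ_QUALITY_LOOKUP_TABLE above is documented as an array of 4-bit values spread over two 32-bit registers. A rough packing sketch, based only on the stride/length given in the XML (16 nibble-sized entries, lowest entry in the lowest nibble, is an assumption; the entry ordering is not spelled out there):

#include <stdint.h>

/* Pack 16 assumed 4-bit shading-rate LUT entries into the two
 * GRAS_LRZ_QUALITY_LOOKUP_TABLE dwords. */
static void pack_lrz_quality_lut(const uint8_t entries[16], uint32_t regs[2])
{
	regs[0] = regs[1] = 0;
	for (int i = 0; i < 16; i++)
		regs[i / 8] |= (uint32_t)(entries[i] & 0xf) << ((i % 8) * 4);
}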
--> <!-- same as GRAS_BIN_CONTROL, but without bit 27: --> - <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="BUFFERS_LOCATION" low="22" high="23" type="a6xx_buffers_location"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> - - <reg32 offset="0x8800" name="RB_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> - <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> - <bitfield name="RENDER_MODE" low="18" high="20" type="a6xx_render_mode"/> - <bitfield name="FORCE_LRZ_WRITE_DIS" pos="21" type="boolean"/> - <bitfield name="LRZ_FEEDBACK_ZMODE_MASK" low="24" high="26" type="a6xx_lrz_feedback_mask"/> - </reg32> + <reg32 offset="0x8800" name="RB_CNTL" variants="A6XX-A7XX" type="a6xx_bin_cntl" usage="rp_blit"/> <reg32 offset="0x8801" name="RB_RENDER_CNTL" variants="A6XX" usage="rp_blit"> <bitfield name="CCUSINGLECACHELINESIZE" low="3" high="5"/> @@ -1347,9 +1448,6 @@ by a particular renderpass/blit. <bitfield name="CONSERVATIVERASEN" pos="11" type="boolean"/> <bitfield name="INNERCONSERVATIVERASEN" pos="12" type="boolean"/> </reg32> - <reg32 offset="0x8116" name="GRAS_SU_RENDER_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="FS_DISABLE" pos="7" type="boolean"/> - </reg32> <reg32 offset="0x8802" name="RB_RAS_MSAA_CNTL" usage="rp_blit"> <bitfield name="SAMPLES" low="0" high="1" type="a3xx_msaa_samples"/> @@ -1516,9 +1614,7 @@ by a particular renderpass/blit. <bitfield name="SAMPLE_MASK" low="16" high="31"/> </reg32> <!-- 0x8866-0x886f invalid --> - <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" usage="rp_blit"> - <bitfield name="Z_MODE" low="0" high="1" type="a6xx_ztest_mode"/> - </reg32> + <reg32 offset="0x8870" name="RB_DEPTH_PLANE_CNTL" type="a6xx_depth_plane_cntl" usage="rp_blit"/> <reg32 offset="0x8871" name="RB_DEPTH_CNTL" usage="rp_blit"> <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> @@ -1532,14 +1628,9 @@ by a particular renderpass/blit. <bitfield name="Z_READ_ENABLE" pos="6" type="boolean"/> <bitfield name="Z_BOUNDS_ENABLE" pos="7" type="boolean"/> </reg32> - <reg32 offset="0x8114" name="GRAS_SU_DEPTH_CNTL" usage="rp_blit"> - <bitfield name="Z_TEST_ENABLE" pos="0" type="boolean"/> - </reg32> + <!-- duplicates GRAS_SU_DEPTH_BUFFER_INFO: --> - <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" usage="rp_blit"> - <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> - <bitfield name="UNK3" low="3" high="4"/> - </reg32> + <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A6XX" type="a6xx_depth_buffer_info" usage="rp_blit"/> <!-- first 4 bits duplicates GRAS_SU_DEPTH_BUFFER_INFO --> <reg32 offset="0x8872" name="RB_DEPTH_BUFFER_INFO" variants="A7XX-" usage="rp_blit"> <bitfield name="DEPTH_FORMAT" low="0" high="2" type="a6xx_depth_format"/> @@ -1575,9 +1666,7 @@ by a particular renderpass/blit. 
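RB_CNTL is now typed as the shared a6xx_bin_cntl layout instead of carrying its own copy of the fields. Reading that layout concretely: BINW sits in bits 0-5 stored right-shifted by 5 (32-pixel units) and BINH in bits 8-14 right-shifted by 4 (16-pixel units), so a packing helper could look like the sketch below (illustrative only, not taken from the kernel or mesa sources):

#include <stdint.h>

/* Pack bin width/height (in pixels) into the BINW/BINH fields of
 * a6xx_bin_cntl as described by the XML above. */
static inline uint32_t a6xx_bin_cntl_dims(uint32_t binw_px, uint32_t binh_px)
{
	return ((binw_px >> 5) & 0x3f) | (((binh_px >> 4) & 0x7f) << 8);
}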
<bitfield name="ZPASS_BF" low="26" high="28" type="adreno_stencil_op"/> <bitfield name="ZFAIL_BF" low="29" high="31" type="adreno_stencil_op"/> </reg32> - <reg32 offset="0x8115" name="GRAS_SU_STENCIL_CNTL" usage="rp_blit"> - <bitfield name="STENCIL_ENABLE" pos="0" type="boolean"/> - </reg32> + <reg32 offset="0x8881" name="RB_STENCIL_BUFFER_INFO" variants="A6XX" usage="rp_blit"> <bitfield name="SEPARATE_STENCIL" pos="0" type="boolean"/> <bitfield name="UNK1" pos="1" type="boolean"/> @@ -1616,8 +1705,9 @@ by a particular renderpass/blit. <reg32 offset="0x8899" name="RB_UNKNOWN_8899" variants="A7XX-" usage="cmd"/> <!-- 0x8899-0x88bf invalid --> <!-- clamps depth value for depth test/write --> - <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit"/> - <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit"/> + <reg32 offset="0x88c0" name="RB_VIEWPORT_ZCLAMP_MIN" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <reg32 offset="0x88c1" name="RB_VIEWPORT_ZCLAMP_MAX" type="float" usage="rp_blit" variants="A6XX-A7XX"/> + <!-- 0x88c2-0x88cf invalid--> <reg32 offset="0x88d0" name="RB_RESOLVE_CNTL_0" usage="rp_blit"> <bitfield name="UNK0" low="0" high="12"/> @@ -1626,7 +1716,7 @@ by a particular renderpass/blit. <reg32 offset="0x88d1" name="RB_RESOLVE_CNTL_1" type="a6xx_reg_xy" usage="rp_blit"/> <reg32 offset="0x88d2" name="RB_RESOLVE_CNTL_2" type="a6xx_reg_xy" usage="rp_blit"/> <!-- weird to duplicate other regs from same block?? --> - <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" usage="rp_blit"> + <reg32 offset="0x88d3" name="RB_RESOLVE_CNTL_3" variants="A6XX-A7XX" usage="rp_blit"> <bitfield name="BINW" low="0" high="5" shr="5" type="uint"/> <bitfield name="BINH" low="8" high="14" shr="4" type="uint"/> </reg32> @@ -1650,10 +1740,13 @@ by a particular renderpass/blit. <!-- array-pitch is size of layer --> <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint" usage="rp_blit"/> <reg64 offset="0x88dc" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" usage="rp_blit"> + + <bitset name="a6xx_flag_buffer_pitch" inline="yes"> <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> - </reg32> + <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> + </bitset> + + <reg32 offset="0x88de" name="RB_RESOLVE_SYSTEM_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <reg32 offset="0x88df" name="RB_RESOLVE_CLEAR_COLOR_DW0" usage="rp_blit"/> <reg32 offset="0x88e0" name="RB_RESOLVE_CLEAR_COLOR_DW1" usage="rp_blit"/> @@ -1726,10 +1819,7 @@ by a particular renderpass/blit. <reg32 offset="0x88f0" name="RB_UNKNOWN_88F0" low="0" high="11" usage="cmd"/> <!-- could be for separate stencil? (or may not be a flag buffer at all) --> <reg64 offset="0x88f1" name="RB_UNK_FLAG_BUFFER_BASE" type="waddress" align="64"/> - <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="23" shr="7" type="uint"/> - </reg32> + <reg32 offset="0x88f3" name="RB_UNK_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch"/> <reg32 offset="0x88f4" name="RB_VRS_CONFIG" usage="rp_blit"> <bitfield name="UNK2" pos="2" type="boolean"/> @@ -1737,8 +1827,9 @@ by a particular renderpass/blit. 
<bitfield name="ATTACHMENT_FSR_ENABLE" pos="5" type="boolean"/> <bitfield name="PRIMITIVE_FSR_ENABLE" pos="18" type="boolean"/> </reg32> - <!-- Connected to VK_EXT_fragment_density_map? --> - <reg32 offset="0x88f5" name="RB_UNKNOWN_88F5" variants="A7XX-"/> + <reg32 offset="0x88f5" name="RB_BIN_FOVEAT" variants="A7XX-" usage="cmd"> + <bitfield name="BINSCALEEN" pos="6" type="boolean"/> + </reg32> <!-- 0x88f6-0x88ff invalid --> <reg64 offset="0x8900" name="RB_DEPTH_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8902" name="RB_DEPTH_FLAG_BUFFER_PITCH" usage="rp_blit"> @@ -1747,12 +1838,10 @@ by a particular renderpass/blit. <bitfield name="UNK8" low="8" high="10"/> <bitfield name="ARRAY_PITCH" low="11" high="27" shr="7" type="uint"/> </reg32> + <array offset="0x8903" name="RB_COLOR_FLAG_BUFFER" stride="3" length="8" usage="rp_blit"> <reg64 offset="0" name="ADDR" type="waddress" align="64"/> - <reg32 offset="2" name="PITCH"> - <bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/> - <bitfield name="ARRAY_PITCH" low="11" high="28" shr="7" type="uint"/> - </reg32> + <reg32 offset="2" name="PITCH" type="a6xx_flag_buffer_pitch"/> </array> <!-- 0x891b-0x8926 invalid --> <doc> @@ -1815,7 +1904,7 @@ by a particular renderpass/blit. <reg64 offset="0x8c1e" name="RB_A2D_DEST_BUFFER_BASE_2" type="waddress" align="64" usage="rp_blit"/> <reg64 offset="0x8c20" name="RB_A2D_DEST_FLAG_BUFFER_BASE" type="waddress" align="64" usage="rp_blit"/> - <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> + <reg32 offset="0x8c22" name="RB_A2D_DEST_FLAG_BUFFER_PITCH" type="a6xx_flag_buffer_pitch" usage="rp_blit"/> <!-- this is a guess but seems likely (for NV12 with UBWC): --> <reg64 offset="0x8c23" name="RB_A2D_DEST_FLAG_BUFFER_BASE_1" type="waddress" align="64" usage="rp_blit"/> <reg32 offset="0x8c25" name="RB_A2D_DEST_FLAG_BUFFER_PITCH_1" low="0" high="7" shr="6" type="uint" usage="rp_blit"/> @@ -1921,13 +2010,13 @@ by a particular renderpass/blit. 
<bitfield name="CLIP_DIST_03_LOC" low="8" high="15" type="uint"/> <bitfield name="CLIP_DIST_47_LOC" low="16" high="23" type="uint"/> </bitset> - <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9101" name="VPC_VS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9102" name="VPC_GS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9103" name="VPC_DS_CLIP_CULL_CNTL" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> - <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" usage="rp_blit"/> + <reg32 offset="0x9311" name="VPC_VS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9312" name="VPC_GS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9313" name="VPC_DS_CLIP_CULL_CNTL_V2" type="a6xx_vpc_xs_clip_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_vpc_xs_siv_cntl" inline="yes"> <bitfield name="LAYERLOC" low="0" high="7" type="uint"/> @@ -1935,23 +2024,33 @@ by a particular renderpass/blit. <bitfield name="SHADINGRATELOC" low="16" high="23" type="uint" variants="A7XX-"/> </bitset> - <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9104" name="VPC_VS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9105" name="VPC_GS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9106" name="VPC_DS_SIV_CNTL" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + - <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> - <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" usage="rp_blit"/> + <reg32 offset="0x9314" name="VPC_VS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9315" name="VPC_GS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9316" name="VPC_DS_SIV_CNTL_V2" type="a6xx_vpc_xs_siv_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_rast_stream_cntl" inline="yes"> + <!-- which stream to send to GRAS --> + <bitfield name="STREAM" low="0" high="1" type="uint"/> + <!-- discard primitives before rasterization --> + <bitfield name="DISCARD" pos="2" type="boolean"/> + </bitset> + + <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 
offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" type="a6xx_vpc_rast_stream_cntl" variants="A7XX" usage="rp_blit"/> <reg32 offset="0x9107" name="VPC_UNKNOWN_9107" variants="A6XX" usage="rp_blit"> <!-- this mirrors VPC_RAST_STREAM_CNTL::DISCARD, although it seems it's unused --> <bitfield name="RASTER_DISCARD" pos="0" type="boolean"/> <bitfield name="UNK2" pos="2" type="boolean"/> </reg32> - <reg32 offset="0x9108" name="VPC_RAST_CNTL" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> + <reg32 offset="0x9108" name="VPC_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_cntl" inline="yes"> <bitfield name="PRIMITIVE_RESTART" pos="0" type="boolean"/> <bitfield name="PROVOKING_VTX_LAST" pos="1" type="boolean"/> @@ -1991,10 +2090,10 @@ by a particular renderpass/blit. <bitfield name="VIEWS" low="2" high="6" type="uint"/> </bitset> - <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0x9109" name="VPC_PC_CNTL" type="a6xx_pc_cntl" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910a" name="VPC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910b" name="VPC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A7XX" usage="rp_blit"/> + <reg32 offset="0x910c" name="VPC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A7XX" usage="rp_blit"/> <enum name="a6xx_varying_interp_mode"> <value value="0" name="INTERP_SMOOTH"/> @@ -2011,11 +2110,11 @@ by a particular renderpass/blit. </enum> <!-- 0x9109-0x91ff invalid --> - <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" usage="rp_blit"> + <array offset="0x9200" name="VPC_VARYING_INTERP_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_interp_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> - <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE_0" stride="1" length="8" usage="rp_blit"> + <array offset="0x9208" name="VPC_VARYING_REPLACE_MODE" stride="1" length="8" variants="A6XX-A7XX" usage="rp_blit"> <doc>Packed array of a6xx_varying_ps_repl_mode</doc> <reg32 offset="0x0" name="MODE"/> </array> @@ -2024,12 +2123,12 @@ by a particular renderpass/blit. <reg32 offset="0x9210" name="VPC_UNKNOWN_9210" low="0" high="31" variants="A6XX" usage="cmd"/> <reg32 offset="0x9211" name="VPC_UNKNOWN_9211" low="0" high="31" variants="A6XX" usage="cmd"/> - <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL_0" stride="1" length="4" usage="rp_blit"> + <array offset="0x9212" name="VPC_VARYING_LM_TRANSFER_CNTL" stride="1" length="4" variants="A6XX-A7XX" usage="rp_blit"> <!-- one bit per varying component: --> <reg32 offset="0" name="DISABLE"/> </array> - <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" usage="rp_blit"> + <bitset name="a6xx_vpc_so_mapping_wptr" inline="yes"> <!-- Choose which DWORD to write to. There is an array of (4 * 64) DWORD's, dumped in the devcoredump at @@ -2056,20 +2155,25 @@ by a particular renderpass/blit. 
<bitfield name="ADDR" low="0" high="7" type="hex"/> <!-- clear all A_EN and B_EN bits for all DWORD's --> <bitfield name="RESET" pos="16" type="boolean"/> - </reg32> - <!-- special register, write multiple times to load SO program (not readable) --> - <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9216" name="VPC_SO_MAPPING_WPTR" type="a6xx_vpc_so_mapping_wptr" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_so_mapping_port" inline="yes"> <bitfield name="A_BUF" low="0" high="1" type="uint"/> <bitfield name="A_OFF" low="2" high="10" shr="2" type="uint"/> <bitfield name="A_EN" pos="11" type="boolean"/> <bitfield name="B_BUF" low="12" high="13" type="uint"/> <bitfield name="B_OFF" low="14" high="22" shr="2" type="uint"/> <bitfield name="B_EN" pos="23" type="boolean"/> - </reg32> + </bitset> + + <!-- special register, write multiple times to load SO program (not readable) --> + <reg32 offset="0x9217" name="VPC_SO_MAPPING_PORT" type="a6xx_vpc_so_mapping_port" variants="A6XX-A7XX" usage="rp_blit"/> - <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9218" name="VPC_SO_QUERY_BASE" type="waddress" align="32" variants="A6XX-A7XX" usage="cmd"/> - <array offset="0x921a" name="VPC_SO" stride="7" length="4" usage="cmd"> + <array offset="0x921a" name="VPC_SO" stride="7" length="4" variants="A6XX-A7XX" usage="cmd"> <reg64 offset="0" name="BUFFER_BASE" type="waddress" align="32"/> <reg32 offset="2" name="BUFFER_SIZE" low="2" high="31" shr="2"/> <reg32 offset="3" name="BUFFER_STRIDE" low="0" high="9" shr="2"/> @@ -2077,12 +2181,13 @@ by a particular renderpass/blit. <reg64 offset="5" name="FLUSH_BASE" type="waddress" align="32"/> </array> - <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" usage="cmd"> + <bitset name="a6xx_vpc_replace_mode_cntl" inline="yes"> <bitfield name="INVERT" pos="0" type="boolean"/> - </reg32> - <!-- 0x9237-0x92ff invalid --> - <!-- always 0x0 ? --> - <reg32 offset="0x9300" name="VPC_UNKNOWN_9300" low="0" high="2" usage="cmd"/> + </bitset> + + <reg32 offset="0x9236" name="VPC_REPLACE_MODE_CNTL" type="a6xx_vpc_replace_mode_cntl" variants="A6XX-A7XX" usage="cmd"/> + + <reg32 offset="0x9300" name="VPC_ROTATION_CNTL" low="0" high="2" variants="A6XX-A7XX" usage="cmd"/> <bitset name="a6xx_vpc_xs_cntl" inline="yes"> <doc> @@ -2101,11 +2206,12 @@ by a particular renderpass/blit. </doc> </bitfield> </bitset> - <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9304" name="VPC_PS_CNTL" usage="rp_blit"> + <reg32 offset="0x9301" name="VPC_VS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9302" name="VPC_GS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9303" name="VPC_DS_CNTL" type="a6xx_vpc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_ps_cntl" inline="yes"> <bitfield name="NUMNONPOSVAR" low="0" high="7" type="uint"/> <!-- for fixed-function (i.e. no GS) gl_PrimitiveID in FS --> <bitfield name="PRIMIDLOC" low="8" high="15" type="uint"/> @@ -2122,9 +2228,11 @@ by a particular renderpass/blit. ViewID through the VS. 
</doc> </bitfield> - </reg32> + </bitset> - <reg32 offset="0x9305" name="VPC_SO_CNTL" usage="rp_blit"> + <reg32 offset="0x9304" name="VPC_PS_CNTL" type="a6xx_vpc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_vpc_so_cntl" inline="yes"> <!-- It's offset by 1, and 0 means "disabled" --> @@ -2133,22 +2241,28 @@ by a particular renderpass/blit. <bitfield name="BUF2_STREAM" low="6" high="8" type="uint"/> <bitfield name="BUF3_STREAM" low="9" high="11" type="uint"/> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> - <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" usage="rp_blit"> + </bitset> + + <reg32 offset="0x9305" name="VPC_SO_CNTL" type="a6xx_vpc_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_so_override" inline="yes"> <bitfield name="DISABLE" pos="0" type="boolean"/> - </reg32> - <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" variants="A6XX-" usage="rp_blit"> <!-- A702 + A7xx --> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX-" usage="rp_blit"> - <bitfield name="BASE_GMEM" low="0" high="31"/> - </reg32> - <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX-" usage="rp_blit"> - <bitfield name="SIZE_GMEM" low="0" high="31"/> - </reg32> + </bitset> + + <reg32 offset="0x9306" name="VPC_SO_OVERRIDE" type="a6xx_so_override" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9807" name="PC_DGEN_SO_OVERRIDE" type="a6xx_so_override" variants="A7XX" usage="rp_blit"/> + + <reg32 offset="0x9307" name="VPC_PS_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9308" name="VPC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + <reg32 offset="0x9309" name="VPC_ATTR_BUF_GMEM_BASE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x9b09" name="PC_ATTR_BUF_GMEM_SIZE" variants="A7XX" type="uint" usage="rp_blit"/> + + <reg32 offset="0x930a" name="VPC_UNKNOWN_930A" variants="A7XX"/> + + <reg32 offset="0x960a" name="VPC_FLATSHADE_MODE_CNTL" variants="A7XX"/> <!-- 0x9307-0x95ff invalid --> @@ -2163,52 +2277,62 @@ by a particular renderpass/blit. <!-- TODO: regs from 0x9624-0x963a --> <!-- 0x963b-0x97ff invalid --> - <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" usage="rp_blit"/> + <reg32 offset="0x9800" name="PC_HS_PARAM_0" low="0" high="5" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <!-- always 0x0 ? 
--> - <reg32 offset="0x9801" name="PC_HS_PARAM_1" usage="rp_blit"> + <bitset name="a6xx_pc_hs_param_1" inline="yes"> <bitfield name="SIZE" low="0" high="10" type="uint"/> <bitfield name="UNK13" pos="13"/> - </reg32> + </bitset> + + <reg32 offset="0x9801" name="PC_HS_PARAM_1" type="a6xx_pc_hs_param_1" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9802" name="PC_DS_PARAM" usage="rp_blit"> + <bitset name="a6xx_pc_ds_param" inline="yes"> <bitfield name="SPACING" low="0" high="1" type="a6xx_tess_spacing"/> <bitfield name="OUTPUT" low="2" high="3" type="a6xx_tess_output"/> - </reg32> + </bitset> + + <reg32 offset="0x9802" name="PC_DS_PARAM" type="a6xx_pc_ds_param" variants="A6XX-A7XX" usage="rp_blit"/> + + <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9803" name="PC_RESTART_INDEX" low="0" high="31" type="uint" usage="rp_blit"/> - <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" usage="rp_blit"/> + <reg32 offset="0x9804" name="PC_MODE_CNTL" low="0" high="7" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9805" name="PC_POWER_CNTL" low="0" high="2" usage="rp_blit"/> - <reg32 offset="0x9806" name="PC_PS_CNTL" usage="rp_blit"> + <bitset name="a6xx_pc_ps_cntl" inline="yes"> <bitfield name="PRIMITIVEIDEN" pos="0" type="boolean"/> - </reg32> + </bitset> - <!-- New in a6xx gen3+ --> - <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" usage="rp_blit"> + <reg32 offset="0x9806" name="PC_PS_CNTL" type="a6xx_pc_ps_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + + <bitset name="a6xx_pc_dgen_so_cntl" inline="yes"> <bitfield name="STREAM_ENABLE" low="15" high="18" type="hex"/> - </reg32> + </bitset> + + <!-- New in a6xx gen3+ --> + <reg32 offset="0x9808" name="PC_DGEN_SO_CNTL" type="a6xx_pc_dgen_so_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL"> + <bitset name="a6xx_pc_dgen_su_conservative_ras_cntl" inline="yes"> <bitfield name="CONSERVATIVERASEN" pos="0" type="boolean"/> - </reg32> - <!-- 0x980b-0x983f invalid --> + </bitset> + + <reg32 offset="0x980a" name="PC_DGEN_SU_CONSERVATIVE_RAS_CNTL" type="a6xx_pc_dgen_su_conservative_ras_cntl" variants="A6XX-A7XX"/> <!-- 0x9840 - 0x9842 are not readable --> - <reg32 offset="0x9840" name="PC_DRAW_INITIATOR"> + <bitset name="a6xx_draw_initiator" inline="yes"> <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + </bitset> - <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR"> - <bitfield name="STATE_ID" low="0" high="7"/> - </reg32> + <reg32 offset="0x9840" name="PC_DRAW_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> + <reg32 offset="0x9841" name="PC_KERNEL_INITIATOR" type="a6xx_draw_initiator" variants="A6XX-A7XX"/> - <reg32 offset="0x9842" name="PC_EVENT_INITIATOR"> + <bitset name="a6xx_event_initiator" inline="yes"> <!-- I think only the low bit is actually used? --> <bitfield name="STATE_ID" low="16" high="23"/> <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> - </reg32> + </bitset> + + <reg32 offset="0x9842" name="PC_EVENT_INITIATOR" type="a6xx_event_initiator" variants="A6XX-A7XX"/> <!-- 0x9880 written in a lot of places by SQE, same value gets written @@ -2219,45 +2343,21 @@ by a particular renderpass/blit. 
<!-- 0x9843-0x997f invalid --> - <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" variants="A6XX" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" variants="A7XX-" usage="rp_blit"> - <bitfield name="MODE" low="0" high="1" type="a6xx_polygon_mode"/> - </reg32> - - <reg32 offset="0x9980" name="VPC_RAST_STREAM_CNTL" variants="A6XX" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <!-- VPC_RAST_STREAM_CNTL --> - <reg32 offset="0x9107" name="VPC_RAST_STREAM_CNTL" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> - <reg32 offset="0x9317" name="VPC_RAST_STREAM_CNTL_V2" variants="A7XX-" usage="rp_blit"> - <!-- which stream to send to GRAS --> - <bitfield name="STREAM" low="0" high="1" type="uint"/> - <!-- discard primitives before rasterization --> - <bitfield name="DISCARD" pos="2" type="boolean"/> - </reg32> + <reg32 offset="0x9981" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A6XX" usage="rp_blit"/> + <reg32 offset="0x9809" name="PC_DGEN_RAST_CNTL" type="a6xx_rast_cntl" variants="A7XX" usage="rp_blit"/> <!-- Both are a750+. Probably needed to correctly overlap execution of several draws. --> - <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9885" name="PC_HS_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- Blob adds a bit more space {0x10, 0x20, 0x30, 0x40} bytes, but the meaning of this additional space is not known. --> - <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX-" usage="cmd"/> + <reg32 offset="0x9886" name="PC_TF_BUFFER_SIZE" variants="A7XX" usage="cmd"/> <!-- 0x9982-0x9aff invalid --> - <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" usage="rp_blit"/> + <reg32 offset="0x9b00" name="PC_CNTL" type="a6xx_pc_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <bitset name="a6xx_pc_xs_cntl" inline="yes"> <doc> @@ -2270,18 +2370,18 @@ by a particular renderpass/blit. 
<bitfield name="LAYER" pos="9" type="boolean"/> <bitfield name="VIEW" pos="10" type="boolean"/> <!-- note: PC_VS_CNTL doesn't have the PRIMITIVE_ID bit --> + <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> <bitfield name="PRIMITIVE_ID" pos="11" type="boolean"/> <bitfield name="CLIP_MASK" low="16" high="23" type="uint"/> <bitfield name="SHADINGRATE" pos="24" type="boolean" variants="A7XX-"/> </bitset> - <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <!-- since HS can't output anything, only PRIMITIVE_ID is valid --> - <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> - <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" usage="rp_blit"/> + <reg32 offset="0x9b01" name="PC_VS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b02" name="PC_GS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b03" name="PC_HS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> + <reg32 offset="0x9b04" name="PC_DS_CNTL" type="a6xx_pc_xs_cntl" variants="A6XX-A7XX" usage="rp_blit"/> - <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" usage="rp_blit"/> + <reg32 offset="0x9b05" name="PC_GS_PARAM_0" type="a6xx_gs_param_0" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0x9b06" name="PC_PRIMITIVE_CNTL_6" variants="A6XX" usage="rp_blit"> <doc> @@ -2290,9 +2390,9 @@ by a particular renderpass/blit. <bitfield name="STRIDE_IN_VPC" low="0" high="10" type="uint"/> </reg32> - <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" usage="rp_blit"/> + <reg32 offset="0x9b07" name="PC_STEREO_RENDERING_CNTL" type="a6xx_stereo_rendering_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <!-- mask of enabled views, doesn't exist on A630 --> - <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" usage="rp_blit"/> + <reg32 offset="0x9b08" name="PC_STEREO_RENDERING_VIEWMASK" type="hex" low="0" high="15" variants="A6XX-A7XX" usage="rp_blit"/> <!-- 0x9b09-0x9bff invalid --> <reg32 offset="0x9c00" name="PC_2D_EVENT_CMD"> <!-- special register (but note first 8 bits can be written/read) --> @@ -2303,34 +2403,39 @@ by a particular renderpass/blit. 
<!-- TODO: 0x9e00-0xa000 range incomplete --> <reg32 offset="0x9e00" name="PC_DBG_ECO_CNTL"/> <reg32 offset="0x9e01" name="PC_ADDR_MODE_CNTL" type="a5xx_address_mode"/> - <reg64 offset="0x9e04" name="PC_DMA_BASE"/> - <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint"/> - <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint"/> + <reg64 offset="0x9e04" name="PC_DMA_BASE" type="address" variants="A6XX-A7XX"/> + <reg32 offset="0x9e06" name="PC_DMA_OFFSET" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e07" name="PC_DMA_SIZE" type="uint" variants="A6XX-A7XX"/> + <reg64 offset="0x9e08" name="PC_TESS_BASE" variants="A6XX" type="waddress" align="32" usage="cmd"/> - <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX-" type="waddress" align="32" usage="cmd"/> + <reg64 offset="0x9810" name="PC_TESS_BASE" variants="A7XX" type="waddress" align="32" usage="cmd"/> - <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx"> + <reg32 offset="0x9e0b" name="PC_DRAWCALL_CNTL" type="vgt_draw_initiator_a4xx" variants="A6XX-A7XX"> <doc> Possibly not really "initiating" the draw but the layout is similar to VGT_DRAW_INITIATOR on older gens </doc> </reg32> - <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint"/> - <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint"/> + <reg32 offset="0x9e0c" name="PC_DRAWCALL_INSTANCE_NUM" type="uint" variants="A6XX-A7XX"/> + <reg32 offset="0x9e0d" name="PC_DRAWCALL_SIZE" type="uint" variants="A6XX-A7XX"/> <!-- These match the contents of CP_SET_BIN_DATA (not written directly) --> - <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL"> + <bitset name="a6xx_pc_vis_stream_cntl" inline="yes"> <bitfield name="UNK0" low="0" high="15"/> <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> <bitfield name="VSC_N" low="22" high="26" type="uint"/> - </reg32> - <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32"/> - <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32"/> + </bitset> + + <reg32 offset="0x9e11" name="PC_VIS_STREAM_CNTL" type="a6xx_pc_vis_stream_cntl" variants="A6XX-A7XX"/> + <reg64 offset="0x9e12" name="PC_PVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> + <reg64 offset="0x9e14" name="PC_DVIS_STREAM_BIN_BASE" type="waddress" align="32" variants="A6XX-A7XX"/> - <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE"> + <bitset name="a6xx_pc_drawcall_cntl_override" inline="yes"> <doc>Written by CP_SET_VISIBILITY_OVERRIDE handler</doc> <bitfield name="OVERRIDE" pos="0" type="boolean"/> - </reg32> + </bitset> + + <reg32 offset="0x9e1c" name="PC_DRAWCALL_CNTL_OVERRIDE" type="a6xx_pc_drawcall_cntl_override" variants="A6XX-A7XX"/> <reg32 offset="0x9e24" name="PC_UNKNOWN_9E24" variants="A7XX-" usage="cmd"/> @@ -2936,7 +3041,7 @@ by a particular renderpass/blit. 
<reg32 offset="0xa9b3" name="SP_CS_PROGRAM_COUNTER_OFFSET" type="uint" usage="cmd"/> <reg64 offset="0xa9b4" name="SP_CS_BASE" type="address" align="32" usage="cmd"/> <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param" usage="cmd"/> - <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" align="32" usage="cmd"/> + <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_BASE" type="waddress" align="32" usage="cmd"/> <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size" usage="cmd"/> <reg32 offset="0xa9ba" name="SP_CS_TSIZE" low="0" high="7" type="uint" usage="cmd"/> <reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config" usage="cmd"/> @@ -3021,7 +3126,7 @@ by a particular renderpass/blit. UAV state for compute shader: --> <reg64 offset="0xa9f2" name="SP_CS_UAV_BASE" type="address" align="16" variants="A6XX"/> - <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX"/> + <reg64 offset="0xa9f8" name="SP_CS_UAV_BASE" type="address" align="16" variants="A7XX-"/> <reg32 offset="0xaa00" name="SP_CS_USIZE" low="0" high="6" type="uint"/> <!-- Correlated with avgs/uvgs usage in FS --> @@ -3104,14 +3209,19 @@ by a particular renderpass/blit. instructions VS/HS/DS/GS/FS. See SP_CS_UAV_BASE_* for compute shaders. --> <reg64 offset="0xab1a" name="SP_GFX_UAV_BASE" type="address" align="16" usage="cmd"/> - <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" usage="cmd"/> + <reg32 offset="0xab20" name="SP_GFX_USIZE" low="0" high="6" type="uint" variants="A6XX-A7XX" usage="cmd"/> - <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX-" usage="cmd"/> + <reg32 offset="0xab22" name="SP_UNKNOWN_AB22" variants="A7XX" usage="cmd"/> + + <enum name="a6xx_sp_a2d_output_ifmt_type"> + <value name="OUTPUT_IFMT_2D_FLOAT" value="0"/> + <value name="OUTPUT_IFMT_2D_SINT" value="1"/> + <value name="OUTPUT_IFMT_2D_UINT" value="2"/> + </enum> <bitset name="a6xx_sp_a2d_output_info" inline="yes"> - <bitfield name="NORM" pos="0" type="boolean"/> - <bitfield name="SINT" pos="1" type="boolean"/> - <bitfield name="UINT" pos="2" type="boolean"/> + <bitfield name="HALF_PRECISION" pos="0" type="boolean"/> + <bitfield name="IFMT_TYPE" low="1" high="2" type="a6xx_sp_a2d_output_ifmt_type"/> <!-- looks like HW only cares about the base type of this format, which matches the ifmt? --> <bitfield name="COLOR_FORMAT" low="3" high="10" type="a6xx_format"/> @@ -3156,7 +3266,7 @@ by a particular renderpass/blit. <reg32 offset="0xae6b" name="SP_UNKNOWN_AE6B" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6c" name="SP_HLSQ_DBG_ECO_CNTL" variants="A7XX-" usage="cmd"/> <reg32 offset="0xae6d" name="SP_READ_SEL" variants="A7XX-"> - <bitfield name="LOCATION" low="18" high="19" type="a7xx_state_location"/> + <bitfield name="LOCATION" low="18" high="20" type="a7xx_state_location"/> <bitfield name="PIPE" low="16" high="17" type="a7xx_pipe"/> <bitfield name="STATETYPE" low="8" high="15" type="a7xx_statetype_id"/> <bitfield name="USPTP" low="4" high="7"/> @@ -3192,7 +3302,7 @@ by a particular renderpass/blit. 
<!-- looks to work in the same way as a5xx: --> <reg64 offset="0xb302" name="TPL1_GFX_BORDER_COLOR_BASE" type="address" align="128" usage="cmd"/> - <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" usage="rp_blit"/> + <reg32 offset="0xb304" name="TPL1_MSAA_SAMPLE_POS_CNTL" type="a6xx_msaa_sample_pos_cntl" variants="A6XX-A7XX" usage="rp_blit"/> <reg32 offset="0xb305" name="TPL1_PROGRAMMABLE_MSAA_POS_0" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb306" name="TPL1_PROGRAMMABLE_MSAA_POS_1" type="a6xx_programmable_msaa_pos" usage="rp_blit"/> <reg32 offset="0xb307" name="TPL1_WINDOW_OFFSET" type="a6xx_reg_xy" usage="rp_blit"/> @@ -3232,12 +3342,12 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb2c0" name="TPL1_A2D_SRC_TEXTURE_INFO" type="a6xx_a2d_src_texture_info" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX"> + <reg32 offset="0xb2c1" name="TPL1_A2D_SRC_TEXTURE_SIZE" variants="A7XX-"> <bitfield name="WIDTH" low="0" high="14" type="uint"/> <bitfield name="HEIGHT" low="15" high="29" type="uint"/> </reg32> <reg64 offset="0xb2c2" name="TPL1_A2D_SRC_TEXTURE_BASE" type="address" align="16" variants="A7XX-" usage="rp_blit"/> - <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX"> + <reg32 offset="0xb2c4" name="TPL1_A2D_SRC_TEXTURE_PITCH" variants="A7XX-"> <!-- Bits from 3..9 must be zero unless 'TPL1_A2D_BLT_CNTL::TYPE' is A6XX_TEX_IMG_BUFFER, which allows for lower alignment. @@ -3270,13 +3380,13 @@ by a particular renderpass/blit. <reg32 offset="0xb2ce" name="SP_PS_UNKNOWN_B4CE" low="0" high="31" variants="A7XX"/> <reg32 offset="0xb2cf" name="SP_PS_UNKNOWN_B4CF" low="0" high="30" variants="A7XX"/> <reg32 offset="0xb2d0" name="SP_PS_UNKNOWN_B4D0" low="0" high="29" variants="A7XX"/> - <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX"/> + <reg32 offset="0xb2d1" name="TPL1_A2D_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-"/> <reg32 offset="0xb2d2" name="TPL1_A2D_BLT_CNTL" variants="A7XX-" usage="rp_blit"> <bitfield name="RAW_COPY" pos="0" type="boolean"/> <bitfield name="START_OFFSET_TEXELS" low="16" high="21"/> <bitfield name="TYPE" low="29" high="31" type="a6xx_tex_type"/> </reg32> - <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX-" usage="rp_blit"/> + <reg32 offset="0xab21" name="SP_WINDOW_OFFSET" type="a6xx_reg_xy" variants="A7XX" usage="rp_blit"/> <!-- always 0x100000 or 0x1000000? --> <reg32 offset="0xb600" name="TPL1_DBG_ECO_CNTL" low="0" high="25" usage="cmd"/> @@ -3296,17 +3406,13 @@ by a particular renderpass/blit. </reg32> <reg32 offset="0xb605" name="TPL1_UNKNOWN_B605" low="0" high="7" type="uint" variants="A6XX" usage="cmd"/> <!-- always 0x0 or 0x44 ? 
--> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A6XX"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A6XX"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A6XX"> + <reg32 offset="0" name="REG" low="0" high="29"/> + </array> - <reg32 offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE_0" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb609" name="TPL1_BICUBIC_WEIGHTS_TABLE_1" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60a" name="TPL1_BICUBIC_WEIGHTS_TABLE_2" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60b" name="TPL1_BICUBIC_WEIGHTS_TABLE_3" low="0" high="29" variants="A7XX" usage="cmd"/> - <reg32 offset="0xb60c" name="TPL1_BICUBIC_WEIGHTS_TABLE_4" low="0" high="29" variants="A7XX" usage="cmd"/> + <array offset="0xb608" name="TPL1_BICUBIC_WEIGHTS_TABLE" stride="1" length="5" variants="A7XX"> + <reg32 offset="0" name="REG" low="0" high="29" usage="cmd"/> + </array> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="12" variants="A6XX"/> <array offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="18" variants="A7XX"/> @@ -3638,7 +3744,7 @@ by a particular renderpass/blit. <reg32 offset="0xbb10" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A6XX" usage="rp_blit"/> <reg32 offset="0xab03" name="SP_PS_CONST_CONFIG" type="a6xx_xs_const_config" variants="A7XX-" usage="rp_blit"/> - <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX_0" stride="1" length="64" variants="A7XX-"/> + <array offset="0xab40" name="SP_SHARED_CONSTANT_GFX" stride="1" length="64" variants="A7XX"/> <reg32 offset="0xbb11" name="HLSQ_SHARED_CONSTS" variants="A6XX" usage="cmd"> <doc> @@ -3800,7 +3906,7 @@ by a particular renderpass/blit. <reg32 offset="0x0030" name="CFG_DBGBUS_TRACE_BUF2"/> </domain> -<domain name="A7XX_CX_DBGC" width="32"> +<domain name="A7XX_CX_DBGC" width="32" varset="chip"> <!-- Bitfields shifted, but otherwise the same: --> <reg32 offset="0x0000" name="CFG_DBGBUS_SEL_A" variants="A7XX-"> <bitfield high="7" low="0" name="PING_INDEX"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml index 307d43dda8a2..56cfaff614a4 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml @@ -9,38 +9,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_SAMP" width="32"> <doc>Texture sampler dwords</doc> - <enum name="a6xx_tex_filter"> <!-- same as a4xx? --> - <value name="A6XX_TEX_NEAREST" value="0"/> - <value name="A6XX_TEX_LINEAR" value="1"/> - <value name="A6XX_TEX_ANISO" value="2"/> - <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> - </enum> - <enum name="a6xx_tex_clamp"> <!-- same as a4xx? 
--> - <value name="A6XX_TEX_REPEAT" value="0"/> - <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> - <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> - <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> - <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> - </enum> - <enum name="a6xx_tex_aniso"> <!-- same as a4xx? --> - <value name="A6XX_TEX_ANISO_1" value="0"/> - <value name="A6XX_TEX_ANISO_2" value="1"/> - <value name="A6XX_TEX_ANISO_4" value="2"/> - <value name="A6XX_TEX_ANISO_8" value="3"/> - <value name="A6XX_TEX_ANISO_16" value="4"/> - </enum> - <enum name="a6xx_reduction_mode"> - <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> - <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> - <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> - </enum> - <enum name="a6xx_fast_border_color"> - <!-- R B G A --> - <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> - <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> - <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> - <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="MIPFILTER_LINEAR_NEAR" pos="0" type="boolean"/> @@ -79,14 +47,6 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <domain name="A6XX_TEX_CONST" width="32" varset="chip"> <doc>Texture constant dwords</doc> - <enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> - <value name="A6XX_TEX_X" value="0"/> - <value name="A6XX_TEX_Y" value="1"/> - <value name="A6XX_TEX_Z" value="2"/> - <value name="A6XX_TEX_W" value="3"/> - <value name="A6XX_TEX_ZERO" value="4"/> - <value name="A6XX_TEX_ONE" value="5"/> - </enum> <reg32 offset="0" name="0"> <bitfield name="TILE_MODE" low="0" high="1" type="a6xx_tile_mode"/> <bitfield name="SRGB" pos="2" type="boolean"/> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml index 665539b098c6..4e42f055b85f 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml @@ -320,14 +320,14 @@ to upconvert to 32b float internally? 16b float: 3 --> <enum name="a6xx_2d_ifmt"> - <value value="0x10" name="R2D_UNORM8"/> <value value="0x7" name="R2D_INT32"/> <value value="0x6" name="R2D_INT16"/> <value value="0x5" name="R2D_INT8"/> <value value="0x4" name="R2D_FLOAT32"/> <value value="0x3" name="R2D_FLOAT16"/> + <value value="0x2" name="R2D_SNORM8"/> <value value="0x1" name="R2D_UNORM8_SRGB"/> - <value value="0x0" name="R2D_RAW"/> + <value value="0x0" name="R2D_UNORM8"/> </enum> <enum name="a6xx_tex_type"> @@ -380,4 +380,50 @@ to upconvert to 32b float internally? <value value="0x3" name="TESS_CCW_TRIS"/> </enum> +<enum name="a6xx_tex_filter"> <!-- same as a4xx? --> + <value name="A6XX_TEX_NEAREST" value="0"/> + <value name="A6XX_TEX_LINEAR" value="1"/> + <value name="A6XX_TEX_ANISO" value="2"/> + <value name="A6XX_TEX_CUBIC" value="3"/> <!-- a650 only --> +</enum> + +<enum name="a6xx_tex_clamp"> <!-- same as a4xx? --> + <value name="A6XX_TEX_REPEAT" value="0"/> + <value name="A6XX_TEX_CLAMP_TO_EDGE" value="1"/> + <value name="A6XX_TEX_MIRROR_REPEAT" value="2"/> + <value name="A6XX_TEX_CLAMP_TO_BORDER" value="3"/> + <value name="A6XX_TEX_MIRROR_CLAMP" value="4"/> +</enum> + +<enum name="a6xx_tex_aniso"> <!-- same as a4xx? 
--> + <value name="A6XX_TEX_ANISO_1" value="0"/> + <value name="A6XX_TEX_ANISO_2" value="1"/> + <value name="A6XX_TEX_ANISO_4" value="2"/> + <value name="A6XX_TEX_ANISO_8" value="3"/> + <value name="A6XX_TEX_ANISO_16" value="4"/> +</enum> + +<enum name="a6xx_reduction_mode"> + <value name="A6XX_REDUCTION_MODE_AVERAGE" value="0"/> + <value name="A6XX_REDUCTION_MODE_MIN" value="1"/> + <value name="A6XX_REDUCTION_MODE_MAX" value="2"/> +</enum> + +<enum name="a6xx_fast_border_color"> + <!-- R B G A --> + <value name="A6XX_BORDER_COLOR_0_0_0_0" value="0"/> + <value name="A6XX_BORDER_COLOR_0_0_0_1" value="1"/> + <value name="A6XX_BORDER_COLOR_1_1_1_0" value="2"/> + <value name="A6XX_BORDER_COLOR_1_1_1_1" value="3"/> +</enum> + +<enum name="a6xx_tex_swiz"> <!-- same as a4xx? --> + <value name="A6XX_TEX_X" value="0"/> + <value name="A6XX_TEX_Y" value="1"/> + <value name="A6XX_TEX_Z" value="2"/> + <value name="A6XX_TEX_W" value="3"/> + <value name="A6XX_TEX_ZERO" value="4"/> + <value name="A6XX_TEX_ONE" value="5"/> +</enum> + </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml index 3d2cc339b8f1..b15a242d974d 100644 --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml @@ -99,6 +99,10 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <bitfield name="GX_HM_GDSC_POWER_OFF" pos="6" type="boolean"/> <bitfield name="GX_HM_CLK_OFF" pos="7" type="boolean"/> </reg32> + <reg32 offset="0x50d0" name="GMU_SPTPRAC_PWR_CLK_STATUS" variants="A7XX"> + <bitfield name="GX_HM_GDSC_POWER_OFF" pos="0" type="boolean"/> + <bitfield name="GX_HM_CLK_OFF" pos="1" type="boolean"/> + </reg32> <reg32 offset="0x50e4" name="GMU_GPU_NAP_CTRL"> <bitfield name="HW_NAP_ENABLE" pos="0"/> <bitfield name="SID" low="4" high="8"/> @@ -127,6 +131,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x5088" name="GMU_ALWAYS_ON_COUNTER_L"/> <reg32 offset="0x5089" name="GMU_ALWAYS_ON_COUNTER_H"/> <reg32 offset="0x50c3" name="GMU_GMU_PWR_COL_KEEPALIVE"/> + <reg32 offset="0x50c4" name="GMU_PWR_COL_PREEMPT_KEEPALIVE"/> <reg32 offset="0x5180" name="GMU_HFI_CTRL_STATUS"/> <reg32 offset="0x5181" name="GMU_HFI_VERSION_INFO"/> <reg32 offset="0x5182" name="GMU_HFI_SFR_ADDR"/> @@ -228,6 +233,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x03ee" name="RSCC_TCS1_DRV0_STATUS"/> <reg32 offset="0x0496" name="RSCC_TCS2_DRV0_STATUS"/> <reg32 offset="0x053e" name="RSCC_TCS3_DRV0_STATUS"/> + <reg32 offset="0x05e6" name="RSCC_TCS4_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x068e" name="RSCC_TCS5_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0736" name="RSCC_TCS6_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x07de" name="RSCC_TCS7_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x0886" name="RSCC_TCS8_DRV0_STATUS" variants="A7XX"/> + <reg32 offset="0x092e" name="RSCC_TCS9_DRV0_STATUS" variants="A7XX"/> </domain> </database> diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 7abc08635495..0e10e1c6d263 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -120,12 +120,12 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> <value name="BLIT_OP_FILL_2D" value="39" 
variants="A5XX-"/> <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/> - <value name="UNK_40" value="40" variants="A7XX"/> + <value name="LRZ_CACHE_INVALIDATE" value="40" variants="A7XX"/> <value name="LRZ_Q_CACHE_INVALIDATE" value="41" variants="A7XX"/> <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> - <value name="UNK_2C" value="44" variants="A5XX-"/> - <value name="UNK_2D" value="45" variants="A5XX-"/> + <value name="VSC_BINNING_START" value="44" variants="A5XX-"/> + <value name="VSC_BINNING_END" value="45" variants="A5XX-"/> <!-- a6xx events --> <doc> @@ -523,7 +523,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <!-- Seems to set the mode flags which control which CP_SET_DRAW_STATE packets are executed, based on their ENABLE_MASK values - + CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE packets w/ ENABLE_MASK & 0x6 to execute immediately --> @@ -640,8 +640,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/> <doc> Clears, adds to local, or adds to global timestamp </doc> <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/> - <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? --> - <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/> + <value name="CP_NON_CONTEXT_REG_BUNCH" value="0x5d" variants="A7XX-"/> <doc> Write to a scratch memory that is read by CP_REG_TEST with SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. @@ -918,12 +917,6 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="4" name="4"> - <bitfield name="INDX_BASE_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="INDX_BASE_HI" low="0" high="31"/> - </reg32> <reg64 offset="4" name="INDX_BASE" type="address"/> <reg32 offset="6" name="6"> <!-- max # of elements in index buffer --> @@ -1099,8 +1092,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> - <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> + <!-- high bit is 28 until a750: --> + <bitfield name="GROUP_ID" low="24" high="29" type="uint"/> </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="1" name="1"> <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> </reg32> @@ -1166,26 +1161,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> <stripe varset="a7xx_abs_mask_mode" variants="NO_ABS_MASK"> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="1" name="1"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="3" name="3"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="3" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="5" name="5"> - <bitfield 
name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="5" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1195,26 +1175,11 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <stripe varset="a7xx_abs_mask_mode" variants="ABS_MASK"> <reg32 offset="1" name="ABS_MASK"/> <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> - <reg32 offset="2" name="2"> - <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="2" name="BIN_DATA_ADDR" type="address"/> <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> - <reg32 offset="4" name="4"> - <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> - </reg32> - <reg32 offset="5" name="5"> - <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> - </reg32> + <reg64 offset="4" name="BIN_SIZE_ADDR" type="address"/> <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> - <reg32 offset="6" name="6"> - <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> - </reg32> - <reg32 offset="7" name="7"> - <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> - </reg32> + <reg64 offset="6" name="BIN_PRIM_STRM" type="address"/> <!-- a7xx adds a few more addresses to the end of the pkt --> @@ -1300,7 +1265,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> -<domain name="CP_REG_TO_MEM" width="32"> +<domain name="CP_REG_TO_MEM" width="32" prefix="chip"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="17" type="hex"/> <!-- number of registers/dwords copied is max(CNT, 1). --> @@ -1308,12 +1273,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="DEST" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="DEST" type="address"/> + </stripe> </domain> <domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> @@ -1329,12 +1294,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="DEST" type="waddress"/> <reg32 offset="3" name="3"> <bitfield name="OFFSET0" low="0" high="17" type="hex"/> <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> @@ -1354,18 +1314,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="DEST" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="DEST_HI" low="0" high="31"/> - </reg32> - <reg32 offset="3" name="3"> - <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="4" name="4"> - <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> - </reg32> + 
<reg64 offset="1" name="DEST" type="waddress"/> + <reg64 offset="3" name="OFFSET" type="waddress"/> </domain> <domain name="CP_MEM_TO_REG" width="32"> @@ -1378,12 +1328,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="SRC" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> - <bitfield name="SRC_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="1" name="SRC" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="1" name="SRC" type="address"/> + </stripe> </domain> <domain name="CP_MEM_TO_MEM" width="32"> @@ -1403,6 +1353,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- some other kind of wait --> <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> + <reg64 offset="1" name="DST" type="waddress"/> + <reg64 offset="3" name="SRC_A" type="address"/> + <reg64 offset="5" name="SRC_B" type="address"/> + <reg64 offset="7" name="SRC_C" type="address"/> <!-- followed by sequence of addresses.. the first is the destination and the rest are N src addresses which are @@ -1461,12 +1415,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </domain> <domain name="CP_MEM_WRITE" width="32"> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <stripe varset="chip" variants="A2XX-A4XX"> + <reg32 offset="0" name="ADDR" type="address"/> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg64 offset="0" name="ADDR" type="address"/> + </stripe> <!-- followed by the DWORDs to write --> </domain> @@ -1518,24 +1472,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> <reg32 offset="4" name="4"> <bitfield name="MASK" low="0" high="31"/> </reg32> - <reg32 offset="5" name="5"> - <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="6" name="6"> - <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="5" name="WRITE_ADDR" type="waddress"/> <reg32 offset="7" name="7"> <bitfield name="WRITE_DATA" low="0" high="31"/> </reg32> @@ -1550,12 +1494,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- Reserved for flags, presumably? 
Unused in FW --> <bitfield name="RESERVED" low="0" high="31" type="hex"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1573,12 +1512,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> - <reg32 offset="1" name="1"> - <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> - </reg32> + <reg64 offset="1" name="POLL_ADDR" type="address"/> <reg32 offset="3" name="3"> <bitfield name="REF" low="0" high="31"/> </reg32> @@ -1712,12 +1646,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) TODO what is gpuaddr for, seems to be all 0's.. maybe needed for context switch? --> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_0_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_0_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="waddress"/> <reg32 offset="3" name="3"> <!-- ??? --> </reg32> @@ -1832,9 +1761,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> </reg32> <stripe varset="chip" variants="A4XX"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR" low="0" high="31"/> - </reg32> + <reg32 offset="1" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -1843,12 +1770,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </stripe> <stripe varset="chip" variants="A5XX-"> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="2" name="2"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="1" name="ADDR" type="address"/> <reg32 offset="3" name="3"> <!-- localsize is value minus one: --> <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> @@ -2161,12 +2083,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </doc> </value> </enum> - <reg32 offset="0" name="0"> - <bitfield name="ADDR_LO" low="0" high="31"/> - </reg32> - <reg32 offset="1" name="1"> - <bitfield name="ADDR_HI" low="0" high="31"/> - </reg32> + <reg64 offset="0" name="ADDR" type="address"/> <reg32 offset="2" name="2"> <bitfield name="DWORDS" low="0" high="19" type="uint"/> <bitfield name="TYPE" low="20" high="21" type="amble_type"/> diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index 4e5ac0f25dea..f41516dd0567 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -22,7 +22,16 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00018" name="GLBL_CTRL"/> <reg32 offset="0x0001c" name="RBUF_CTRL"/> <reg32 offset="0x00020" name="VREG_CTRL_0"/> - <reg32 offset="0x00024" name="CTRL_0"/> + <reg32 offset="0x00024" name="CTRL_0"> + <bitfield name="CLKSL_SHUTDOWNB" pos="7" type="boolean"/> + <bitfield name="DIGTOP_PWRDN_B" pos="6" type="boolean"/> + <bitfield name="PLL_SHUTDOWNB" pos="5" type="boolean"/> + <bitfield name="DLN3_SHUTDOWNB" pos="4" type="boolean"/> + <bitfield 
name="DLN2_SHUTDOWNB" pos="3" type="boolean"/> + <bitfield name="CLK_SHUTDOWNB" pos="2" type="boolean"/> + <bitfield name="DLN1_SHUTDOWNB" pos="1" type="boolean"/> + <bitfield name="DLN0_SHUTDOWNB" pos="0" type="boolean"/> + </reg32> <reg32 offset="0x00028" name="CTRL_1"/> <reg32 offset="0x0002c" name="CTRL_2"/> <reg32 offset="0x00030" name="CTRL_3"/> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index a409404627c7..1d603dadfabd 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,7 +11,6 @@ import collections import argparse import time import datetime -import re class Error(Exception): def __init__(self, message): @@ -31,7 +30,7 @@ class Enum(object): def names(self): return [n for (n, value) in self.values] - def dump(self): + def dump(self, is_deprecated): use_hex = False for (name, value) in self.values: if value > 0x1000: @@ -45,7 +44,7 @@ class Enum(object): print("\t%s = %d," % (name, value)) print("};\n") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass class Field(object): @@ -70,11 +69,11 @@ class Field(object): raise parser.error("booleans should be 1 bit fields") elif self.type == "float" and not (high - low == 31 or high - low == 15): raise parser.error("floats should be 16 or 32 bit fields") - elif not self.type in builtin_types and not self.type in parser.enums: + elif self.type not in builtin_types and self.type not in parser.enums: raise parser.error("unknown type '%s'" % self.type) def ctype(self, var_name): - if self.type == None: + if self.type is None: type = "uint32_t" val = var_name elif self.type == "boolean": @@ -124,7 +123,7 @@ def field_name(reg, f): name = f.name.lower() else: # We hit this path when a reg is defined with no bitset fields, ie. 
- # <reg32 offset="0x88db" name="RB_BLIT_DST_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> + # <reg32 offset="0x88db" name="RB_RESOLVE_SYSTEM_BUFFER_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/> name = reg.name.lower() if (name in [ "double", "float", "int" ]) or not (name[0].isalpha()): @@ -146,10 +145,23 @@ def indices_strides(indices): "%s(i%d)" % (offset, idx) for (idx, (ctype, stride, offset)) in enumerate(indices)]) +def is_number(str): + try: + int(str) + return True + except ValueError: + return False + +def sanitize_variant(variant): + if variant and "-" in variant: + return variant[:variant.index("-")] + return variant + class Bitset(object): def __init__(self, name, template): self.name = name self.inline = False + self.reg = None if template: self.fields = template.fields[:] else: @@ -175,11 +187,7 @@ class Bitset(object): print("#endif\n") print(" return (struct fd_reg_pair) {") - if reg.array: - print(" .reg = REG_%s(__i)," % reg.full_name) - else: - print(" .reg = REG_%s," % reg.full_name) - + print(" .reg = (uint32_t)%s," % reg.reg_offset()) print(" .value =") for f in self.fields: if f.type in [ "address", "waddress" ]: @@ -204,7 +212,7 @@ class Bitset(object): print(" };") - def dump_pack_struct(self, reg=None): + def dump_pack_struct(self, is_deprecated, reg=None): if not reg: return @@ -229,12 +237,15 @@ class Bitset(object): tab_to(" uint32_t", "dword;") print("};\n") + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" if reg.array: - print("static inline struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" % + (depcrstr, prefix, prefix)) else: - print("static inline struct fd_reg_pair\npack_%s(struct %s fields)\n{" % - (prefix, prefix)) + print("static inline%s struct fd_reg_pair\npack_%s(struct %s fields)\n{" % + (depcrstr, prefix, prefix)) self.dump_regpair_builder(reg) @@ -253,18 +264,23 @@ class Bitset(object): (prefix, prefix, prefix, skip)) - def dump(self, prefix=None): - if prefix == None: + def dump(self, is_deprecated, prefix=None): + if prefix is None: prefix = self.name + if self.reg and self.reg.bit_size == 64: + print("static inline uint32_t %s_LO(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") + print("static inline uint32_t %s_HI(uint32_t val)\n{" % prefix) + print("\treturn val;\n}") for f in self.fields: if f.name: name = prefix + "_" + f.name else: name = prefix - if not f.name and f.low == 0 and f.shr == 0 and not f.type in ["float", "fixed", "ufixed"]: + if not f.name and f.low == 0 and f.shr == 0 and f.type not in ["float", "fixed", "ufixed"]: pass - elif f.type == "boolean" or (f.type == None and f.low == f.high): + elif f.type == "boolean" or (f.type is None and f.low == f.high): tab_to("#define %s" % name, "0x%08x" % (1 << f.low)) else: tab_to("#define %s__MASK" % name, "0x%08x" % mask(f.low, f.high)) @@ -286,6 +302,7 @@ class Array(object): self.domain = domain self.variant = variant self.parent = parent + self.children = [] if self.parent: self.name = self.parent.name + "_" + self.local_name else: @@ -337,12 +354,15 @@ class Array(object): offset += self.parent.total_offset() return offset - def dump(self): + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED" proto = indices_varlist(self.indices()) strides = indices_strides(self.indices()) array_offset = self.total_offset() if self.fixed_offsets: - print("static inline uint32_t 
__offset_%s(%s idx)" % (self.local_name, self.index_ctype())) + print("static inline%s uint32_t __offset_%s(%s idx)" % (depcrstr, self.local_name, self.index_ctype())) print("{\n\tswitch (idx) {") if self.index_type: for val, offset in zip(self.index_type.names(), self.offsets): @@ -357,7 +377,7 @@ class Array(object): else: tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, proto), "(0x%08x + %s )\n" % (array_offset, strides)) - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): pass def dump_regpair_builder(self): @@ -373,6 +393,7 @@ class Reg(object): self.bit_size = bit_size if array: self.name = array.name + "_" + self.name + array.children.append(self) self.full_name = self.domain + "_" + self.name if "stride" in attrs: self.stride = int(attrs["stride"], 0) @@ -397,25 +418,34 @@ class Reg(object): else: return self.offset - def dump(self): + def reg_offset(self): + if self.array: + offset = self.array.offset + self.offset + return "(0x%08x + 0x%x*__i)" % (offset, self.array.stride) + return "0x%08x" % self.offset + + def dump(self, is_deprecated): + depcrstr = "" + if is_deprecated: + depcrstr = " FD_DEPRECATED " proto = indices_prototype(self.indices()) strides = indices_strides(self.indices()) offset = self.total_offset() if proto == '': tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset) else: - print("static inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (self.full_name, proto, offset, strides)) + print("static inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (depcrstr, self.full_name, proto, offset, strides)) if self.bitset.inline: - self.bitset.dump(self.full_name) + self.bitset.dump(is_deprecated, self.full_name) + print("") - def dump_pack_struct(self): + def dump_pack_struct(self, is_deprecated): if self.bitset.inline: - self.bitset.dump_pack_struct(self) + self.bitset.dump_pack_struct(is_deprecated, self) def dump_regpair_builder(self): - if self.bitset.inline: - self.bitset.dump_regpair_builder(self) + self.bitset.dump_regpair_builder(self) def dump_py(self): print("\tREG_%s = 0x%08x" % (self.full_name, self.offset)) @@ -444,9 +474,6 @@ class Parser(object): self.variants = set() self.file = [] self.xml_files = [] - self.copyright_year = None - self.authors = [] - self.license = None def error(self, message): parser, filename = self.stack[-1] @@ -454,7 +481,7 @@ class Parser(object): def prefix(self, variant=None): if self.current_prefix_type == "variant" and variant: - return variant + return sanitize_variant(variant) elif self.current_stripe: return self.current_stripe + "_" + self.current_domain elif self.current_prefix: @@ -500,15 +527,22 @@ class Parser(object): return varset def parse_variants(self, attrs): - if not "variants" in attrs: + if "variants" not in attrs: return None - variant = attrs["variants"].split(",")[0] - if "-" in variant: - variant = variant[:variant.index("-")] + variant = attrs["variants"].split(",")[0] varset = self.parse_varset(attrs) - assert varset.has_name(variant) + if "-" in variant: + # if we have a range, validate that both the start and end + # of the range are valid enums: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + assert varset.has_name(start) + if end != "": + assert varset.has_name(end) + else: + assert varset.has_name(variant) return variant @@ -572,9 +606,6 @@ class Parser(object): error_str = str(xmlschema.error_log.filter_from_errors()[0]) raise self.error("Schema validation failed for: " + filename + "\n" + error_str) except ImportError 
as e: - if self.validate: - raise e - print("lxml not found, skipping validation", file=sys.stderr) def do_parse(self, filename): @@ -620,6 +651,7 @@ class Parser(object): self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size) self.current_reg.bitset = self.current_bitset + self.current_bitset.reg = self.current_reg if len(self.stack) == 1: self.file.append(self.current_reg) @@ -643,7 +675,7 @@ class Parser(object): elif name == "domain": self.current_domain = attrs["name"] if "prefix" in attrs: - self.current_prefix = self.parse_variants(attrs) + self.current_prefix = sanitize_variant(self.parse_variants(attrs)) self.current_prefix_type = attrs["prefix"] else: self.current_prefix = None @@ -651,7 +683,7 @@ class Parser(object): if "varset" in attrs: self.current_varset = self.enums[attrs["varset"]] elif name == "stripe": - self.current_stripe = self.parse_variants(attrs) + self.current_stripe = sanitize_variant(self.parse_variants(attrs)) elif name == "enum": self.current_enum_value = 0 self.current_enum = Enum(attrs["name"]) @@ -686,10 +718,6 @@ class Parser(object): self.parse_field(attrs["name"], attrs) elif name == "database": self.do_validate(attrs["xsi:schemaLocation"]) - elif name == "copyright": - self.copyright_year = attrs["year"] - elif name == "author": - self.authors.append(attrs["name"] + " <" + attrs["email"] + "> " + attrs["name"]) def end_element(self, name): if name == "domain": @@ -703,11 +731,16 @@ class Parser(object): elif name == "reg32": self.current_reg = None elif name == "array": + # if the array has no Reg children, push an implicit reg32: + if len(self.current_array.children) == 0: + attrs = { + "name": "REG", + "offset": "0", + } + self.parse_reg(attrs, 32) self.current_array = self.current_array.parent elif name == "enum": self.current_enum = None - elif name == "license": - self.license = self.cdata def character_data(self, data): self.cdata += data @@ -720,10 +753,10 @@ class Parser(object): if variants: for variant, vreg in variants.items(): if reg == vreg: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) else: for variant in self.variants: - d[(usage, variant)].append(reg) + d[(usage, sanitize_variant(variant))].append(reg) print("#ifdef __cplusplus") @@ -753,6 +786,9 @@ class Parser(object): print("#endif") + def has_variants(self, reg): + return reg.name in self.variant_regs and not is_number(reg.name) and not is_number(reg.name[1:]) + def dump(self): enums = [] bitsets = [] @@ -766,7 +802,7 @@ class Parser(object): regs.append(e) for e in enums + bitsets + regs: - e.dump() + e.dump(self.has_variants(e)) self.dump_reg_usages() @@ -782,8 +818,7 @@ class Parser(object): def dump_reg_variants(self, regname, variants): - # Don't bother for things that only have a single variant: - if len(variants) == 1: + if is_number(regname) or is_number(regname[1:]): return print("#ifdef __cplusplus") print("struct __%s {" % regname) @@ -834,11 +869,20 @@ class Parser(object): xtravar = "__i, " print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname)) for variant in variants.keys(): - print(" if (%s == %s) {" % (varenum.upper(), variant)) + if "-" in variant: + start = variant[:variant.index("-")] + end = variant[variant.index("-") + 1:] + if end != "": + print(" if ((%s >= %s) && (%s <= %s)) {" % (varenum.upper(), start, varenum.upper(), end)) + else: + print(" if (%s >= %s) {" % (varenum.upper(), start)) + else: + print(" if (%s == %s) {" % (varenum.upper(), variant)) reg = variants[variant] 
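With the FD_DEPRECATED plumbing added to gen_header.py, accessors that now have per-variant replacements are emitted behind a deprecation attribute. Roughly the shape the generated header takes (the register name, offset and stride below are invented; only the FD_DEPRECATED guard mirrors the script's output):

#include <stdint.h>

#ifndef FD_NO_DEPRECATED_PACK
#define FD_DEPRECATED __attribute__((deprecated))
#else
#define FD_DEPRECATED
#endif

/* a deprecated indexed accessor, as printed by dump() for an array register */
static inline FD_DEPRECATED uint32_t REG_A6XX_EXAMPLE_REG(uint32_t i0)
{
	return 0x0000b608 + 0x1*i0;
}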
reg.dump_regpair_builder() print(" } else") print(" assert(!\"invalid variant\");") + print(" return (struct fd_reg_pair){};") print("}") if bit_size == 64: @@ -851,7 +895,7 @@ class Parser(object): def dump_structs(self): for e in self.file: - e.dump_pack_struct() + e.dump_pack_struct(self.has_variants(e)) for regname in self.variant_regs: self.dump_reg_variants(regname, self.variant_regs[regname]) @@ -868,33 +912,7 @@ def dump_c(args, guard, func): print("#ifndef %s\n#define %s\n" % (guard, guard)) - print("""/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng gen_header.py tool in this git repository: -http://gitlab.freedesktop.org/mesa/mesa/ -git clone https://gitlab.freedesktop.org/mesa/mesa.git - -The rules-ng-ng source files this header was generated from are: -""") - maxlen = 0 - for filepath in p.xml_files: - new_filepath = re.sub("^.+drivers","drivers",filepath) - maxlen = max(maxlen, len(new_filepath)) - for filepath in p.xml_files: - pad = " " * (maxlen - len(new_filepath)) - filesize = str(os.path.getsize(filepath)) - filesize = " " * (7 - len(filesize)) + filesize - filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)") - if p.copyright_year: - current_year = str(datetime.date.today().year) - print() - print("Copyright (C) %s-%s by the following authors:" % (p.copyright_year, current_year)) - for author in p.authors: - print("- " + author) - if p.license: - print(p.license) - print("*/") + print("/* Autogenerated file, DO NOT EDIT manually! */") print() print("#ifdef __KERNEL__") @@ -912,9 +930,20 @@ The rules-ng-ng source files this header was generated from are: print("#endif") print() + print("#ifndef FD_NO_DEPRECATED_PACK") + print("#define FD_DEPRECATED __attribute__((deprecated))") + print("#else") + print("#define FD_DEPRECATED") + print("#endif") + print() + func(p) - print("\n#endif /* %s */" % guard) + print() + print("#undef FD_DEPRECATED") + print() + + print("#endif /* %s */" % guard) def dump_c_defines(args): @@ -931,7 +960,7 @@ def dump_py_defines(args): p = Parser() try: - p.parse(args.rnn, args.xml) + p.parse(args.rnn, args.xml, args.validate) except Error as e: print(e, file=sys.stderr) exit(1) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b96f0555ca14..f26562eafffc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -929,7 +929,7 @@ done: nvif_vmm_put(vmm, &old_mem->vma[1]); nvif_vmm_put(vmm, &old_mem->vma[0]); } - return 0; + return ret; } static int diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs index b28b2e05cc15..91b7380f83ab 100644 --- a/drivers/gpu/drm/nova/driver.rs +++ b/drivers/gpu/drm/nova/driver.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, types::ARef}; +use kernel::{ + auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, sync::aref::ARef, +}; use crate::file::File; use crate::gem::NovaObject; diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index 33b62d21400c..2760ba4f3450 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -4,7 +4,7 @@ use kernel::{ drm, drm::{gem, gem::BaseObject}, prelude::*, - types::ARef, + sync::aref::ARef, }; use crate::{ @@ -16,16 +16,14 @@ use crate::{ #[pin_data] pub(crate) struct NovaObject {} -impl 
gem::BaseDriverObject<gem::Object<NovaObject>> for NovaObject { +impl gem::DriverObject for NovaObject { + type Driver = NovaDriver; + fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit<Self, Error> { try_pin_init!(NovaObject {}) } } -impl gem::DriverObject for NovaObject { - type Driver = NovaDriver; -} - impl NovaObject { /// Create a new DRM GEM object. pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result<ARef<gem::Object<Self>>> { diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 0cc9055f4ee5..f5e01cb16cfc 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -898,8 +898,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; - if (queue->entity.fence_context) - drm_sched_entity_destroy(&queue->entity); + drm_sched_entity_destroy(&queue->entity); if (queue->scheduler.ops) drm_sched_fini(&queue->scheduler); @@ -3609,11 +3608,6 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) if (!group) return -EINVAL; - for (u32 i = 0; i < group->queue_count; i++) { - if (group->queues[i]) - drm_sched_entity_destroy(&group->queues[i]->entity); - } - mutex_lock(&sched->reset.lock); mutex_lock(&sched->lock); group->destroyed = true; diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 3398160ad75e..5523911b990d 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -7,6 +7,7 @@ */ #include <linux/clk.h> +#include <linux/hw_bitfield.h> #include <linux/iopoll.h> #include <linux/math64.h> #include <linux/mfd/syscon.h> @@ -148,7 +149,7 @@ #define DW_MIPI_NEEDS_GRF_CLK BIT(1) #define PX30_GRF_PD_VO_CON1 0x0438 -#define PX30_DSI_FORCETXSTOPMODE (0xf << 7) +#define PX30_DSI_FORCETXSTOPMODE (0xfUL << 7) #define PX30_DSI_FORCERXMODE BIT(6) #define PX30_DSI_TURNDISABLE BIT(5) #define PX30_DSI_LCDC_SEL BIT(0) @@ -167,16 +168,16 @@ #define RK3399_DSI1_LCDC_SEL BIT(4) #define RK3399_GRF_SOC_CON22 0x6258 -#define RK3399_DSI0_TURNREQUEST (0xf << 12) -#define RK3399_DSI0_TURNDISABLE (0xf << 8) -#define RK3399_DSI0_FORCETXSTOPMODE (0xf << 4) -#define RK3399_DSI0_FORCERXMODE (0xf << 0) +#define RK3399_DSI0_TURNREQUEST (0xfUL << 12) +#define RK3399_DSI0_TURNDISABLE (0xfUL << 8) +#define RK3399_DSI0_FORCETXSTOPMODE (0xfUL << 4) +#define RK3399_DSI0_FORCERXMODE (0xfUL << 0) #define RK3399_GRF_SOC_CON23 0x625c -#define RK3399_DSI1_TURNDISABLE (0xf << 12) -#define RK3399_DSI1_FORCETXSTOPMODE (0xf << 8) -#define RK3399_DSI1_FORCERXMODE (0xf << 4) -#define RK3399_DSI1_ENABLE (0xf << 0) +#define RK3399_DSI1_TURNDISABLE (0xfUL << 12) +#define RK3399_DSI1_FORCETXSTOPMODE (0xfUL << 8) +#define RK3399_DSI1_FORCERXMODE (0xfUL << 4) +#define RK3399_DSI1_ENABLE (0xfUL << 0) #define RK3399_GRF_SOC_CON24 0x6260 #define RK3399_TXRX_MASTERSLAVEZ BIT(7) @@ -186,8 +187,8 @@ #define RK3399_TXRX_TURNREQUEST GENMASK(3, 0) #define RK3568_GRF_VO_CON2 0x0368 -#define RK3568_DSI0_SKEWCALHS (0x1f << 11) -#define RK3568_DSI0_FORCETXSTOPMODE (0xf << 4) +#define RK3568_DSI0_SKEWCALHS (0x1fUL << 11) +#define RK3568_DSI0_FORCETXSTOPMODE (0xfUL << 4) #define RK3568_DSI0_TURNDISABLE BIT(2) #define RK3568_DSI0_FORCERXMODE BIT(0) @@ -197,18 +198,16 @@ * come from. Name GRF_VO_CON3 is assumed. 
*/ #define RK3568_GRF_VO_CON3 0x36c -#define RK3568_DSI1_SKEWCALHS (0x1f << 11) -#define RK3568_DSI1_FORCETXSTOPMODE (0xf << 4) +#define RK3568_DSI1_SKEWCALHS (0x1fUL << 11) +#define RK3568_DSI1_FORCETXSTOPMODE (0xfUL << 4) #define RK3568_DSI1_TURNDISABLE BIT(2) #define RK3568_DSI1_FORCERXMODE BIT(0) #define RV1126_GRF_DSIPHY_CON 0x10220 -#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4) +#define RV1126_DSI_FORCETXSTOPMODE (0xfUL << 4) #define RV1126_DSI_TURNDISABLE BIT(2) #define RV1126_DSI_FORCERXMODE BIT(0) -#define HIWORD_UPDATE(val, mask) (val | (mask) << 16) - enum { DW_DSI_USAGE_IDLE, DW_DSI_USAGE_DSI, @@ -1484,14 +1483,13 @@ static const struct rockchip_dw_dsi_chip_data px30_chip_data[] = { { .reg = 0xff450000, .lcdsel_grf_reg = PX30_GRF_PD_VO_CON1, - .lcdsel_big = HIWORD_UPDATE(0, PX30_DSI_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(PX30_DSI_LCDC_SEL, - PX30_DSI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 1), .lanecfg1_grf_reg = PX30_GRF_PD_VO_CON1, - .lanecfg1 = HIWORD_UPDATE(0, PX30_DSI_TURNDISABLE | - PX30_DSI_FORCERXMODE | - PX30_DSI_FORCETXSTOPMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((PX30_DSI_TURNDISABLE | + PX30_DSI_FORCERXMODE | + PX30_DSI_FORCETXSTOPMODE), 0), .max_data_lanes = 4, }, @@ -1502,9 +1500,9 @@ static const struct rockchip_dw_dsi_chip_data rk3128_chip_data[] = { { .reg = 0x10110000, .lanecfg1_grf_reg = RK3128_GRF_LVDS_CON0, - .lanecfg1 = HIWORD_UPDATE(0, RK3128_DSI_TURNDISABLE | - RK3128_DSI_FORCERXMODE | - RK3128_DSI_FORCETXSTOPMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3128_DSI_TURNDISABLE | + RK3128_DSI_FORCERXMODE | + RK3128_DSI_FORCETXSTOPMODE), 0), .max_data_lanes = 4, }, { /* sentinel */ } @@ -1514,16 +1512,16 @@ static const struct rockchip_dw_dsi_chip_data rk3288_chip_data[] = { { .reg = 0xff960000, .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI0_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI0_LCDC_SEL, RK3288_DSI0_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 1), .max_data_lanes = 4, }, { .reg = 0xff964000, .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI1_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI1_LCDC_SEL, RK3288_DSI1_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 1), .max_data_lanes = 4, }, @@ -1539,13 +1537,13 @@ static int rk3399_dphy_tx1rx1_init(struct phy *phy) * Assume ISP0 is supplied by the RX0 dphy. 
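The dw-mipi-dsi changes drop the driver-local HIWORD_UPDATE() in favour of FIELD_PREP_WM16()/FIELD_PREP_WM16_CONST() from <linux/hw_bitfield.h>. The GRF write format is the same either way: the new field value sits in the low 16 bits and the matching write-enable bits in the high 16. An open-coded sketch of the expected equivalence (wm16_prep() is illustrative only; the real macro's definition is not shown in this patch):

/* the removed driver-local helper */
#define HIWORD_UPDATE(val, mask)  (val | (mask) << 16)

/* guess at what FIELD_PREP_WM16(mask, val) evaluates to for a contiguous
 * field mask: value shifted into the field, plus the same mask replicated
 * into the top half as write-enable bits */
static inline unsigned int wm16_prep(unsigned int mask, unsigned int val)
{
	unsigned int shift = __builtin_ctz(mask); /* lowest set bit of the field */

	return ((val << shift) & mask) | (mask << 16);
}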
*/ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_SRC_SEL_ISP0)); + FIELD_PREP_WM16(RK3399_TXRX_SRC_SEL_ISP0, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_BASEDIR)); + FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0)); return 0; } @@ -1559,21 +1557,20 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy) usleep_range(100, 150); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(RK3399_TXRX_BASEDIR, RK3399_TXRX_BASEDIR)); + FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 1)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_FORCERXMODE)); + FIELD_PREP_WM16(RK3399_DSI1_FORCERXMODE, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_FORCETXSTOPMODE)); + FIELD_PREP_WM16(RK3399_DSI1_FORCETXSTOPMODE, 0)); /* Disable lane turn around, which is ignored in receive mode */ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_TURNREQUEST)); + FIELD_PREP_WM16(RK3399_TXRX_TURNREQUEST, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(RK3399_DSI1_TURNDISABLE, - RK3399_DSI1_TURNDISABLE)); + FIELD_PREP_WM16(RK3399_DSI1_TURNDISABLE, 0xf)); usleep_range(100, 150); dsi_write(dsi, DSI_PHY_TST_CTRL0, PHY_TESTCLK | PHY_UNTESTCLR); @@ -1581,8 +1578,8 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy) /* Enable dphy lanes */ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(GENMASK(dsi->dphy_config.lanes - 1, 0), - RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, + GENMASK(dsi->dphy_config.lanes - 1, 0))); usleep_range(100, 150); @@ -1594,7 +1591,7 @@ static int rk3399_dphy_tx1rx1_power_off(struct phy *phy) struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0)); return 0; } @@ -1603,15 +1600,14 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { { .reg = 0xff960000, .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI0_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI0_LCDC_SEL, - RK3399_DSI0_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 1), .lanecfg1_grf_reg = RK3399_GRF_SOC_CON22, - .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI0_TURNREQUEST | - RK3399_DSI0_TURNDISABLE | - RK3399_DSI0_FORCETXSTOPMODE | - RK3399_DSI0_FORCERXMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI0_TURNREQUEST | + RK3399_DSI0_TURNDISABLE | + RK3399_DSI0_FORCETXSTOPMODE | + RK3399_DSI0_FORCERXMODE), 0), .flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK, .max_data_lanes = 4, @@ -1619,25 +1615,23 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { { .reg = 0xff968000, .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI1_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI1_LCDC_SEL, - RK3399_DSI1_LCDC_SEL), + .lcdsel_big = 
FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 1), + .lanecfg1_grf_reg = RK3399_GRF_SOC_CON23, - .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI1_TURNDISABLE | - RK3399_DSI1_FORCETXSTOPMODE | - RK3399_DSI1_FORCERXMODE | - RK3399_DSI1_ENABLE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI1_TURNDISABLE | + RK3399_DSI1_FORCETXSTOPMODE | + RK3399_DSI1_FORCERXMODE | + RK3399_DSI1_ENABLE), 0), .lanecfg2_grf_reg = RK3399_GRF_SOC_CON24, - .lanecfg2 = HIWORD_UPDATE(RK3399_TXRX_MASTERSLAVEZ | - RK3399_TXRX_ENABLECLK, - RK3399_TXRX_MASTERSLAVEZ | - RK3399_TXRX_ENABLECLK | - RK3399_TXRX_BASEDIR), + .lanecfg2 = (FIELD_PREP_WM16_CONST(RK3399_TXRX_MASTERSLAVEZ, 1) | + FIELD_PREP_WM16_CONST(RK3399_TXRX_ENABLECLK, 1) | + FIELD_PREP_WM16_CONST(RK3399_TXRX_BASEDIR, 0)), .enable_grf_reg = RK3399_GRF_SOC_CON23, - .enable = HIWORD_UPDATE(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE), + .enable = FIELD_PREP_WM16_CONST(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE), .flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK, .max_data_lanes = 4, @@ -1653,19 +1647,19 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { { .reg = 0xfe060000, .lanecfg1_grf_reg = RK3568_GRF_VO_CON2, - .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS | - RK3568_DSI0_FORCETXSTOPMODE | - RK3568_DSI0_TURNDISABLE | - RK3568_DSI0_FORCERXMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI0_SKEWCALHS, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCETXSTOPMODE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCERXMODE, 0)), .max_data_lanes = 4, }, { .reg = 0xfe070000, .lanecfg1_grf_reg = RK3568_GRF_VO_CON3, - .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS | - RK3568_DSI1_FORCETXSTOPMODE | - RK3568_DSI1_TURNDISABLE | - RK3568_DSI1_FORCERXMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI1_SKEWCALHS, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCETXSTOPMODE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCERXMODE, 0)), .max_data_lanes = 4, }, { /* sentinel */ } @@ -1675,9 +1669,9 @@ static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = { { .reg = 0xffb30000, .lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON, - .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE | - RV1126_DSI_FORCERXMODE | - RV1126_DSI_FORCETXSTOPMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RV1126_DSI_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RV1126_DSI_FORCERXMODE, 0) | + FIELD_PREP_WM16_CONST(RV1126_DSI_FORCETXSTOPMODE, 0)), .max_data_lanes = 4, }, { /* sentinel */ } diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index acb59b25d928..7b613997bb50 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -4,6 +4,7 @@ */ #include <linux/clk.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -54,8 +55,6 @@ #define RK3568_HDMI_SDAIN_MSK BIT(15) #define RK3568_HDMI_SCLIN_MSK BIT(14) -#define HIWORD_UPDATE(val, mask) (val | (mask) << 16) - /** * struct rockchip_hdmi_chip_data - splite the grf setting of kind of chips * @lcdsel_grf_reg: grf register offset of lcdc select @@ -355,17 +354,14 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) dw_hdmi_phy_setup_hpd(dw_hdmi, data); - regmap_write(hdmi->regmap, - RK3228_GRF_SOC_CON6, - HIWORD_UPDATE(RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL | - 
RK3228_HDMI_SCL_VSEL, - RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL | - RK3228_HDMI_SCL_VSEL)); - - regmap_write(hdmi->regmap, - RK3228_GRF_SOC_CON2, - HIWORD_UPDATE(RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK, - RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK)); + regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON6, + FIELD_PREP_WM16(RK3228_HDMI_HPD_VSEL, 1) | + FIELD_PREP_WM16(RK3228_HDMI_SDA_VSEL, 1) | + FIELD_PREP_WM16(RK3228_HDMI_SCL_VSEL, 1)); + + regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2, + FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1)); } static enum drm_connector_status @@ -377,15 +373,13 @@ dw_hdmi_rk3328_read_hpd(struct dw_hdmi *dw_hdmi, void *data) status = dw_hdmi_phy_read_hpd(dw_hdmi, data); if (status == connector_status_connected) - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V, - RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 1) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 1)); else - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(0, RK3328_HDMI_SDA_5V | - RK3328_HDMI_SCL_5V)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0)); return status; } @@ -396,21 +390,21 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) dw_hdmi_phy_setup_hpd(dw_hdmi, data); /* Enable and map pins to 3V grf-controlled io-voltage */ - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(0, RK3328_HDMI_HPD_SARADC | RK3328_HDMI_CEC_5V | - RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V | - RK3328_HDMI_HPD_5V)); - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON3, - HIWORD_UPDATE(0, RK3328_HDMI_SDA5V_GRF | RK3328_HDMI_SCL5V_GRF | - RK3328_HDMI_HPD5V_GRF | - RK3328_HDMI_CEC5V_GRF)); - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON2, - HIWORD_UPDATE(RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK, - RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK | - RK3328_HDMI_HPD_IOE)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_HPD_SARADC, 0) | + FIELD_PREP_WM16(RK3328_HDMI_CEC_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_HPD_5V, 0)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON3, + FIELD_PREP_WM16(RK3328_HDMI_SDA5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_HPD5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_CEC5V_GRF, 0)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON2, + FIELD_PREP_WM16(RK3328_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1) | + FIELD_PREP_WM16(RK3328_HDMI_HPD_IOE, 0)); dw_hdmi_rk3328_read_hpd(dw_hdmi, data); } @@ -438,8 +432,8 @@ static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = { static struct rockchip_hdmi_chip_data rk3288_chip_data = { .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_HDMI_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_HDMI_LCDC_SEL, RK3288_HDMI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 1), .max_tmds_clock = 340000, }; @@ -475,8 +469,8 @@ static const struct dw_hdmi_plat_data rk3328_hdmi_drv_data = { static struct rockchip_hdmi_chip_data rk3399_chip_data = { .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_HDMI_LCDC_SEL), - .lcdsel_lit 
= HIWORD_UPDATE(RK3399_HDMI_LCDC_SEL, RK3399_HDMI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 1), .max_tmds_clock = 594000, }; @@ -589,10 +583,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, if (hdmi->chip_data == &rk3568_chip_data) { regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, - HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK, - RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK)); + FIELD_PREP_WM16(RK3568_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3568_HDMI_SCLIN_MSK, 1)); } drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index 7d531b6f4c09..ed6e8f036f4b 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -9,6 +9,7 @@ #include <linux/clk.h> #include <linux/gpio/consumer.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -66,7 +67,8 @@ #define RK3588_HDMI1_HPD_INT_MSK BIT(15) #define RK3588_HDMI1_HPD_INT_CLR BIT(14) #define RK3588_GRF_SOC_CON7 0x031c -#define RK3588_SET_HPD_PATH_MASK GENMASK(13, 12) +#define RK3588_HPD_HDMI0_IO_EN_MASK BIT(12) +#define RK3588_HPD_HDMI1_IO_EN_MASK BIT(13) #define RK3588_GRF_SOC_STATUS1 0x0384 #define RK3588_HDMI0_LEVEL_INT BIT(16) #define RK3588_HDMI1_LEVEL_INT BIT(24) @@ -80,7 +82,6 @@ #define RK3588_HDMI0_GRANT_SEL BIT(10) #define RK3588_HDMI1_GRANT_SEL BIT(12) -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) #define HOTPLUG_DEBOUNCE_MS 150 #define MAX_HDMI_PORT_NUM 2 @@ -185,11 +186,11 @@ static void dw_hdmi_qp_rk3588_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) u32 val; if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, - RK3588_HDMI1_HPD_INT_CLR | RK3588_HDMI1_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0)); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR | RK3588_HDMI0_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0)); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); } @@ -218,8 +219,8 @@ static void dw_hdmi_qp_rk3576_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) struct rockchip_hdmi_qp *hdmi = (struct rockchip_hdmi_qp *)data; u32 val; - val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR, - RK3576_HDMI_HPD_INT_CLR | RK3576_HDMI_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0)); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); regmap_write(hdmi->regmap, 0xa404, 0xffff0102); @@ -254,7 +255,7 @@ static irqreturn_t dw_hdmi_qp_rk3576_hardirq(int irq, void *dev_id) regmap_read(hdmi->regmap, RK3576_IOC_HDMI_HPD_STATUS, &intr_stat); if (intr_stat) { - val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_MSK, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); return IRQ_WAKE_THREAD; @@ -273,12 +274,12 @@ static irqreturn_t dw_hdmi_qp_rk3576_irq(int irq, void *dev_id) if (!intr_stat) return IRQ_NONE; - val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR, RK3576_HDMI_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); mod_delayed_work(system_wq, 
&hdmi->hpd_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); - val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); return IRQ_HANDLED; @@ -293,11 +294,9 @@ static irqreturn_t dw_hdmi_qp_rk3588_hardirq(int irq, void *dev_id) if (intr_stat) { if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, - RK3588_HDMI1_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, - RK3588_HDMI0_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_WAKE_THREAD; } @@ -315,20 +314,18 @@ static irqreturn_t dw_hdmi_qp_rk3588_irq(int irq, void *dev_id) return IRQ_NONE; if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, - RK3588_HDMI1_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); mod_delayed_work(system_wq, &hdmi->hpd_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); if (hdmi->port_id) - val |= HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK); + val |= FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0); else - val |= HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK); + val |= FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_HANDLED; @@ -338,14 +335,14 @@ static void dw_hdmi_qp_rk3576_io_init(struct rockchip_hdmi_qp *hdmi) { u32 val; - val = HIWORD_UPDATE(RK3576_SCLIN_MASK, RK3576_SCLIN_MASK) | - HIWORD_UPDATE(RK3576_SDAIN_MASK, RK3576_SDAIN_MASK) | - HIWORD_UPDATE(RK3576_HDMI_GRANT_SEL, RK3576_HDMI_GRANT_SEL) | - HIWORD_UPDATE(RK3576_I2S_SEL_MASK, RK3576_I2S_SEL_MASK); + val = FIELD_PREP_WM16(RK3576_SCLIN_MASK, 1) | + FIELD_PREP_WM16(RK3576_SDAIN_MASK, 1) | + FIELD_PREP_WM16(RK3576_HDMI_GRANT_SEL, 1) | + FIELD_PREP_WM16(RK3576_I2S_SEL_MASK, 1); regmap_write(hdmi->vo_regmap, RK3576_VO0_GRF_SOC_CON14, val); - val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); } @@ -353,27 +350,28 @@ static void dw_hdmi_qp_rk3588_io_init(struct rockchip_hdmi_qp *hdmi) { u32 val; - val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) | - HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | - HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | - HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); + val = FIELD_PREP_WM16(RK3588_SCLIN_MASK, 1) | + FIELD_PREP_WM16(RK3588_SDAIN_MASK, 1) | + FIELD_PREP_WM16(RK3588_MODE_MASK, 1) | + FIELD_PREP_WM16(RK3588_I2S_SEL_MASK, 1); regmap_write(hdmi->vo_regmap, hdmi->port_id ? 
RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, val); - val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, RK3588_SET_HPD_PATH_MASK); + val = FIELD_PREP_WM16(RK3588_HPD_HDMI0_IO_EN_MASK, 1) | + FIELD_PREP_WM16(RK3588_HPD_HDMI1_IO_EN_MASK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, RK3588_HDMI1_GRANT_SEL); + val = FIELD_PREP_WM16(RK3588_HDMI1_GRANT_SEL, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, RK3588_HDMI0_GRANT_SEL); + val = FIELD_PREP_WM16(RK3588_HDMI0_GRANT_SEL, 1); regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, RK3588_HDMI1_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); } diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 1ab3ad4bde9e..f24827dc1421 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -10,6 +10,7 @@ #include <linux/delay.h> #include <linux/err.h> #include <linux/hdmi.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/mod_devicetable.h> #include <linux/module.h> @@ -382,8 +383,6 @@ enum { #define HDMI_CEC_BUSFREETIME_H 0xdd #define HDMI_CEC_LOGICADDR 0xde -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) - #define RK3036_GRF_SOC_CON2 0x148 #define RK3036_HDMI_PHSYNC BIT(4) #define RK3036_HDMI_PVSYNC BIT(5) @@ -756,10 +755,10 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, int value, psync; if (hdmi->variant->dev_type == RK3036_HDMI) { - psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? RK3036_HDMI_PHSYNC : 0; - value = HIWORD_UPDATE(psync, RK3036_HDMI_PHSYNC); - psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? RK3036_HDMI_PVSYNC : 0; - value |= HIWORD_UPDATE(psync, RK3036_HDMI_PVSYNC); + psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? 1 : 0; + value = FIELD_PREP_WM16(RK3036_HDMI_PHSYNC, psync); + psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 : 0; + value |= FIELD_PREP_WM16(RK3036_HDMI_PVSYNC, psync); regmap_write(hdmi->grf, RK3036_GRF_SOC_CON2, value); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index fa5c56f16047..9124191899ba 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -33,7 +33,6 @@ #define WIN_FEATURE_AFBDC BIT(0) #define WIN_FEATURE_CLUSTER BIT(1) -#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) /* * the delay number of a window in different mode. 
*/ diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.h b/drivers/gpu/drm/rockchip/rockchip_lvds.h index ca83d7b6bea7..2d92447d819b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.h +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.h @@ -9,6 +9,9 @@ #ifndef _ROCKCHIP_LVDS_ #define _ROCKCHIP_LVDS_ +#include <linux/bits.h> +#include <linux/hw_bitfield.h> + #define RK3288_LVDS_CH0_REG0 0x00 #define RK3288_LVDS_CH0_REG0_LVDS_EN BIT(7) #define RK3288_LVDS_CH0_REG0_TTL_EN BIT(6) @@ -106,18 +109,16 @@ #define LVDS_VESA_18 2 #define LVDS_JEIDA_18 3 -#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) - #define PX30_LVDS_GRF_PD_VO_CON0 0x434 -#define PX30_LVDS_TIE_CLKS(val) HIWORD_UPDATE(val, 8, 8) -#define PX30_LVDS_INVERT_CLKS(val) HIWORD_UPDATE(val, 9, 9) -#define PX30_LVDS_INVERT_DCLK(val) HIWORD_UPDATE(val, 5, 5) +#define PX30_LVDS_TIE_CLKS(val) FIELD_PREP_WM16(BIT(8), (val)) +#define PX30_LVDS_INVERT_CLKS(val) FIELD_PREP_WM16(BIT(9), (val)) +#define PX30_LVDS_INVERT_DCLK(val) FIELD_PREP_WM16(BIT(5), (val)) #define PX30_LVDS_GRF_PD_VO_CON1 0x438 -#define PX30_LVDS_FORMAT(val) HIWORD_UPDATE(val, 14, 13) -#define PX30_LVDS_MODE_EN(val) HIWORD_UPDATE(val, 12, 12) -#define PX30_LVDS_MSBSEL(val) HIWORD_UPDATE(val, 11, 11) -#define PX30_LVDS_P2S_EN(val) HIWORD_UPDATE(val, 6, 6) -#define PX30_LVDS_VOP_SEL(val) HIWORD_UPDATE(val, 1, 1) +#define PX30_LVDS_FORMAT(val) FIELD_PREP_WM16(GENMASK(14, 13), (val)) +#define PX30_LVDS_MODE_EN(val) FIELD_PREP_WM16(BIT(12), (val)) +#define PX30_LVDS_MSBSEL(val) FIELD_PREP_WM16(BIT(11), (val)) +#define PX30_LVDS_P2S_EN(val) FIELD_PREP_WM16(BIT(6), (val)) +#define PX30_LVDS_VOP_SEL(val) FIELD_PREP_WM16(BIT(1), (val)) #endif /* _ROCKCHIP_LVDS_ */ diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 45c5e3987813..38c49030c7ab 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -7,6 +7,7 @@ #include <linux/bitfield.h> #include <linux/kernel.h> #include <linux/component.h> +#include <linux/hw_bitfield.h> #include <linux/mod_devicetable.h> #include <linux/platform_device.h> #include <linux/of.h> @@ -1695,8 +1696,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); val = rk3588_get_hdmi_pol(polflags); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1)); - regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(1), 1)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, + FIELD_PREP_WM16(GENMASK(6, 5), val)); break; case ROCKCHIP_VOP2_EP_HDMI1: div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; @@ -1707,8 +1709,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); val = rk3588_get_hdmi_pol(polflags); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4)); - regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(4), 1)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, + FIELD_PREP_WM16(GENMASK(8, 7), val)); break; case ROCKCHIP_VOP2_EP_EDP0: div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; @@ -1718,7 +1721,7 @@ static unsigned long rk3588_set_intf_mux(struct 
vop2_video_port *vp, int id, u32 die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; die |= RK3588_SYS_DSP_INFACE_EN_EDP0 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(0), 1)); break; case ROCKCHIP_VOP2_EP_EDP1: div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; @@ -1728,7 +1731,7 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; die |= RK3588_SYS_DSP_INFACE_EN_EDP1 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(3), 1)); break; case ROCKCHIP_VOP2_EP_MIPI0: div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV; diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 94a5bf61a115..7d9e85e932d7 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -86,7 +86,7 @@ config DRM_PIXPAPER tristate "DRM support for PIXPAPER display panels" depends on DRM && SPI select DRM_CLIENT_SELECTION - select DRM_GEM_DMA_HELPER + select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER help DRM driver for the Mayqueen Pixpaper e-ink display panel. diff --git a/drivers/gpu/drm/tiny/pixpaper.c b/drivers/gpu/drm/tiny/pixpaper.c index b1379cb5f030..32598fb2fee7 100644 --- a/drivers/gpu/drm/tiny/pixpaper.c +++ b/drivers/gpu/drm/tiny/pixpaper.c @@ -968,8 +968,8 @@ static const struct drm_crtc_funcs pixpaper_crtc_funcs = { .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; -static int pixpaper_mode_valid(struct drm_crtc *crtc, - const struct drm_display_mode *mode) +static enum drm_mode_status +pixpaper_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) { if (mode->hdisplay == PIXPAPER_WIDTH && mode->vdisplay == PIXPAPER_HEIGHT) { diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig new file mode 100644 index 000000000000..4b55308fd2eb --- /dev/null +++ b/drivers/gpu/drm/tyr/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config DRM_TYR + tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)" + depends on DRM=y + depends on RUST + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + default n + help + Rust DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. + + if M is selected, the module will be called tyr. This driver is work + in progress and may not be functional. 
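The Rockchip hunks above replace the driver-local HIWORD_UPDATE() macros with FIELD_PREP_WM16() from <linux/hw_bitfield.h>. Both encode the same "hiword update" convention used by Rockchip GRF registers: the upper 16 bits of a 32-bit write act as a per-bit write-enable mask for the lower 16 bits, so a single regmap_write() can update one field without a read-modify-write. The standalone C sketch below only illustrates that convention under the semantics implied by the removed macros; hiword_field_prep() is a name local to this example, not the kernel helper.

    /*
     * Standalone illustration of the "hiword update" write format.
     * 'mask' must be a non-zero, contiguous field mask in the low 16 bits.
     */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t hiword_field_prep(uint32_t mask, uint32_t val)
    {
        unsigned int shift = __builtin_ctz(mask);  /* lowest set bit of the field */

        /* value placed into the field, plus the same bits in the write-mask half */
        return ((val << shift) & mask) | (mask << 16);
    }

    int main(void)
    {
        /* e.g. clear a 4-bit FORCETXSTOPMODE-style field at bits [7:4] */
        uint32_t w = hiword_field_prep(0xfu << 4, 0);

        printf("0x%08x\n", w);  /* 0x00f00000: mask bits set, value bits zero */
        return 0;
    }

With this encoding, a write such as FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0) still clears the field on the hardware, because the mask half tells the GRF which bits to latch even when the value half is zero.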
diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 000000000000..ba545f65f2c0 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 000000000000..d5625dd1e41c --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::c_str; +use kernel::clk::Clk; +use kernel::clk::OptionalClk; +use kernel::device::Bound; +use kernel::device::Core; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::ioctl; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator; +use kernel::regulator::Regulator; +use kernel::sizes::SZ_2M; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::gem::TyrObject; +use crate::gpu; +use crate::gpu::GpuInfo; +use crate::regs; + +pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>; + +/// Convenience type alias for the DRM device type for this driver. +pub(crate) type TyrDevice = drm::Device<TyrDriver>; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef<TyrDevice>, +} + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrData { + pub(crate) pdev: ARef<platform::Device>, + + #[pin] + clks: Mutex<Clocks>, + + #[pin] + regulators: Mutex<Regulators>, + + /// Some information on the GPU. + /// + /// This is mainly queried by userspace, i.e.: Mesa. + pub(crate) gpu_info: GpuInfo, +} + +// Both `Clk` and `Regulator` do not implement `Send` or `Sync`, but they +// should. There are patches on the mailing list to address this, but they have +// not landed yet. +// +// For now, add this workaround so that this patch compiles with the promise +// that it will be removed in a future patch. +// +// SAFETY: This will be removed in a future patch. +unsafe impl Send for TyrData {} +// SAFETY: This will be removed in a future patch. +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED == 0 { + dev_err!(dev, "GPU reset failed with errno\n"); + dev_err!( + dev, + "GPU_INT_RAWSTAT is {}\n", + regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? 
+ ); + + return Err(EIO); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <TyrDriver as platform::Driver>::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + _info: Option<&Self::IdInfo>, + ) -> Result<Pin<KBox<Self>>> { + let core_clk = Clk::get(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("sram"))?; + + let request = pdev.io_request_by_index(0).ok_or(ENODEV)?; + let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?; + + issue_soft_reset(pdev.as_ref(), &iomem)?; + gpu::l2_power_on(pdev.as_ref(), &iomem)?; + + let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?; + gpu_info.log(pdev); + + let platform: ARef<platform::Device> = pdev.into(); + + let data = try_pin_init!(TyrData { + pdev: platform.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + }); + + let tdev: ARef<TyrDevice> = drm::Device::new(pdev.as_ref(), data)?; + drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; + + let driver = KBox::pin_init(try_pin_init!(TyrDriver { device: tdev }), GFP_KERNEL)?; + + // We need this to be dev_info!() because dev_dbg!() does not work at + // all in Rust for now, and we need to see whether probe succeeded. + dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) {} +} + +#[pinned_drop] +impl PinnedDrop for TyrData { + fn drop(self: Pin<&mut Self>) { + // TODO: the type-state pattern for Clks will fix this. + let clks = self.clks.lock(); + clks.core.disable_unprepare(); + clks.stacks.disable_unprepare(); + clks.coregroup.disable_unprepare(); + } +} + +// We need to retain the name "panthor" to achieve drop-in compatibility with +// the C driver in the userspace stack. +const INFO: drm::DriverInfo = drm::DriverInfo { + major: 1, + minor: 5, + patchlevel: 0, + name: c_str!("panthor"), + desc: c_str!("ARM Mali Tyr DRM driver"), +}; + +#[vtable] +impl drm::Driver for TyrDriver { + type Data = TyrData; + type File = File; + type Object = drm::gem::Object<TyrObject>; + + const INFO: drm::DriverInfo = INFO; + + kernel::declare_drm_ioctls! 
{ + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: OptionalClk, + coregroup: OptionalClk, +} + +#[pin_data] +struct Regulators { + mali: Regulator<regulator::Enabled>, + sram: Regulator<regulator::Enabled>, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 000000000000..0ef432947b73 --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::drm; +use kernel::prelude::*; +use kernel::uaccess::UserSlice; +use kernel::uapi; + +use crate::driver::TyrDevice; +use crate::TyrDriver; + +#[pin_data] +pub(crate) struct File {} + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File<File>; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(_dev: &drm::Device<Self::Driver>) -> Result<Pin<KBox<Self>>> { + KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) + } +} + +impl File { + pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result<u32> { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = UserSlice::new( + UserPtr::from_addr(devquery.pointer as usize), + devquery.size as usize, + ) + .writer(); + + writer.write(&tdev.gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 000000000000..1273bf89dbd5 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use kernel::drm::gem; +use kernel::prelude::*; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct TyrObject {} + +impl gem::DriverObject for TyrObject { + type Driver = TyrDriver; + + fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { + try_pin_init!(TyrObject {}) + } +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 000000000000..6c582910dd5d --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits::genmask_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::platform; +use kernel::prelude::*; +use kernel::time; +use kernel::transmute::AsBytes; + +use crate::driver::IoMem; +use crate::regs; + +/// Struct containing information that can be queried by userspace. This is read from +/// the GPU's registers. +/// +/// # Invariants +/// +/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`. 
+#[repr(C)] +pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) csf_id: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) pad0: u32, + pub(crate) shader_present: u64, + pub(crate) l2_present: u64, + pub(crate) tiler_present: u64, + pub(crate) core_features: u32, + pub(crate) pad: u32, +} + +impl GpuInfo { + pub(crate) fn new(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<Self> { + let gpu_id = regs::GPU_ID.read(dev, iomem)?; + let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?; + let gpu_rev = regs::GPU_REVID.read(dev, iomem)?; + let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?; + let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?; + let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?; + let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?; + let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?; + let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?; + let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?; + let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?; + let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?; + let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?; + + let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?; + + let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?; + + let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?); + let shader_present = + shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32; + + let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?); + let tiler_present = + tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32; + + let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?); + let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32; + + Ok(Self { + gpu_id, + gpu_rev, + csf_id, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + // TODO: Add texture_features_{1,2,3}. 
+ texture_features: [texture_features, 0, 0, 0], + as_present, + pad0: 0, + shader_present, + l2_present, + tiler_present, + core_features, + pad: 0, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + /// Returns the number of virtual address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & genmask_u32(0..=7) + } + + /// Returns the number of physical address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & genmask_u32(0..=7) + } +} + +// SAFETY: `GpuInfo`'s invariant guarantees that it is the same type that is +// already exposed to userspace by the C driver. This implies that it fulfills +// the requirements for `AsBytes`. +// +// This means: +// +// - No implicit padding, +// - No kernel pointers, +// - No interior mutability. +unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +#[allow(dead_code)] +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From<u32> for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(28..=31)) >> 28, + arch_minor: (value & genmask_u32(24..=27)) >> 24, + arch_rev: (value & genmask_u32(20..=23)) >> 20, + prod_major: (value & genmask_u32(16..=19)) >> 16, + ver_major: (value & genmask_u32(12..=15)) >> 12, + ver_minor: (value & genmask_u32(4..=11)) >> 4, + ver_status: value & genmask_u32(0..=3), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::L2_PWRON_LO.write(dev, iomem, 1)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::L2_READY_LO.read(dev, iomem)? != 1 { + dev_err!(dev, "Failed to power on the GPU\n"); + return Err(EIO); + } + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 000000000000..f46933aaa221 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +// We don't expect that all the registers and fields will be used, even in the +// future. +// +// Nevertheless, it is useful to have most of them defined, like the C driver +// does. 
+#![allow(dead_code)] + +use kernel::bits::bit_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::prelude::*; + +use crate::driver::IoMem; + +/// Represents a register in the Register Set +/// +/// TODO: Replace this with the Nova `register!()` macro when it is available. +/// In particular, this will automatically give us 64bit register reads and +/// writes. +pub(crate) struct Register<const OFFSET: usize>; + +impl<const OFFSET: usize> Register<OFFSET> { + #[inline] + pub(crate) fn read(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<u32> { + let value = (*iomem).access(dev)?.read32(OFFSET); + Ok(value) + } + + #[inline] + pub(crate) fn write(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>, value: u32) -> Result { + (*iomem).access(dev)?.write32(value, OFFSET); + Ok(()) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register; +pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8); +pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8); +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; +pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register; +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; 
+pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register; +pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register; + +pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register; +pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 000000000000..861d1db43072 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Arm Mali Tyr DRM driver. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of +//! naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod gem; +mod gpu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + authors: ["The Tyr driver authors"], + description: "Arm Mali Tyr DRM driver", + license: "Dual MIT/GPL", +} diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 0317f3d7452a..1884686985b8 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -62,6 +62,8 @@ struct v3d_queue_state { /* Currently active job for this queue */ struct v3d_job *active_job; spinlock_t queue_lock; + /* Protect dma fence for signalling job completion */ + spinlock_t fence_lock; }; /* Performance monitor object. 
The perform lifetime is controlled by userspace diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c index 8f8471adae34..c82500a1df73 100644 --- a/drivers/gpu/drm/v3d/v3d_fence.c +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -15,7 +15,7 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q) fence->dev = &v3d->drm; fence->queue = q; fence->seqno = ++queue->emit_seqno; - dma_fence_init(&fence->base, &v3d_fence_ops, &queue->queue_lock, + dma_fence_init(&fence->base, &v3d_fence_ops, &queue->fence_lock, queue->fence_context, fence->seqno); return &fence->base; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index c77d90aa9b82..bb110d35f749 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -273,6 +273,7 @@ v3d_gem_init(struct drm_device *dev) seqcount_init(&queue->stats.lock); spin_lock_init(&queue->queue_lock); + spin_lock_init(&queue->fence_lock); } spin_lock_init(&v3d->mm_lock); diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c index 8ec6ed82b3d9..c1a30166c099 100644 --- a/drivers/gpu/drm/v3d/v3d_gemfs.c +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -7,11 +7,6 @@ #include "v3d_drv.h" -static int add_param(struct fs_context *fc, const char *key, const char *val) -{ - return vfs_parse_fs_string(fc, key, val, strlen(val)); -} - void v3d_gemfs_init(struct v3d_dev *v3d) { struct file_system_type *type; @@ -38,9 +33,9 @@ void v3d_gemfs_init(struct v3d_dev *v3d) fc = fs_context_for_mount(type, SB_KERNMOUNT); if (IS_ERR(fc)) goto err; - ret = add_param(fc, "source", "tmpfs"); + ret = vfs_parse_fs_string(fc, "source", "tmpfs"); if (!ret) - ret = add_param(fc, "huge", "within_size"); + ret = vfs_parse_fs_string(fc, "huge", "within_size"); if (!ret) gemfs = fc_mount_longterm(fc); put_fs_context(fc); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 819704ac675d..d539f25b5fbe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1497,6 +1497,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, SVGA3dCmdHeader *header) { struct vmw_bo *vmw_bo = NULL; + struct vmw_resource *res; struct vmw_surface *srf = NULL; VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdSurfaceDMA); int ret; @@ -1532,18 +1533,24 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, dirty = (cmd->body.transfer == SVGA3D_WRITE_HOST_VRAM) ? 
VMW_RES_DIRTY_SET : 0; - ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, - dirty, user_surface_converter, - &cmd->body.host.sid, NULL); + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, dirty, + user_surface_converter, &cmd->body.host.sid, + NULL); if (unlikely(ret != 0)) { if (unlikely(ret != -ERESTARTSYS)) VMW_DEBUG_USER("could not find surface for DMA.\n"); return ret; } - srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res); + res = sw_context->res_cache[vmw_res_surface].res; + if (!res) { + VMW_DEBUG_USER("Invalid DMA surface.\n"); + return -EINVAL; + } - vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo, header); + srf = vmw_res_to_srf(res); + vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo, + header); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index 7ee93e7191c7..35dc94c3db39 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -308,8 +308,10 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, hash_add_rcu(ctx->sw_context->res_ht, &node->hash.head, node->hash.key); } node->res = vmw_resource_reference_unless_doomed(res); - if (!node->res) + if (!node->res) { + hash_del_rcu(&node->hash.head); return -ESRCH; + } node->first_usage = 1; if (!res->dev_priv->has_mob) { @@ -636,7 +638,7 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx) hash_del_rcu(&val->hash.head); list_for_each_entry(val, &ctx->resource_ctx_list, head) - hash_del_rcu(&entry->hash.head); + hash_del_rcu(&val->hash.head); ctx->sw_context = NULL; } diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 714d5702dfd7..7219f6b884b6 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -40,12 +40,12 @@ config DRM_XE select DRM_TTM select DRM_TTM_HELPER select DRM_EXEC + select DRM_GPUSVM if !UML && DEVICE_PRIVATE select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP select AUXILIARY_BUS - select HMM_MIRROR select REGMAP if I2C help Driver for Intel Xe2 series GPUs and later. 
Experimental support diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 01735c6ece8b..87902b4bd6d3 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -104,6 +104,7 @@ config DRM_XE_DEBUG_GUC config DRM_XE_USERPTR_INVAL_INJECT bool "Inject userptr invalidation -EINVAL errors" + depends on DRM_GPUSVM default n help Choose this option when debugging error paths that diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 987e4fe10538..d9c6cf0f189e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -84,6 +84,7 @@ xe-y += xe_bb.o \ xe_hw_error.o \ xe_hw_fence.o \ xe_irq.o \ + xe_late_bind_fw.o \ xe_lrc.o \ xe_migrate.o \ xe_mmio.o \ @@ -130,6 +131,7 @@ xe-y += xe_bb.o \ xe_tuning.o \ xe_uc.o \ xe_uc_fw.o \ + xe_validation.o \ xe_vm.o \ xe_vm_madvise.o \ xe_vram.o \ @@ -140,8 +142,8 @@ xe-y += xe_bb.o \ xe_wopcm.o xe-$(CONFIG_I2C) += xe_i2c.o -xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o @@ -210,6 +212,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ display/xe_hdcp_gsc.o \ + display/xe_panic.o \ display/xe_plane_initial.o \ display/xe_tdf.o @@ -325,6 +328,7 @@ ifeq ($(CONFIG_DEBUG_FS),y) xe_gt_stats.o \ xe_guc_debugfs.o \ xe_huc_debugfs.o \ + xe_tile_debugfs.o \ xe_uc_debugfs.o xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index d8cf68a0516d..31090c69dfbe 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum xe_guc_action { XE_GUC_ACTION_ENTER_S_STATE = 0x501, XE_GUC_ACTION_EXIT_S_STATE = 0x502, XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, @@ -154,6 +155,8 @@ enum xe_guc_action { XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001, + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002, XE_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index b28c8fa061f7..ce5c59517528 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -210,6 +210,11 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +enum slpc_power_profile { + SLPC_POWER_PROFILE_BASE = 0x0, + SLPC_POWER_PROFILE_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0e78351c6ef5..265a135e7061 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -17,6 +17,7 @@ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | * | | | - `GuC Opt In Feature KLVs`_ | + * | | | - `GuC Scheduling Policies KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -153,6 +154,30 @@ enum { #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u /** + * DOC: GuC Scheduling Policies KLVs + * + * `GuC 
KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. + * + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 + * Some platforms do not allow concurrent execution of RCS and CCS + * workloads from different address spaces. By default, the GuC prioritizes + * RCS submissions over CCS ones, which can lead to CCS workloads being + * significantly (or completely) starved of execution time. This KLV allows + * the driver to specify a quantum (in ms) and a ratio (percentage value + * between 0 and 100), and the GuC will prioritize the CCS for that + * percentage of each quantum. For example, specifying 100ms and 30% will + * make the GuC prioritize the CCS for 30ms of every 100ms. + * Note that this does not necessarly mean that RCS and CCS engines will + * only be active for their percentage of the quantum, as the restriction + * only kicks in if both classes are fully busy with non-compatible address + * spaces; i.e., if one engine is idle or running the same address space, + * a pending job on the other engine will still be submitted to the HW no + * matter what the ratio is + */ +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 41d39d67817a..f097fc6d5127 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -8,6 +8,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_res_cursor.h" +#include "xe_validation.h" struct xe_bo; @@ -21,7 +22,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, u32 start, u32 end) { struct xe_bo *bo; - int err; + int err = 0; u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; if (start < SZ_4K) @@ -32,21 +33,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, start = ALIGN(start, align); } - bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), - NULL, size, start, end, - ttm_bo_type_kernel, flags, 0); + bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe), + size, start, end, ttm_bo_type_kernel, flags); if (IS_ERR(bo)) { err = PTR_ERR(bo); bo = NULL; return err; } - err = xe_bo_pin(bo); - xe_bo_unlock_vm_held(bo); - - if (err) { - xe_bo_put(fb->bo); - bo = NULL; - } fb->bo = bo; diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 910632f57c3d..27437c22bd70 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -1,12 +1,7 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ -#include <drm/drm_cache.h> #include <drm/drm_gem.h> -#include <drm/drm_panic.h> - -#include "intel_fb.h" -#include "intel_display_types.h" #include "xe_bo.h" #include "intel_bo.h" @@ -64,89 +59,3 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { /* FIXME */ } - -struct xe_panic_data { - struct page **pages; - int page; - void *vaddr; -}; - -struct xe_framebuffer { - struct intel_framebuffer base; - struct xe_panic_data panic; -}; - -static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb) -{ - return &container_of_const(fb, struct xe_framebuffer, base)->panic; -} - -static void xe_panic_kunmap(struct xe_panic_data *panic) 
-{ - if (panic->vaddr) { - drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); - kunmap_local(panic->vaddr); - panic->vaddr = NULL; - } -} - -/* - * The scanout buffer pages are not mapped, so for each pixel, - * use kmap_local_page_try_from_panic() to map the page, and write the pixel. - * Try to keep the map from the previous pixel, to avoid too much map/unmap. - */ -static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, - unsigned int y, u32 color) -{ - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct xe_panic_data *panic = to_xe_panic_data(fb); - struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); - unsigned int new_page; - unsigned int offset; - - if (fb->panic_tiling) - offset = fb->panic_tiling(sb->width, x, y); - else - offset = y * sb->pitch[0] + x * sb->format->cpp[0]; - - new_page = offset >> PAGE_SHIFT; - offset = offset % PAGE_SIZE; - if (new_page != panic->page) { - xe_panic_kunmap(panic); - panic->page = new_page; - panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, - panic->page); - } - if (panic->vaddr) { - u32 *pix = panic->vaddr + offset; - *pix = color; - } -} - -struct intel_framebuffer *intel_bo_alloc_framebuffer(void) -{ - struct xe_framebuffer *xe_fb; - - xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL); - if (xe_fb) - return &xe_fb->base; - return NULL; -} - -int intel_bo_panic_setup(struct drm_scanout_buffer *sb) -{ - struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; - struct xe_panic_data *panic = to_xe_panic_data(fb); - - panic->page = -1; - sb->set_pixel = xe_panic_page_set_pixel; - return 0; -} - -void intel_bo_panic_finish(struct intel_framebuffer *fb) -{ - struct xe_panic_data *panic = to_xe_panic_data(fb); - - xe_panic_kunmap(panic); - panic->page = -1; -} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index d96ba2b51065..8ea9a472113c 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -42,11 +42,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, obj = ERR_PTR(-ENODEV); if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) { - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), + size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT, false); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -54,10 +54,10 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_GGTT, false); } if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b68d70db6c8..19e691fccf8c 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -20,7 +20,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" -#include "intel_display_core.h" +#include 
"intel_display_device.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -37,13 +37,6 @@ /* Xe device functions */ -static bool has_display(struct xe_device *xe) -{ - struct intel_display *display = xe->display; - - return HAS_DISPLAY(display); -} - /** * xe_display_driver_probe_defer - Detect if we need to wait for other drivers * early on @@ -290,7 +283,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_enable(display); } @@ -303,14 +296,14 @@ static void xe_display_disable_d3cold(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); intel_hpd_init(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -333,7 +326,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_power_domains_disable(display); drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -345,7 +338,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_hpd_cancel_work(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); } @@ -365,7 +358,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) intel_power_domains_disable(display); drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -376,7 +369,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) intel_encoder_block_all_hpds(display); intel_hpd_cancel_work(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -465,25 +458,25 @@ void xe_display_pm_resume(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_encoder_unblock_all_hpds(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(display); } - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -548,7 +541,7 @@ int xe_display_probe(struct xe_device *xe) xe->display = display; - if (has_display(xe)) + if (intel_display_device_present(display)) return 0; no_display: diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9f941fc2e36b..58581d7aaae6 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -43,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d return false; /* Set scanout 
flag for WC mapping */ - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), + PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false); if (IS_ERR(obj)) { kfree(vma); return false; diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index f1f8b5ab53ef..1fd4a815e784 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -102,29 +102,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_PAGE_SIZE); if (IS_DGFX(xe)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM0 | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); else - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -281,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); - int ret; + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!vma) return ERR_PTR(-ENODEV); @@ -308,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the * assumptions are incorrect for framebuffers */ - ret = ttm_bo_reserve(&bo->ttm, false, false, NULL); - if (ret) - goto err; - - if (IS_DGFX(xe)) - ret = xe_bo_migrate(bo, XE_PL_VRAM0); - else - ret = xe_bo_validate(bo, NULL, true); - if (!ret) - ttm_bo_pin(&bo->ttm); - ttm_bo_unreserve(&bo->ttm); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + if (IS_DGFX(xe)) + ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec); + else + ret = xe_bo_validate(bo, NULL, true, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + if (!ret) + ttm_bo_pin(&bo->ttm); + } if (ret) goto err; diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 30f1073141fc..4ae847b628e2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -72,10 +72,10 @@ static int 
intel_hdcp_gsc_initialize_message(struct xe_device *xe, int ret = 0; /* allocate object of two page for HDCP command memory and store it */ - bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c new file mode 100644 index 000000000000..f32b23338331 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_panic.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_cache.h> +#include <drm/drm_panic.h> + +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" +#include "xe_bo.h" + +struct intel_panic { + struct page **pages; + int page; + void *vaddr; +}; + +static void xe_panic_kunmap(struct intel_panic *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. + */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct intel_panic *panic = fb->panic; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_panic *intel_panic_alloc(void) +{ + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); + + return panic; +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_panic_finish(struct intel_panic *panic) +{ + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 826ac3d578b7..94f00def811b 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -140,8 +140,8 @@ initial_plane_bo(struct xe_device *xe, page_size); size -= base; - bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, - ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base, + ttm_bo_type_kernel, flags, 0, false); if (IS_ERR(bo)) { drm_dbg(&xe->drm, "Failed to create bo phys_base=%pa size %u with flags %x: %li\n", diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index f96b2e2b3064..06cb6b02ec64 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -522,6 
+522,7 @@ #define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) #define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) +#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5) #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 1b101edb838b..b5eff383902c 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -40,7 +40,4 @@ #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) #define INDIRECT_CTX_RING_CTL (0x0a + 1) -#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) -#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) - #endif diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 7b40cc8be1c9..2294cf89f3e1 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -23,7 +23,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, bool clear, u64 get_val, u64 assign_val, - struct kunit *test) + struct kunit *test, struct drm_exec *exec) { struct dma_fence *fence; struct ttm_tt *ttm; @@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, u32 offset; /* Move bo to VRAM if not already there. */ - ret = xe_bo_validate(bo, NULL, false); + ret = xe_bo_validate(bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate bo.\n"); return ret; @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo); + ret = xe_bo_evict(bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; @@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, /* TODO: Sanity check */ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; if (IS_DGFX(xe)) kunit_info(test, "Testing vram id %u\n", tile->id); else kunit_info(test, "Testing system memory\n"); - bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + bo_flags, exec); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, - test); + test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data survives migration.\n"); ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, - 0xdeadbeefdeadbeefULL, test); + 0xdeadbeefdeadbeefULL, test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); - ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec); out_unlock: xe_bo_unlock(bo); @@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; struct xe_gt *__gt; int err, i, id; @@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for (i = 0; i < 2; 
++i) { xe_vm_lock(vm, false); - bo = xe_bo_create_user(xe, NULL, vm, 0x10000, + bo = xe_bo_create_user(xe, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, exec); xe_vm_unlock(vm); if (IS_ERR(bo)) { KUNIT_FAIL(test, "bo create err=%pe\n", bo); break; } - external = xe_bo_create_user(xe, NULL, NULL, 0x10000, + external = xe_bo_create_user(xe, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, NULL); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; } xe_bo_lock(external, false); - err = xe_bo_pin_external(external, false); + err = xe_bo_pin_external(external, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", @@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc if (i) { down_read(&vm->lock); xe_vm_lock(vm, false); - err = xe_bo_validate(bo, bo->vm, false); + err = xe_bo_validate(bo, bo->vm, false, exec); xe_vm_unlock(vm); up_read(&vm->lock); if (err) { @@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_all; } xe_bo_lock(external, false); - err = xe_bo_validate(external, NULL, false); + err = xe_bo_validate(external, NULL, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo valid err=%pe\n", @@ -495,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe) INIT_LIST_HEAD(&link->link); /* We can create bos using WC caching here. But it is slower. */ - bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, + bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE, DRM_XE_GEM_CPU_CACHING_WB, - XE_BO_FLAG_SYSTEM); + XE_BO_FLAG_SYSTEM, NULL); if (IS_ERR(bo)) { if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS)) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index 5baeab6b6fb7..a7e548a2bdfb 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -27,7 +27,8 @@ static bool is_dynamic(struct dma_buf_test_params *params) } static void check_residency(struct kunit *test, struct xe_bo *exported, - struct xe_bo *imported, struct dma_buf *dmabuf) + struct xe_bo *imported, struct dma_buf *dmabuf, + struct drm_exec *exec) { struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); u32 mem_type; @@ -62,7 +63,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * importer is on a different device. If they're on the same device, * the exporter and the importer should be the same bo. */ - ret = xe_bo_evict(exported); + ret = xe_bo_evict(exported, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", @@ -77,7 +78,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, } /* Re-validate the importer. This should move also exporter in. 
*/ - ret = xe_bo_validate(imported, NULL, false); + ret = xe_bo_validate(imported, NULL, false, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", @@ -113,8 +114,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) size = SZ_64K; kunit_info(test, "running %s\n", __func__); - bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - params->mem_mask); + bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, + params->mem_mask, NULL); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); @@ -142,11 +143,12 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "xe_gem_prime_import() succeeded when it shouldn't have\n"); } else { + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; int err; /* Is everything where we expect it to be? */ xe_bo_lock(import_bo, false); - err = xe_bo_validate(import_bo, NULL, false); + err = xe_bo_validate(import_bo, NULL, false, exec); /* Pinning in VRAM is not allowed. */ if (!is_dynamic(params) && @@ -159,7 +161,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) err == -ERESTARTSYS); if (!err) - check_residency(test, bo, import_bo, dmabuf); + check_residency(test, bo, import_bo, dmabuf, exec); xe_bo_unlock(import_bo); } drm_gem_object_put(import); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c new file mode 100644 index 000000000000..3b213fcae916 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/delay.h> + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_kunit_helpers.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_pm.h" + +/* + * There are different ways to allocate the G2G buffers. The plan for this test + * is to make sure that all the possible options work. The particular option + * chosen by the driver may vary from one platform to another, it may also change + * with time. So to ensure consistency of testing, the relevant driver code is + * replicated here to guarantee it won't change without the test being updated + * to keep testing the other options. + * + * In order to test the actual code being used by the driver, there is also the + * 'default' scheme. That will use the official driver routines to test whatever + * method the driver is using on the current platform at the current time. + */ +enum { + /* Driver defined allocation scheme */ + G2G_CTB_TYPE_DEFAULT, + /* Single buffer in host memory */ + G2G_CTB_TYPE_HOST, + /* Single buffer in a specific tile, loops across all tiles */ + G2G_CTB_TYPE_TILE, +}; + +/* + * Payload is opaque to GuC. So KMD can define any structure or size it wants. 
+ */ +struct g2g_test_payload { + u32 tx_dev; + u32 tx_tile; + u32 rx_dev; + u32 rx_tile; + u32 seqno; +}; + +static void g2g_test_send(struct kunit *test, struct xe_guc *guc, + u32 far_tile, u32 far_dev, + struct g2g_test_payload *payload) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 *action, total; + size_t payload_len; + int ret; + + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); + payload_len = sizeof(*payload) / sizeof(u32); + + total = 4 + payload_len; + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); + + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; + action[1] = far_tile; + action[2] = far_dev; + action[3] = payload_len; + memcpy(action + 4, payload, payload_len * sizeof(u32)); + + atomic_inc(&xe->g2g_test_count); + + /* + * Should specify the expected response notification here. Problem is that + * the response will be coming from a different GuC. By the end, it should + * all add up as long as an equal number of messages are sent from each GuC + * and to each GuC. However, in the middle negative reservation space errors + * and such like can occur. Rather than add intrusive changes to the CT layer + * it is simpler to just not bother counting it at all. The system should be + * idle when running the selftest, and the selftest's notification total size + * is well within the G2H allocation size. So there should be no issues with + * needing to block for space, which is all the tracking code is really for. + */ + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); + kunit_kfree(test, action); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); +} + +/* + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously + * from the G2H notification handler. Need that to actually complete rather than + * thread-abort in order to keep the rest of the driver alive! + */ +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; + struct g2g_test_payload *payload; + size_t payload_len; + int ret = 0, i; + + payload_len = sizeof(*payload) / sizeof(u32); + + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); + ret = -EPROTO; + goto done; + } + + tx_tile = msg[0]; + tx_dev = msg[1]; + got_len = msg[2]; + payload = (struct g2g_test_payload *)(msg + 3); + + rx_tile = gt_to_tile(rx_gt)->id; + rx_dev = G2G_DEV(rx_gt); + + if (got_len != payload_len) { + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); + ret = -EPROTO; + goto done; + } + + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! 
[%d]\n", + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); + ret = -EPROTO; + goto done; + } + + if (!xe->g2g_test_array) { + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); + ret = -ENOMEM; + goto done; + } + + for_each_gt(test_gt, xe, i) { + if (gt_to_tile(test_gt)->id != tx_tile) + continue; + + if (G2G_DEV(test_gt) != tx_dev) + continue; + + if (tx_gt) { + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + tx_gt = test_gt; + } + if (!tx_gt) { + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; + + if (xe->g2g_test_array[idx] != payload->seqno - 1) { + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", + xe->g2g_test_array[idx], payload->seqno - 1, + tx_tile, tx_dev, rx_tile, rx_dev); + ret = -EINVAL; + goto done; + } + + xe->g2g_test_array[idx] = payload->seqno; + +done: + atomic_dec(&xe->g2g_test_count); + return ret; +} + +/* + * Send the given seqno from all GuCs to all other GuCs in tile/GT order + */ +static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) +{ + struct xe_gt *near_gt, *far_gt; + int i, j; + + for_each_gt(near_gt, xe, i) { + u32 near_tile = gt_to_tile(near_gt)->id; + u32 near_dev = G2G_DEV(near_gt); + + for_each_gt(far_gt, xe, j) { + u32 far_tile = gt_to_tile(far_gt)->id; + u32 far_dev = G2G_DEV(far_gt); + struct g2g_test_payload payload; + + if (far_gt->info.id == near_gt->info.id) + continue; + + payload.tx_dev = near_dev; + payload.tx_tile = near_tile; + payload.rx_dev = far_dev; + payload.rx_tile = far_tile; + payload.seqno = seqno; + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); + } + } +} + +#define WAIT_TIME_MS 100 +#define WAIT_COUNT (1000 / WAIT_TIME_MS) + +static void g2g_wait_for_complete(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + int wait = 0; + + /* Wait for all G2H messages to be received */ + while (atomic_read(&xe->g2g_test_count)) { + if (++wait > WAIT_COUNT) + break; + + msleep(WAIT_TIME_MS); + } + + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), + "Timed out waiting for notifications\n"); + kunit_info(test, "Got all notifications back\n"); +} + +#undef WAIT_TIME_MS +#undef WAIT_COUNT + +static void g2g_clean_array(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe->g2g_test_array = NULL; +} + +#define NUM_LOOPS 16 + +static void g2g_run_test(struct kunit *test, struct xe_device *xe) +{ + u32 seqno, max_array; + int ret, i, j; + + max_array = xe->info.gt_count * xe->info.gt_count; + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); + + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + + /* + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. + * Tile/GT order doesn't really mean anything to the hardware but it is going + * to be a fixed sequence every time. + * + * Verify that each one comes back having taken the correct route. 
+ */ + ret = kunit_add_action(test, g2g_wait_for_complete, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + for (seqno = 1; seqno < NUM_LOOPS; seqno++) + g2g_test_in_order(test, xe, seqno); + seqno--; + + kunit_release_action(test, &g2g_wait_for_complete, xe); + + /* Check for the final seqno in each slot */ + for (i = 0; i < xe->info.gt_count; i++) { + for (j = 0; j < xe->info.gt_count; j++) { + u32 idx = (j * xe->info.gt_count) + i; + + if (i == j) + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], + "identity seqno modified: %d for %dx%d!\n", + xe->g2g_test_array[idx], i, j); + else + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], + "invalid seqno: %d vs %d for %dx%d!\n", + xe->g2g_test_array[idx], seqno, i, j); + } + } + + kunit_kfree(test, xe->g2g_test_array); + kunit_release_action(test, &g2g_clean_array, xe); + + kunit_info(test, "Test passed\n"); +} + +#undef NUM_LOOPS + +static void g2g_ct_stop(struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + int i, t; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) + guc_g2g_deregister(guc, tile, dev, t); + } +} + +/* Size of a single allocation that contains all G2G CTBs across all GTs */ +static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) +{ + unsigned int count = xe->info.gt_count; + u32 num_channels = (count * (count - 1)) / 2; + + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT); + + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; +} + +/* + * Use the driver's regular CTB allocation scheme. + */ +static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", + xe->info.tile_count, xe->info.gt_count); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + int ret; + + ret = guc_g2g_alloc(guc); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); + continue; + } +} + +static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) +{ + struct xe_gt *root_gt, *gt; + int i; + + root_gt = xe_device_get_gt(xe, 0); + root_gt->uc.guc.g2g.bo = bo; + root_gt->uc.guc.g2g.owned = true; + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); + + for_each_gt(gt, xe, i) { + if (gt->info.id != 0) { + gt->uc.guc.g2g.owned = false; + gt->uc.guc.g2g.bo = xe_bo_get(bo); + kunit_info(test, "[%d.%d] Pinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); + } + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); + } +} + +/* + * Allocate a single blob on the host and split between all G2G CTBs. 
+ */ +static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) +{ + struct xe_bo *bo; + u32 g2g_size; + + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +/* + * Allocate a single blob on the given tile and split between all G2G CTBs. + */ +static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) +{ + struct xe_bo *bo; + u32 g2g_size; + + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); + + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", + tile->id, xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +static void g2g_free(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + struct xe_bo *bo; + int i; + + for_each_gt(gt, xe, i) { + bo = gt->uc.guc.g2g.bo; + if (!bo) + continue; + + if (gt->uc.guc.g2g.owned) { + xe_managed_bo_unpin_map_no_vm(bo); + kunit_info(test, "[%d.%d] Unmapped 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } else { + xe_bo_put(bo); + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } + + gt->uc.guc.g2g.bo = NULL; + } +} + +static void g2g_stop(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + g2g_ct_stop(guc); + } + + g2g_free(test, xe); +} + +/* + * Generate a unique id for each bi-directional CTB for each pair of + * near and far tiles/devices. The id can then be used as an index into + * a single allocation that is sub-divided into multiple CTBs. + * + * For example, with two devices per tile and two tiles, the table should + * look like: + * Far <tile>.<dev> + * 0.0 0.1 1.0 1.1 + * N 0.0 --/-- 00/01 02/03 04/05 + * e 0.1 01/00 --/-- 06/07 08/09 + * a 1.0 03/02 07/06 --/-- 10/11 + * r 1.1 05/04 09/08 11/10 --/-- + * + * Where each entry is Rx/Tx channel id. + * + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would + * be reading from channel #11 and writing to channel #10. Whereas, + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 
+ */ +static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, + u32 type, u32 max_inst, bool have_dev) +{ + u32 near = near_tile, far = far_tile; + u32 idx = 0, x, y, direction; + int i; + + if (have_dev) { + near = (near << 1) | near_dev; + far = (far << 1) | far_dev; + } + + /* No need to send to one's self */ + if (far == near) + return -1; + + if (far > near) { + /* Top right table half */ + x = far; + y = near; + + /* T/R is 'forwards' direction */ + direction = type; + } else { + /* Bottom left table half */ + x = near; + y = far; + + /* B/L is 'backwards' direction */ + direction = (1 - type); + } + + /* Count the rows prior to the target */ + for (i = y; i > 0; i--) + idx += max_inst - i; + + /* Count this row up to the target */ + idx += (x - 1 - y); + + /* Slots are in Rx/Tx pairs */ + idx *= 2; + + /* Pick Rx/Tx direction */ + idx += direction; + + return idx; +} + +static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 near_tile = gt_to_tile(gt)->id; + u32 near_dev = G2G_DEV(gt); + u32 max = xe->info.gt_count; + int idx; + u32 base, desc, buf; + + if (!guc->g2g.bo) + return -ENODEV; + + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); + xe_assert(xe, idx >= 0); + + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); + desc = base + idx * G2G_DESC_SIZE; + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; + + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); + + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, + desc, buf, G2G_BUFFER_SIZE); +} + +static void g2g_start(struct kunit *test, struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int i; + int t, ret; + bool have_dev; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); + + /* GuC interface will need extending if more GT device types are ever created. 
*/ + KUNIT_ASSERT_TRUE(test, + (gt->info.type == XE_GT_TYPE_MAIN) || + (gt->info.type == XE_GT_TYPE_MEDIA)); + + /* Channel numbering depends on whether there are multiple GTs per tile */ + have_dev = xe->info.gt_count > xe->info.tile_count; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { + ret = g2g_register_flat(guc, tile, dev, t, have_dev); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); + } + } +} + +static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) +{ + struct xe_gt *gt; + int i, found = 0; + + g2g_stop(test, xe); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + KUNIT_ASSERT_NULL(test, guc->g2g.bo); + } + + switch (ctb_type) { + case G2G_CTB_TYPE_DEFAULT: + g2g_alloc_default(test, xe); + break; + + case G2G_CTB_TYPE_HOST: + g2g_alloc_host(test, xe); + break; + + case G2G_CTB_TYPE_TILE: + g2g_alloc_tile(test, xe, tile); + break; + + default: + KUNIT_ASSERT_TRUE(test, false); + } + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + if (ctb_type == G2G_CTB_TYPE_DEFAULT) + guc_g2g_start(guc); + else + g2g_start(test, guc); + found++; + } + + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); + + kunit_info(test, "Testing across %d GTs\n", found); +} + +static void g2g_recreate_ctb(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + + g2g_stop(test, xe); + + if (xe_guc_g2g_wanted(xe)) + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); +} + +static void g2g_pm_runtime_put(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe_pm_runtime_put(xe); +} + +static void g2g_pm_runtime_get(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + xe_pm_runtime_get(xe); + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); +} + +static void g2g_check_skip(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_gt *gt; + int i; + + if (IS_SRIOV_VF(xe)) + kunit_skip(test, "not supported from a VF"); + + if (xe->info.gt_count <= 1) + kunit_skip(test, "not enough GTs"); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) + kunit_skip(test, + "G2G test interface not available in production firmware builds\n"); + } +} + +/* + * Simple test that does not try to recreate the CTBs. + * Requires that the platform already enables G2G comms + * but has no risk of leaving the system in a broken state + * afterwards. + */ +static void xe_live_guc_g2g_kunit_default(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + if (!xe_guc_g2g_wanted(xe)) + kunit_skip(test, "G2G not enabled"); + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + kunit_info(test, "Testing default CTBs\n"); + g2g_run_test(test, xe); + + kunit_release_action(test, &g2g_pm_runtime_put, xe); +} + +/* + * More complex test that re-creates the CTBs in various location to + * test access to each location from each GuC. Can be run even on + * systems that do not enable G2G by default. On the other hand, + * because it recreates the CTBs, if something goes wrong it could + * leave the system with broken G2G comms. 
+ */ +static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + /* Make sure to leave the system as we found it */ + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); + + kunit_info(test, "Testing CTB type 'default'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); + g2g_run_test(test, xe); + + kunit_info(test, "Testing CTB type 'host'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); + g2g_run_test(test, xe); + + if (IS_DGFX(xe)) { + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); + + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); + g2g_run_test(test, xe); + } + } else { + kunit_info(test, "Skipping local memory on integrated platform\n"); + } + + kunit_release_action(test, g2g_recreate_ctb, xe); + kunit_release_action(test, g2g_pm_runtime_put, xe); +} + +static struct kunit_case xe_guc_g2g_tests[] = { + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_guc_g2g_test_suite = { + .name = "xe_guc_g2g", + .test_cases = xe_guc_g2g_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 81277c77016d..c55e46f1ae92 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; +extern struct kunit_suite xe_guc_g2g_test_suite; kunit_test_suite(xe_bo_test_suite); kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); +kunit_test_suite(xe_guc_g2g_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index edd1e701aa1c..5904d658d1f2 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -70,7 +70,7 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, } } while (0) static void test_copy(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test, u32 region) + struct kunit *test, u32 region, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; @@ -84,14 +84,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, + exec); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); return; } - err = xe_bo_validate(remote, NULL, false); + err = xe_bo_validate(remote, NULL, false, exec); if (err) { KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", str, err); @@ -161,13 +162,13 @@ out_unlock: } static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { - test_copy(m, bo, test, 
XE_BO_FLAG_SYSTEM); + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec); } static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { u32 region; @@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, region = XE_BO_FLAG_VRAM1; else region = XE_BO_FLAG_VRAM0; - test_copy(m, bo, test, region); + test_copy(m, bo, test, region, exec); } -static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test, + struct drm_exec *exec) { struct xe_tile *tile = m->tile; struct xe_device *xe = tile_to_xe(tile); @@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", PTR_ERR(tiny)); @@ -290,10 +295,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); - test_copy_sysmem(m, tiny, test); + test_copy_sysmem(m, tiny, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying small buffer object to other vram\n"); - test_copy_vram(m, tiny, test); + test_copy_vram(m, tiny, exec, test); } /* Clear a big bo */ @@ -312,10 +317,10 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); - test_copy_sysmem(m, big, test); + test_copy_sysmem(m, big, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying big buffer object to other vram\n"); - test_copy_vram(m, big, test); + test_copy_vram(m, big, exec, test); } out: @@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe) for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, false); - xe_migrate_sanity_test(m, test); + xe_migrate_sanity_test(m, test, exec); xe_vm_unlock(m->q->vm); } @@ -490,7 +496,7 @@ err_sync: static void test_migrate(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { struct dma_fence *fence; u64 expected, retval; @@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo); + ret = 
xe_bo_evict(vram_bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; @@ -538,7 +544,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Restore vram buffer object\n"); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); return; @@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til { struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec; long ret; - sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + sys_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(sys_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til return; } + exec = XE_VALIDATION_OPT_OUT; xe_bo_lock(sys_bo, false); - ret = xe_bo_validate(sys_bo, NULL, false); + ret = xe_bo_validate(sys_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_sysbo; @@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(ccs_bo, false); - ret = xe_bo_validate(ccs_bo, NULL, false); + ret = xe_bo_validate(ccs_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_ccsbo; @@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + vram_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); @@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(vram_bo, false); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); goto free_vrambo; @@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } test_clear(xe, tile, sys_bo, vram_bo, test); - test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test); + test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test); xe_bo_unlock(vram_bo); xe_bo_lock(vram_bo, false); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index db30c5156d0c..69e2840c7ef0 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,12 +12,219 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> +#define PLATFORM_CASE(platform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + 
.subplatform = XE_SUBPLATFORM_NONE, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ + media_verx100__, media_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .graphics_verx100 = graphics_verx100__, \ + .media_verx100 = media_verx100__, \ + .step = { .graphics = STEP_ ## graphics_step__, \ + .media = STEP_ ## media_step__ } \ + } + +static const struct xe_pci_fake_data cases[] = { + PLATFORM_CASE(TIGERLAKE, B0), + PLATFORM_CASE(DG1, A0), + PLATFORM_CASE(DG1, B0), + PLATFORM_CASE(ALDERLAKE_S, A0), + PLATFORM_CASE(ALDERLAKE_S, B0), + PLATFORM_CASE(ALDERLAKE_S, C0), + PLATFORM_CASE(ALDERLAKE_S, D0), + PLATFORM_CASE(ALDERLAKE_P, A0), + PLATFORM_CASE(ALDERLAKE_P, B0), + PLATFORM_CASE(ALDERLAKE_P, C0), + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), + SUBPLATFORM_CASE(DG2, G10, C0), + SUBPLATFORM_CASE(DG2, G11, B1), + SUBPLATFORM_CASE(DG2, G12, A1), + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), + GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), + GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), +}; + +KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); + +/** + * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_pci_fake_data parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc) +{ + return platform_gen_params(test, prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params); + +static const struct xe_device_desc *lookup_desc(enum xe_platform p) +{ + const struct xe_device_desc *desc; + const struct pci_device_id *ids; + + for (ids = pciidlist; ids->driver_data; ids++) { + desc = (const void *)ids->driver_data; + if (desc->platform == p) + return desc; + } + return NULL; +} + +static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_device_desc *desc = lookup_desc(p); + const struct xe_subplatform_desc *spd; + + if (desc && desc->subplatforms) + for (spd = desc->subplatforms; spd->subplatform; spd++) + if (spd->subplatform == s) + return spd; + return NULL; +} + +static const char *lookup_platform_name(enum xe_platform p) +{ + const struct xe_device_desc *desc = lookup_desc(p); + + return desc ? desc->platform_name : "INVALID"; +} + +static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s); + + return desc ? desc->name : "INVALID"; +} + +static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? 
"" : __lookup_subplatform_name(p, s); +} + +static const char *subplatform_prefix(enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? "" : " "; +} + +static const char *step_prefix(enum xe_step step) +{ + return step == STEP_NONE ? "" : " "; +} + +static const char *step_name(enum xe_step step) +{ + return step == STEP_NONE ? "" : xe_step_name(step); +} + +static const char *sriov_prefix(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : " "; +} + +static const char *sriov_name(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode); +} + +static const char *lookup_graphics_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_graphics_ip(verx100); + + return ip ? ip->name : ""; +} + +static const char *lookup_media_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_media_ip(verx100); + + return ip ? ip->name : ""; +} + +/** + * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter + * @param: the &struct xe_pci_fake_data parameter to describe + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares description of the struct xe_pci_fake_data parameter. + * + * It is tailored for use in parameterized KUnit tests where parameter generator + * is based on the struct xe_pci_fake_data arrays. + */ +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc) +{ + if (param->graphics_verx100 || param->media_verx100) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + param->graphics_verx100 / 100, param->graphics_verx100 % 100, + lookup_graphics_name(param->graphics_verx100), + step_prefix(param->step.graphics), step_name(param->step.graphics), + param->media_verx100 / 100, param->media_verx100 % 100, + lookup_media_name(param->media_verx100), + step_prefix(param->step.media), step_name(param->step.media), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); + else + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + step_prefix(param->step.graphics), step_name(param->step.graphics), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc); + static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) { snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", param->verx100 / 100, param->verx100 % 100, param->name); } +/* + * Pre-GMDID Graphics and Media IPs definitions. 
+ * + * Mimic the way GMDID IPs are declared so the same + * param generator can be used for both + */ +static const struct xe_ip pre_gmdid_graphics_ips[] = { + { 1200, "Xe_LP", &graphics_xelp }, + { 1210, "Xe_LP+", &graphics_xelp }, + { 1255, "Xe_HPG", &graphics_xehpg }, + { 1260, "Xe_HPC", &graphics_xehpc }, +}; + +static const struct xe_ip pre_gmdid_media_ips[] = { + { 1200, "Xe_M", &media_xem }, + { 1255, "Xe_HPM", &media_xem }, +}; + +KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc); + KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); @@ -44,9 +251,16 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); * * Return: pointer to the next parameter or NULL if no more parameters */ -const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc) { - return graphics_ip_gen_params(prev, desc); + const void *next = pre_gmdid_graphics_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_graphics_ips)) + prev = NULL; + + return graphics_ip_gen_params(test, prev, desc); } EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); @@ -61,9 +275,16 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); * * Return: pointer to the next parameter or NULL if no more parameters */ -const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) +const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc) { - return media_ip_gen_params(prev, desc); + const void *next = pre_gmdid_media_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_media_ips)) + prev = NULL; + + return media_ip_gen_params(test, prev, desc); } EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); @@ -78,9 +299,9 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); * * Return: pointer to the next parameter or NULL if no more parameters */ -const void *xe_pci_id_gen_param(const void *prev, char *desc) +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc) { - const struct pci_device_id *pci = pci_id_gen_params(prev, desc); + const struct pci_device_id *pci = pci_id_gen_params(test, prev, desc); return pci->driver_data ? pci : NULL; } @@ -94,10 +315,10 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, if (type == GMDID_MEDIA) { *ver = data->media_verx100; - *revid = xe_step_to_gmdid(data->media_step); + *revid = xe_step_to_gmdid(data->step.media); } else { *ver = data->graphics_verx100; - *revid = xe_step_to_gmdid(data->graphics_step); + *revid = xe_step_to_gmdid(data->step.graphics); } } @@ -166,7 +387,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); * Return: pointer to the next &struct xe_device ready to be used as a parameter * or NULL if there are no more Xe devices on the system. */ -const void *xe_pci_live_device_gen_param(const void *prev, char *desc) +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc) { const struct xe_device *xe = prev; struct device *dev = xe ? 
xe->drm.dev : NULL; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index ce4d2b86b778..30505d1cbefc 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -7,9 +7,11 @@ #define _XE_PCI_TEST_H_ #include <linux/types.h> +#include <kunit/test.h> #include "xe_platform_types.h" #include "xe_sriov_types.h" +#include "xe_step_types.h" struct xe_device; @@ -17,17 +19,18 @@ struct xe_pci_fake_data { enum xe_sriov_mode sriov_mode; enum xe_platform platform; enum xe_subplatform subplatform; + struct xe_step_info step; u32 graphics_verx100; u32 media_verx100; - u32 graphics_step; - u32 media_step; }; int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc); +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc); -const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); -const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); -const void *xe_pci_id_gen_param(const void *prev, char *desc); -const void *xe_pci_live_device_gen_param(const void *prev, char *desc); +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 416258c193f6..49d191043dfa 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -15,87 +15,10 @@ #include "xe_tuning.h" #include "xe_wa.h" -struct platform_test_case { - const char *name; - enum xe_platform platform; - enum xe_subplatform subplatform; - u32 graphics_verx100; - u32 media_verx100; - struct xe_step_info step; -}; - -#define PLATFORM_CASE(platform__, graphics_step__) \ - { \ - .name = #platform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - - -#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ - { \ - .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - -#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ - media_verx100__, media_step__) \ - { \ - .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .graphics_verx100 = graphics_verx100__, \ - .media_verx100 = media_verx100__, \ - .step = { .graphics = STEP_ ## graphics_step__, \ - .media = STEP_ ## media_step__ } \ - } - -static const struct platform_test_case cases[] = { - PLATFORM_CASE(TIGERLAKE, B0), - PLATFORM_CASE(DG1, A0), - PLATFORM_CASE(DG1, B0), - PLATFORM_CASE(ALDERLAKE_S, A0), - PLATFORM_CASE(ALDERLAKE_S, B0), - PLATFORM_CASE(ALDERLAKE_S, C0), - PLATFORM_CASE(ALDERLAKE_S, D0), - PLATFORM_CASE(ALDERLAKE_P, A0), - PLATFORM_CASE(ALDERLAKE_P, B0), - PLATFORM_CASE(ALDERLAKE_P, C0), - SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), - SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), - SUBPLATFORM_CASE(DG2, G10, C0), - SUBPLATFORM_CASE(DG2, G11, B1), 
- SUBPLATFORM_CASE(DG2, G12, A1), - GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), - GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), - GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), - GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), - GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), -}; - -static void platform_desc(const struct platform_test_case *t, char *desc) -{ - strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); -} - -KUNIT_ARRAY_PARAM(platform, cases, platform_desc); - static int xe_wa_test_init(struct kunit *test) { - const struct platform_test_case *param = test->param_value; - struct xe_pci_fake_data data = { - .platform = param->platform, - .subplatform = param->subplatform, - .graphics_verx100 = param->graphics_verx100, - .media_verx100 = param->media_verx100, - .graphics_step = param->step.graphics, - .media_step = param->step.media, - }; + const struct xe_pci_fake_data *param = test->param_value; + struct xe_pci_fake_data data = *param; struct xe_device *xe; struct device *dev; int ret; @@ -120,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test) return 0; } -static void xe_wa_test_exit(struct kunit *test) -{ - struct xe_device *xe = test->priv; - - drm_kunit_helper_free_device(test, xe->drm.dev); -} - static void xe_wa_gt(struct kunit *test) { struct xe_device *xe = test->priv; @@ -144,14 +60,13 @@ static void xe_wa_gt(struct kunit *test) } static struct kunit_case xe_wa_tests[] = { - KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), + KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params), {} }; static struct kunit_suite xe_rtp_test_suite = { .name = "xe_wa", .init = xe_wa_test_init, - .exit = xe_wa_test_exit, .test_cases = xe_wa_tests, }; diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index feb6e013dc38..6d20229c11de 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -64,7 +64,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, enum xe_sriov_vf_ccs_rw_ctxs ctx_id) { struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); - struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); struct xe_sa_manager *bb_pool; int err; @@ -78,7 +78,7 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, * So, this extra DW acts as a guard here. */ - bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); if (IS_ERR(bb->bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 81bae7a59038..fbe81cda15c2 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -975,11 +975,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, * CCS meta data is migrated from TT -> SMEM. So, let us detach the * BBs from BO as it is no longer needed. 
*/ - if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT && + if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT && new_mem->mem_type == XE_PL_SYSTEM) xe_sriov_vf_ccs_detach_bo(bo); - if (IS_SRIOV_VF(xe) && + if (IS_VF_CCS_READY(xe) && ((move_lacks_source && new_mem->mem_type == XE_PL_TT) || (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) && handle_system_ccs) @@ -995,7 +995,7 @@ out: if (timeout < 0) ret = timeout; - if (IS_VF_CCS_BB_VALID(xe, bo)) + if (IS_VF_CCS_READY(xe)) xe_sriov_vf_ccs_detach_bo(bo); xe_tt_unmap_sg(xe, ttm_bo->ttm); @@ -1142,42 +1142,47 @@ out_unref: int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_bo *backup; int ret = 0; - xe_bo_lock(bo, false); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + xe_assert(xe, !bo->backup_obj); - xe_assert(xe, !bo->backup_obj); + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + break; - /* - * Since this is called from the PM notifier we might have raced with - * someone unpinning this after we dropped the pinned list lock and - * grabbing the above bo lock. - */ - if (!xe_bo_is_pinned(bo)) - goto out_unlock_bo; + if (!xe_bo_is_vram(bo)) + break; - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - ttm_bo_pin(&backup->ttm); - bo->backup_obj = backup; - -out_unlock_bo: - xe_bo_unlock(bo); return ret; } @@ -1203,57 +1208,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) return 0; } -/** - * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory - * @bo: The buffer object to move. - * - * On successful completion, the object memory will be moved to system memory. - * - * This is needed to for special handling of pinned VRAM object during - * suspend-resume. - * - * Return: 0 on success. Negative error code on failure. 
- */ -int xe_bo_evict_pinned(struct xe_bo *bo) +static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup) { - struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_bo *backup = bo->backup_obj; - bool backup_created = false; + struct xe_device *xe = xe_bo_device(bo); bool unmap = false; int ret = 0; - xe_bo_lock(bo, false); - - if (WARN_ON(!bo->ttm.resource)) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (WARN_ON(!xe_bo_is_pinned(bo))) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; - - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; - - if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, - NULL, xe_bo_size(bo), - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; - } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - backup_created = true; - } - if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { struct xe_migrate *migrate; struct dma_fence *fence; @@ -1263,14 +1223,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo) else migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); + xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv); ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); if (ret) goto out_backup; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_backup; - fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, backup->ttm.resource, false); if (IS_ERR(fence)) { @@ -1280,8 +1237,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1291,7 +1246,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (iosys_map_is_null(&bo->vmap)) { ret = xe_bo_vmap(bo); if (ret) - goto out_backup; + goto out_vunmap; unmap = true; } @@ -1301,15 +1256,78 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (!bo->backup_obj) bo->backup_obj = backup; - -out_backup: +out_vunmap: xe_bo_vunmap(backup); - if (ret && backup_created) - xe_bo_put(backup); -out_unlock_bo: +out_backup: if (unmap) xe_bo_vunmap(bo); - xe_bo_unlock(bo); + + return ret; +} + +/** + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory + * @bo: The buffer object to move. + * + * On successful completion, the object memory will be moved to system memory. + * + * This is needed to for special handling of pinned VRAM object during + * suspend-resume. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_bo_evict_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + + if (WARN_ON(!bo->ttm.resource)) { + ret = -EINVAL; + break; + } + + if (WARN_ON(!xe_bo_is_pinned(bo))) { + ret = -EINVAL; + break; + } + + if (!xe_bo_is_vram(bo)) + break; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; + + if (!backup) { + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, + xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; + } + + ret = xe_bo_evict_pinned_copy(bo, backup); + } + + if (ret && backup_created) + xe_bo_put(backup); + return ret; } @@ -1359,10 +1377,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (ret) goto out_unlock_bo; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_unlock_bo; - fence = xe_migrate_copy(migrate, backup, bo, backup->ttm.resource, bo->ttm.resource, false); @@ -1373,8 +1387,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1530,7 +1542,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) if (!xe_bo_is_xe_bo(ttm_bo)) return; - if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo)) + if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev))) xe_sriov_vf_ccs_detach_bo(bo); /* @@ -1726,65 +1738,246 @@ static bool should_migrate_to_smem(struct xe_bo *bo) bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; } -static vm_fault_t xe_gem_fault(struct vm_fault *vmf) +static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx) +{ + long lerr; + + if (ctx->no_wait_gpu) + return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ? + 0 : -EBUSY; + + lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + ctx->interruptible, MAX_SCHEDULE_TIMEOUT); + if (lerr < 0) + return lerr; + if (lerr == 0) + return -EBUSY; + + return 0; +} + +/* Populate the bo if swapped out, or migrate if the access mode requires that. */ +static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, + struct drm_exec *exec) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + int err = 0; + + if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { + err = xe_bo_wait_usage_kernel(bo, ctx); + if (!err) + err = ttm_bo_populate(&bo->ttm, ctx); + } else if (should_migrate_to_smem(bo)) { + xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); + err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); + } + + return err; +} + +/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. 
*/ +static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo) +{ + vm_fault_t ret; + + trace_xe_bo_cpu_fault(bo); + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + /* + * When TTM is actually called to insert PTEs, ensure no blocking conditions + * remain, in which case TTM may drop locks and return VM_FAULT_RETRY. + */ + xe_assert(xe, ret != VM_FAULT_RETRY); + + if (ret == VM_FAULT_NOPAGE && + mem_type_is_vram(bo->ttm.resource->mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (list_empty(&bo->vram_userfault_link)) + list_add(&bo->vram_userfault_link, + &xe->mem_access.vram_userfault.list); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } + + return ret; +} + +static vm_fault_t xe_err_to_fault_t(int err) +{ + switch (err) { + case 0: + case -EINTR: + case -ERESTARTSYS: + case -EAGAIN: + return VM_FAULT_NOPAGE; + case -ENOMEM: + case -ENOSPC: + return VM_FAULT_OOM; + default: + break; + } + return VM_FAULT_SIGBUS; +} + +static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo) +{ + dma_resv_assert_held(tbo->base.resv); + + return tbo->ttm && + (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) == + TTM_TT_FLAG_EXTERNAL; +} + +static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe, + struct xe_bo *bo, bool needs_rpm) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + vm_fault_t ret = VM_FAULT_RETRY; + struct xe_validation_ctx ctx; + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = true, + .gfp_retry_mayfail = true, + + }; + int err; + + if (needs_rpm && !xe_pm_runtime_get_if_active(xe)) + return VM_FAULT_RETRY; + + err = xe_validation_ctx_init(&ctx, &xe->val, NULL, + (struct xe_val_flags) { + .interruptible = true, + .no_block = true + }); + if (err) + goto out_pm; + + if (!dma_resv_trylock(tbo->base.resv)) + goto out_validation; + + if (xe_ttm_bo_is_imported(tbo)) { + ret = VM_FAULT_SIGBUS; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + goto out_unlock; + } + + err = xe_bo_fault_migrate(bo, &tctx, NULL); + if (err) { + /* Return VM_FAULT_RETRY on these errors. 
*/ + if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY) + ret = xe_err_to_fault_t(err); + goto out_unlock; + } + + if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) + ret = __xe_bo_cpu_fault(vmf, xe, bo); + +out_unlock: + dma_resv_unlock(tbo->base.resv); +out_validation: + xe_validation_ctx_fini(&ctx); +out_pm: + if (needs_rpm) + xe_pm_runtime_put(xe); + + return ret; +} + +static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; struct drm_device *ddev = tbo->base.dev; struct xe_device *xe = to_xe_device(ddev); struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; + bool retry_after_wait = false; + struct xe_validation_ctx ctx; + struct drm_exec exec; vm_fault_t ret; - int idx, r = 0; + int err = 0; + int idx; + + if (!drm_dev_enter(&xe->drm, &idx)) + return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + + ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm); + if (ret != VM_FAULT_RETRY) + goto out; + + if (fault_flag_allow_retry_first(vmf->flags)) { + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) + goto out; + retry_after_wait = true; + xe_bo_get(bo); + mmap_read_unlock(vmf->vma->vm_mm); + } else { + ret = VM_FAULT_NOPAGE; + } + + /* + * The fastpath failed and we were not required to return and retry immediately. + * We're now running in one of two modes: + * + * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying + * to resolve blocking waits. But we can't resolve the fault since the + * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath + * should succeed. But it may fail since we drop the bo lock. + * + * 2) retry_after_wait == false: The fastpath failed, typically even after + * a retry. Do whatever's necessary to resolve the fault. + * + * This construct is recommended to avoid excessive waits under the mmap_lock. + */ if (needs_rpm) xe_pm_runtime_get(xe); - ret = ttm_bo_vm_reserve(tbo, vmf); - if (ret) - goto out; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = false, + .gfp_retry_mayfail = retry_after_wait, + }; - if (drm_dev_enter(ddev, &idx)) { - trace_xe_bo_cpu_fault(bo); + err = drm_exec_lock_obj(&exec, &tbo->base); + drm_exec_retry_on_contention(&exec); + if (err) + break; - if (should_migrate_to_smem(bo)) { - xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM); + if (xe_ttm_bo_is_imported(tbo)) { + err = -EFAULT; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + break; + } - r = xe_bo_migrate(bo, XE_PL_TT); - if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR) - ret = VM_FAULT_NOPAGE; - else if (r) - ret = VM_FAULT_SIGBUS; + err = xe_bo_fault_migrate(bo, &tctx, &exec); + if (err) { + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; } - if (!ret) - ret = ttm_bo_vm_fault_reserved(vmf, - vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); - drm_dev_exit(idx); - if (ret == VM_FAULT_RETRY && - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - goto out; + err = xe_bo_wait_usage_kernel(bo, &tctx); + if (err) + break; - /* - * ttm_bo_vm_reserve() already has dma_resv_lock. 
- */ - if (ret == VM_FAULT_NOPAGE && - mem_type_is_vram(tbo->resource->mem_type)) { - mutex_lock(&xe->mem_access.vram_userfault.lock); - if (list_empty(&bo->vram_userfault_link)) - list_add(&bo->vram_userfault_link, - &xe->mem_access.vram_userfault.list); - mutex_unlock(&xe->mem_access.vram_userfault.lock); - } - } else { - ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + if (!retry_after_wait) + ret = __xe_bo_cpu_fault(vmf, xe, bo); } + /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */ + if (err && !retry_after_wait) + ret = xe_err_to_fault_t(err); - dma_resv_unlock(tbo->base.resv); -out: if (needs_rpm) xe_pm_runtime_put(xe); + if (retry_after_wait) + xe_bo_put(bo); +out: + drm_dev_exit(idx); + return ret; } @@ -1828,7 +2021,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) } static const struct vm_operations_struct xe_gem_vm_ops = { - .fault = xe_gem_fault, + .fault = xe_bo_cpu_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, .access = xe_bo_vm_access, @@ -1876,11 +2069,32 @@ void xe_bo_free(struct xe_bo *bo) kfree(bo); } -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags) +/** + * xe_bo_init_locked() - Initialize or create an xe_bo. + * @xe: The xe device. + * @bo: An already allocated buffer object or NULL + * if the function should allocate a new one. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @resv: Pointer to a locked shared reservation object to use fo this bo, + * or NULL for the xe_bo to use its own. + * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. + * @size: The storage size to use for the bo. + * @cpu_caching: The cpu caching used for system memory backing store. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Initialize or create an xe buffer object. On failure, any allocated buffer + * object passed in @bo will have been unreferenced. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -1949,6 +2163,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, ctx.resv = resv; } + xe_validation_assert_exec(xe, exec, &bo->ttm.base); if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { err = __xe_bo_placement_for_flags(xe, bo, bo->flags); if (WARN_ON(err)) { @@ -2050,7 +2265,7 @@ __xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, u16 cpu_caching, enum ttm_bo_type type, u32 flags, - u64 alignment) + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo = NULL; int err; @@ -2071,11 +2286,11 @@ __xe_bo_create_locked(struct xe_device *xe, } } - bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, - vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_FLAG_USER ? - &vm->lru_bulk_move : NULL, size, - cpu_caching, type, flags); + bo = xe_bo_init_locked(xe, bo, tile, vm ? 
xe_vm_resv(vm) : NULL, + vm && !xe_vm_in_fault_mode(vm) && + flags & XE_BO_FLAG_USER ? + &vm->lru_bulk_move : NULL, size, + cpu_caching, type, flags, exec); if (IS_ERR(bo)) return bo; @@ -2109,9 +2324,10 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + xe_bo_size(bo), U64_MAX); + start + xe_bo_size(bo), U64_MAX, + exec); } else { - err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec); } if (err) goto err_unlock_put_bo; @@ -2128,82 +2344,166 @@ err_unlock_put_bo: return ERR_PTR(err); } -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment) -{ - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, - flags, alignment); -} - +/** + * xe_bo_create_locked() - Create a BO + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Create a locked xe BO with no range- nor alignment restrictions. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, - flags, 0); + flags, 0, exec); } -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags) +static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u16 cpu_caching, + enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) { - struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, ttm_bo_type_device, - flags | XE_BO_FLAG_USER, 0); - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; - return bo; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL, + cpu_caching, type, flags, alignment, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } else { + xe_bo_unlock(bo); + } + } + + return ret ? ERR_PTR(ret) : bo; } -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_user() - Create a user BO + * @xe: The xe device. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @cpu_caching: The caching mode to be used for system backing store. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL + * if such a transaction should be initiated by the call. + * + * Create a bo on behalf of user-space. + * + * Return: The buffer object on success. Negative error pointer on failure. 
+ */ +struct xe_bo *xe_bo_create_user(struct xe_device *xe, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + u32 flags, struct drm_exec *exec) { - struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); + struct xe_bo *bo; - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + flags |= XE_BO_FLAG_USER; + + if (vm || exec) { + xe_assert(xe, exec); + bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL, + cpu_caching, ttm_bo_type_device, + flags, 0, exec); + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + } else { + bo = xe_bo_create_novm(xe, NULL, size, cpu_caching, + ttm_bo_type_device, flags, 0, true); + } return bo; } -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_pin_range_novm() - Create and pin a BO with range options. + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @start: Start of fixed VRAM range or 0. + * @end: End of fixed VRAM range or ~0ULL. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * + * Create an Xe BO with range- and options. If @start and @end indicate + * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement + * only. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) { - return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, - type, flags, 0); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end, + 0, type, flags, 0, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + + err = xe_bo_pin(bo, &exec); + xe_bo_unlock(bo); + if (err) { + xe_bo_put(bo); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + + return err ? ERR_PTR(err) : bo; } -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment) +static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo; int err; u64 start = offset == ~0ull ? 0 : offset; - u64 end = offset == ~0ull ? offset : start + size; + u64 end = offset == ~0ull ? 
~0ull : start + size; if (flags & XE_BO_FLAG_STOLEN && xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_FLAG_GGTT; - bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, - alignment); + bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, + alignment, exec); if (IS_ERR(bo)) return bo; - err = xe_bo_pin(bo); + err = xe_bo_pin(bo, exec); if (err) goto err_put; @@ -2223,11 +2523,100 @@ err_put: return ERR_PTR(err); } +/** + * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @offset: Optional VRAM offset or %~0ull for don't care. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @alignment: GGTT alignment. + * @intr: Whether to execute any waits for backing store interruptible. + * + * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment + * options. The bo will be external and not associated with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. + */ +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) +{ + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset, + type, flags, alignment, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } + } + + return ret ? ERR_PTR(ret) : bo; +} + +/** + * xe_bo_create_pin_map() - Create pinned and mapped bo + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * @vm: The vm to associate the buffer object with. The vm's resv must be locked + * with the transaction represented by @exec. + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, and + * previously used for locking @vm's resv. + * + * Create a pinned and mapped bo. The bo will be external and not associated + * with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @exec was + * configured for interruptible locking. + */ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { - return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags, + 0, exec); +} + +/** + * xe_bo_create_pin_map_novm() - Create pinned and mapped bo + * @xe: The xe device. 
+ * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @intr: Whether to execut any waits for backing store interruptible. + * + * Create a pinned and mapped bo. The bo will be external and not associated + * with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. + */ +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr) +{ + return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr); } static void __xe_bo_unpin_map_no_vm(void *arg) @@ -2242,8 +2631,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile int ret; KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); - - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true); if (IS_ERR(bo)) return bo; @@ -2254,6 +2642,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile return bo; } +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo); +} + struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags) { @@ -2326,6 +2719,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * xe_bo_pin_external - pin an external BO * @bo: buffer object to be pinned * @in_place: Pin in current placement, don't attempt to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) * BO. Unique call compared to xe_bo_pin as this function has it own set of @@ -2333,7 +2727,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * * Returns 0 for success, negative error code otherwise. */ -int xe_bo_pin_external(struct xe_bo *bo, bool in_place) +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec) { struct xe_device *xe = xe_bo_device(bo); int err; @@ -2343,7 +2737,7 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place) if (!xe_bo_is_pinned(bo)) { if (!in_place) { - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; } @@ -2366,7 +2760,17 @@ int xe_bo_pin_external(struct xe_bo *bo, bool in_place) return 0; } -int xe_bo_pin(struct xe_bo *bo) +/** + * xe_bo_pin() - Pin a kernel bo after potentially migrating it + * @bo: The kernel bo to pin. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Attempts to migrate a bo to @bo->placement. If that succeeds, + * pins the bo. + * + * Return: %0 on success, negative error code on migration failure. 
+ */ +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_place *place = &bo->placements[0]; struct xe_device *xe = xe_bo_device(bo); @@ -2388,7 +2792,7 @@ int xe_bo_pin(struct xe_bo *bo) /* We only expect at most 1 pin */ xe_assert(xe, !xe_bo_is_pinned(bo)); - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -2481,6 +2885,7 @@ void xe_bo_unpin(struct xe_bo *bo) * NULL. Used together with @allow_res_evict. * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's * reservation object. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Make sure the bo is in allowed placement, migrating it if necessary. If * needed, other bos will be evicted. If bos selected for eviction shares @@ -2490,7 +2895,8 @@ void xe_bo_unpin(struct xe_bo *bo) * Return: 0 on success, negative error code on failure. May return * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal. */ -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -2512,6 +2918,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); xe_vm_clear_validating(vm, allow_res_evict); @@ -2707,8 +3114,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm = NULL; - ktime_t end = 0; struct xe_bo *bo; unsigned int bo_flags; u32 handle; @@ -2782,25 +3190,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } -retry: - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto out_vm; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + if (vm) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + } + bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching, + bo_flags, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } - - bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - bo_flags); - - if (vm) - xe_vm_unlock(vm); - - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry; + if (err) goto out_vm; - } if (args->extensions) { err = gem_create_user_extensions(xe, bo, args->extensions, 0); @@ -2949,6 +3358,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * xe_bo_migrate - Migrate an object to the desired region id * @bo: The buffer object to migrate. * @mem_type: The TTM region type to migrate to. + * @tctx: A pointer to a struct ttm_operation_ctx or NULL if + * a default interruptibe ctx is to be used. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Attempt to migrate the buffer object to the desired memory region. The * buffer object may not be pinned, and must be locked. 
@@ -2960,7 +3372,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * Return: 0 on success. Negative error code on failure. In particular may * return -EINTR or -ERESTARTSYS if signal pending. */ -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx, + struct drm_exec *exec) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct ttm_operation_ctx ctx = { @@ -2972,6 +3385,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_place requested; xe_bo_assert_held(bo); + tctx = tctx ? tctx : &ctx; if (bo->ttm.resource->mem_type == mem_type) return 0; @@ -2998,19 +3412,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) add_vram(xe, bo, &requested, bo->flags, mem_type, &c); } - return ttm_bo_validate(&bo->ttm, &placement, &ctx); + if (!tctx->no_wait_gpu) + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); + return ttm_bo_validate(&bo->ttm, &placement, tctx); } /** * xe_bo_evict - Evict an object to evict placement * @bo: The buffer object to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * On successful completion, the object memory will be moved to evict * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. */ -int xe_bo_evict(struct xe_bo *bo) +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -3169,11 +3586,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv, if (err) return err; - bo = xe_bo_create_user(xe, NULL, NULL, args->size, + bo = xe_bo_create_user(xe, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index cfb1ec266a6d..a77af42b5f9e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -10,6 +10,7 @@ #include "xe_bo_types.h" #include "xe_macros.h" +#include "xe_validation.h" #include "xe_vm_types.h" #include "xe_vm.h" #include "xe_vram_types.h" @@ -88,40 +89,34 @@ struct sg_table; struct xe_bo *xe_bo_alloc(void); void xe_bo_free(struct xe_bo *bo); -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags); -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment); +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags); + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec); +struct xe_bo 
*xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size, + u16 cpu_caching, u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, u64 offset, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment); + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec); +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr); +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags); +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, + u32 flags, u64 alignment, bool intr); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags); int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); @@ -200,11 +195,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -int xe_bo_pin_external(struct xe_bo *bo, bool in_place); -int xe_bo_pin(struct xe_bo *bo); +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec); +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec); void xe_bo_unpin_external(struct xe_bo *bo); void xe_bo_unpin(struct xe_bo *bo); -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec); static inline bool xe_bo_is_pinned(struct xe_bo *bo) { @@ -285,8 +281,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_evict(struct xe_bo *bo); +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc, + struct drm_exec *exec); +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec); int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); @@ -315,6 +312,21 @@ static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) return PAGE_ALIGN(xe_bo_size(bo)); } +/** + * xe_bo_has_valid_ccs_bb - Check if CCS's BBs were setup for the BO. + * @bo: the &xe_bo to check + * + * The CCS's BBs should only be setup by the driver VF, but it is safe + * to call this function also by non-VF driver. + * + * Return: true iff the CCS's BBs are setup, false otherwise. 
+ */ +static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo) +{ + return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && + bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; +} + static inline bool xe_bo_has_pages(struct xe_bo *bo) { if ((bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) || diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7484ce55a303..d5dbc51e8612 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -158,8 +158,8 @@ int xe_bo_evict_all(struct xe_device *xe) if (ret) return ret; - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, - &xe->pinned.late.evicted, xe_bo_evict_pinned); + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 314652afdca7..d4fe3c8dca5b 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -25,7 +25,9 @@ struct xe_vm; /* TODO: To be selected with VM_MADVISE */ #define XE_BO_PRIORITY_NORMAL 1 -/** @xe_bo: XE buffer object */ +/** + * struct xe_bo - Xe buffer object + */ struct xe_bo { /** @ttm: TTM base buffer object */ struct ttm_buffer_object ttm; @@ -47,7 +49,7 @@ struct xe_bo { struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; - /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + /** @kmap: TTM bo kmap object for internal use only. Keep off. */ struct ttm_bo_kmap_obj kmap; /** @pinned_link: link to present / evicted list of pinned BO */ struct list_head pinned_link; @@ -82,10 +84,10 @@ struct xe_bo { /** @created: Whether the bo has passed initial creation */ bool created; - /** @ccs_cleared */ + /** @ccs_cleared: true means that CCS region of BO is already cleared */ bool ccs_cleared; - /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF */ + /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; /** @@ -99,9 +101,10 @@ struct xe_bo { struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ - struct list_head vram_userfault_link; + struct list_head vram_userfault_link; - /** @min_align: minimum alignment needed for this BO if different + /** + * @min_align: minimum alignment needed for this BO if different * from default */ u64 min_align; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 1025d3979b06..139663423185 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -4,6 +4,7 @@ */ #include <linux/bitops.h> +#include <linux/ctype.h> #include <linux/configfs.h> #include <linux/cleanup.h> #include <linux/find.h> @@ -12,6 +13,7 @@ #include <linux/pci.h> #include <linux/string.h> +#include "instructions/xe_mi_commands.h" #include "xe_configfs.h" #include "xe_hw_engine_types.h" #include "xe_module.h" @@ -21,7 +23,7 @@ * DOC: Xe Configfs * * Overview - * ========= + * ======== * * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a * configfs subsystem called ``xe`` that creates a directory in the mounted @@ -34,7 +36,7 @@ * * To create a device, the ``xe`` module should already be loaded, but some * attributes can only be set before binding the device. 
It can be accomplished - * by blocking the driver autoprobe: + * by blocking the driver autoprobe:: * * # echo 0 > /sys/bus/pci/drivers_autoprobe * # modprobe xe @@ -115,6 +117,58 @@ * * This attribute can only be set before binding to the device. * + * Context restore BB + * ------------------ + * + * Allow to execute a batch buffer during any context switches. When the + * GPU is restoring the context, it executes additional commands. It's useful + * for testing additional workarounds and validating certain HW behaviors: it's + * not intended for normal execution and will taint the kernel with TAINT_TEST + * when used. + * + * The syntax allows to pass straight instructions to be executed by the engine + * in a batch buffer or set specific registers. + * + * #. Generic instruction:: + * + * <engine-class> cmd <instr> [[dword0] [dword1] [...]] + * + * #. Simple register setting:: + * + * <engine-class> reg <address> <value> + * + * Commands are saved per engine class: all instances of that class will execute + * those commands during context switch. The instruction, dword arguments, + * addresses and values are in hex format like in the examples below. + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the + * normal context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 at the + * beginning of the context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb + + * #. Load certain values in a couple of registers (it can be used as a simpler + * alternative to the `cmd`) action:: + * + * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF + * rcs reg 4F100 DEADBEEF + * rcs reg 4F104 FFFFFFFF + * EOF + * + * .. note:: + * + * When using multiple lines, make sure to use a command that is + * implemented with a single write syscall, like HEREDOC. + * + * Currently this is implemented only for post and mid context restore and + * these attributes can only be set before binding to the device. 
+ * * Remove devices * ============== * @@ -123,17 +177,27 @@ * # rmdir /sys/kernel/config/xe/0000:03:00.0/ */ +/* Similar to struct xe_bb, but not tied to HW (yet) */ +struct wa_bb { + u32 *cs; + u32 len; /* in dwords */ +}; + struct xe_config_group_device { struct config_group group; struct xe_config_device { u64 engines_allowed; + struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX]; + struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; bool survivability_mode; bool enable_psmi; } config; /* protects attributes */ struct mutex lock; + /* matching descriptor */ + const struct xe_device_desc *desc; }; static const struct xe_config_device device_defaults = { @@ -150,6 +214,7 @@ static void set_device_defaults(struct xe_config_device *config) struct engine_info { const char *cls; u64 mask; + enum xe_engine_class engine_class; }; /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ @@ -157,12 +222,12 @@ struct engine_info { #define MAX_ENGINE_INSTANCE_CHARS 2 static const struct engine_info engine_info[] = { - { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK }, - { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, - { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, - { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, - { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, - { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY }, + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE }, + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE }, + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE }, + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER }, }; static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) @@ -251,7 +316,18 @@ static ssize_t engines_allowed_show(struct config_item *item, char *page) return p - page; } -static bool lookup_engine_mask(const char *pattern, u64 *mask) +/* + * Lookup engine_info. If @mask is not NULL, reduce the mask according to the + * instance in @pattern. + * + * Examples of inputs: + * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and + * mask == BIT_ULL(XE_HW_ENGINE_RCS0) + * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and + * mask == XE_HW_ENGINE_RCS_MASK + * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info + */ +static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask) { for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { u8 instance; @@ -261,44 +337,62 @@ static bool lookup_engine_mask(const char *pattern, u64 *mask) continue; pattern += strlen(engine_info[i].cls); + if (!mask) + return *pattern ? 
NULL : &engine_info[i]; if (!strcmp(pattern, "*")) { *mask = engine_info[i].mask; - return true; + return &engine_info[i]; } if (kstrtou8(pattern, 10, &instance)) - return false; + return NULL; bit = __ffs64(engine_info[i].mask) + instance; if (bit >= fls64(engine_info[i].mask)) - return false; + return NULL; *mask = BIT_ULL(bit); - return true; + return &engine_info[i]; } - return false; + return NULL; +} + +static int parse_engine(const char *s, const char *end_chars, u64 *mask, + const struct engine_info **pinfo) +{ + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; + const struct engine_info *info; + size_t len; + + len = strcspn(s, end_chars); + if (len >= sizeof(buf)) + return -EINVAL; + + memcpy(buf, s, len); + buf[len] = '\0'; + + info = lookup_engine_info(buf, mask); + if (!info) + return -ENOENT; + + if (pinfo) + *pinfo = info; + + return len; } static ssize_t engines_allowed_store(struct config_item *item, const char *page, size_t len) { struct xe_config_group_device *dev = to_xe_config_group_device(item); - size_t patternlen, p; + ssize_t patternlen, p; u64 mask, val = 0; for (p = 0; p < len; p += patternlen + 1) { - char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; - - patternlen = strcspn(page + p, ",\n"); - if (patternlen >= sizeof(buf)) - return -EINVAL; - - memcpy(buf, page + p, patternlen); - buf[patternlen] = '\0'; - - if (!lookup_engine_mask(buf, &mask)) + patternlen = parse_engine(page + p, ",\n", &mask, NULL); + if (patternlen < 0) return -EINVAL; val |= mask; @@ -339,11 +433,250 @@ static ssize_t enable_psmi_store(struct config_item *item, const char *page, siz return len; } +static bool wa_bb_read_advance(bool dereference, char **p, + const char *append, size_t len, + size_t *max_size) +{ + if (dereference) { + if (len >= *max_size) + return false; + *max_size -= len; + if (append) + memcpy(*p, append, len); + } + + *p += len; + + return true; +} + +static ssize_t wa_bb_show(struct xe_config_group_device *dev, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + char *data, size_t sz) +{ + char *p = data; + + guard(mutex)(&dev->lock); + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + enum xe_engine_class ec = engine_info[i].engine_class; + size_t len; + + if (!wa_bb[ec].len) + continue; + + len = snprintf(p, sz, "%s:", engine_info[i].cls); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + + for (size_t j = 0; j < wa_bb[ec].len; j++) { + len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "\n", 1, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "", 1, &sz)) + return -ENOBUFS; + + /* Reserve one more to match check for '\0' */ + if (!data) + p++; + + return p - data; +} + +static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K); +} + +static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K); +} + +static void wa_bb_append(struct wa_bb *wa_bb, u32 val) +{ + if (wa_bb->cs) + wa_bb->cs[wa_bb->len] = val; + + wa_bb->len++; +} + +static ssize_t parse_hex(const char *line, u32 *pval) +{ + char numstr[12]; + const char *p; + ssize_t numlen; + + p = 
line + strspn(line, " \t"); + if (!*p || *p == '\n') + return 0; + + numlen = strcspn(p, " \t\n"); + if (!numlen || numlen >= sizeof(numstr) - 1) + return -EINVAL; + + memcpy(numstr, p, numlen); + numstr[numlen] = '\0'; + p += numlen; + + if (kstrtou32(numstr, 16, pval)) + return -EINVAL; + + return p - line; +} + +/* + * Parse lines with the format + * + * <engine-class> cmd <u32> <u32...> + * <engine-class> reg <u32_addr> <u32_val> + * + * and optionally save them in @wa_bb[i].cs is non-NULL. + * + * Return the number of dwords parsed. + */ +static ssize_t parse_wa_bb_lines(const char *lines, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX]) +{ + ssize_t dwords = 0, ret; + const char *p; + + for (p = lines; *p; p++) { + const struct engine_info *info = NULL; + u32 val, val2; + + /* Also allow empty lines */ + p += strspn(p, " \t\n"); + if (!*p) + break; + + ret = parse_engine(p, " \t\n", NULL, &info); + if (ret < 0) + return ret; + + p += ret; + p += strspn(p, " \t"); + + if (str_has_prefix(p, "cmd")) { + for (p += strlen("cmd"); *p;) { + ret = parse_hex(p, &val); + if (ret < 0) + return -EINVAL; + if (!ret) + break; + + p += ret; + dwords++; + wa_bb_append(&wa_bb[info->engine_class], val); + } + } else if (str_has_prefix(p, "reg")) { + p += strlen("reg"); + ret = parse_hex(p, &val); + if (ret <= 0) + return -EINVAL; + + p += ret; + ret = parse_hex(p, &val2); + if (ret <= 0) + return -EINVAL; + + p += ret; + dwords += 3; + wa_bb_append(&wa_bb[info->engine_class], + MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1)); + wa_bb_append(&wa_bb[info->engine_class], val); + wa_bb_append(&wa_bb[info->engine_class], val2); + } else { + return -EINVAL; + } + } + + return dwords; +} + +static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + struct xe_config_group_device *dev, + const char *page, size_t len) +{ + /* tmp_wa_bb must match wa_bb's size */ + struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { }; + ssize_t count, class; + u32 *tmp; + + /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + /* + * 2. Allocate a u32 array and set the pointers to the right positions + * according to the length of each class' wa_bb + */ + tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (!count) { + memset(wa_bb, 0, sizeof(tmp_wa_bb)); + return len; + } + + for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + tmp_wa_bb[class].cs = tmp + count; + count += tmp_wa_bb[class].len; + tmp_wa_bb[class].len = 0; + } + + /* 3. 
Parse wa_bb lines again, this time saving the values */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb)); + + return len; +} + +static ssize_t ctx_restore_mid_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz); +} + +static ssize_t ctx_restore_post_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz); +} + +CONFIGFS_ATTR(, ctx_restore_mid_bb); +CONFIGFS_ATTR(, ctx_restore_post_bb); CONFIGFS_ATTR(, enable_psmi); CONFIGFS_ATTR(, engines_allowed); CONFIGFS_ATTR(, survivability_mode); static struct configfs_attribute *xe_config_device_attrs[] = { + &attr_ctx_restore_mid_bb, + &attr_ctx_restore_post_bb, &attr_enable_psmi, &attr_engines_allowed, &attr_survivability_mode, @@ -355,6 +688,8 @@ static void xe_config_device_release(struct config_item *item) struct xe_config_group_device *dev = to_xe_config_group_device(item); mutex_destroy(&dev->lock); + + kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } @@ -362,8 +697,26 @@ static struct configfs_item_operations xe_config_device_ops = { .release = xe_config_device_release, }; +static bool xe_config_device_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + if (attr == &attr_survivability_mode) { + if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE) + return false; + } + + return true; +} + +static struct configfs_group_operations xe_config_device_group_ops = { + .is_visible = xe_config_device_is_visible, +}; + static const struct config_item_type xe_config_device_type = { .ct_item_ops = &xe_config_device_ops, + .ct_group_ops = &xe_config_device_group_ops, .ct_attrs = xe_config_device_attrs, .ct_owner = THIS_MODULE, }; @@ -442,6 +795,7 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro if (!dev) return ERR_PTR(-ENOMEM); + dev->desc = match; set_device_defaults(&dev->config); config_group_init_type_name(&dev->group, name, &xe_config_device_type); @@ -451,12 +805,12 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro return &dev->group; } -static struct configfs_group_operations xe_config_device_group_ops = { +static struct configfs_group_operations xe_config_group_ops = { .make_group = xe_config_make_device_group, }; static const struct config_item_type xe_configfs_type = { - .ct_group_ops = &xe_config_device_group_ops, + .ct_group_ops = &xe_config_group_ops, .ct_owner = THIS_MODULE, }; @@ -543,23 +897,6 @@ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) } /** - * xe_configfs_clear_survivability_mode - clear configfs survivability mode - * @pdev: pci device - */ -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) -{ - struct xe_config_group_device *dev = find_xe_config_group_device(pdev); - - if (!dev) - return; - - guard(mutex)(&dev->lock); - dev->config.survivability_mode = 0; - - config_group_put(&dev->group); -} - -/** * xe_configfs_get_engines_allowed - get engine allowed mask from configfs * @pdev: pci device * @@ -594,11 +931,63 @@ bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) return false; ret = dev->config.enable_psmi; - 
config_item_put(&dev->group.cg_item); + config_group_put(&dev->group); return ret; } +/** + * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting + * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - only valid during probe + * + * Return: Number of dwords used in the mid_ctx_restore setting in configfs + */ +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; + + if (!dev) + return 0; + + if (cs) + *cs = dev->config.ctx_restore_mid_bb[class].cs; + + len = dev->config.ctx_restore_mid_bb[class].len; + config_group_put(&dev->group); + + return len; +} + +/** + * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting + * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - only valid during probe + * + * Return: Number of dwords used in the post_ctx_restore setting in configfs + */ +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; + + if (!dev) + return 0; + + *cs = dev->config.ctx_restore_post_bb[class].cs; + len = dev->config.ctx_restore_post_bb[class].len; + config_group_put(&dev->group); + + return len; +} + int __init xe_configfs_init(void) { int ret; @@ -614,7 +1003,7 @@ int __init xe_configfs_init(void) return 0; } -void __exit xe_configfs_exit(void) +void xe_configfs_exit(void) { configfs_unregister_subsystem(&xe_configfs); mutex_destroy(&xe_configfs.su_mutex); diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index 58c8c3164000..c61e0e47ed94 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -8,6 +8,8 @@ #include <linux/limits.h> #include <linux/types.h> +#include <xe_hw_engine_types.h> + struct pci_dev; #if IS_ENABLED(CONFIG_CONFIGFS_FS) @@ -15,17 +17,23 @@ int xe_configfs_init(void); void xe_configfs_exit(void); void xe_configfs_check_device(struct pci_dev *pdev); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs); +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs); #else static inline int xe_configfs_init(void) { return 0; } static inline void xe_configfs_exit(void) { } static inline void xe_configfs_check_device(struct pci_dev *pdev) { } static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } -static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } +static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs) { return 0; } +static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs) { return 0; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 8d6df6bd885e..cd977dbd1ef6 100644 
--- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -24,7 +24,9 @@ #include "xe_pxp_debugfs.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_vf.h" #include "xe_step.h" +#include "xe_tile_debugfs.h" #include "xe_wa.h" #include "xe_vsec.h" @@ -38,7 +40,7 @@ DECLARE_FAULT_ATTR(gt_reset_failure); DECLARE_FAULT_ATTR(inject_csc_hw_error); static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, - u32 offset, char *name, struct drm_printer *p) + u32 offset, const char *name, struct drm_printer *p) { u64 residency = 0; int ret; @@ -134,9 +136,9 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) p = drm_seq_file_printer(m); xe_pm_runtime_get(xe); mmio = xe_root_tile_mmio(xe); - struct { + static const struct { u32 offset; - char *name; + const char *name; } residencies[] = { {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, @@ -163,9 +165,9 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) xe_pm_runtime_get(xe); mmio = xe_root_tile_mmio(xe); - struct { + static const struct { u32 offset; - char *name; + const char *name; } residencies[] = { {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"}, {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"}, @@ -329,23 +331,44 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = { .write = atomic_svm_timeslice_ms_set, }; -static void create_tile_debugfs(struct xe_tile *tile, struct dentry *root) +static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) { - char name[8]; + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + char buf[32]; + int len; - snprintf(name, sizeof(name), "tile%u", tile->id); - tile->debugfs = debugfs_create_dir(name, root); - if (IS_ERR(tile->debugfs)) - return; + len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + u32 uval; + ssize_t ret; - /* - * Store the xe_tile pointer as private data of the tile/ directory - * node so other tile specific attributes under that directory may - * refer to it by looking at its parent node private data. 
- */ - tile->debugfs->d_inode->i_private = tile; + ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval); + if (ret) + return ret; + + if (uval > 1) + return -EINVAL; + + late_bind->disable = !!uval; + return size; } +static const struct file_operations disable_late_binding_fops = { + .owner = THIS_MODULE, + .read = disable_late_binding_show, + .write = disable_late_binding_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; @@ -362,7 +385,7 @@ void xe_debugfs_register(struct xe_device *xe) ARRAY_SIZE(debugfs_list), root, minor); - if (xe->info.platform == XE_BATTLEMAGE) { + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { drm_debugfs_create_files(debugfs_residencies, ARRAY_SIZE(debugfs_residencies), root, minor); @@ -379,6 +402,9 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, &atomic_svm_timeslice_ms_fops); + debugfs_create_file("disable_late_binding", 0600, root, xe, + &disable_late_binding_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); @@ -398,7 +424,7 @@ void xe_debugfs_register(struct xe_device *xe) ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); for_each_tile(tile, xe, tile_id) - create_tile_debugfs(tile, root); + xe_tile_debugfs_register(tile); for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); @@ -411,4 +437,6 @@ void xe_debugfs_register(struct xe_device *xe) if (IS_SRIOV_PF(xe)) xe_sriov_pf_debugfs_register(xe, root); + else if (IS_SRIOV_VF(xe)) + xe_sriov_vf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9e4773a17ef8..2883b39c9b37 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -45,6 +45,7 @@ #include "xe_hwmon.h" #include "xe_i2c.h" #include "xe_irq.h" +#include "xe_late_bind_fw.h" #include "xe_mmio.h" #include "xe_module.h" #include "xe_nvm.h" @@ -457,6 +458,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; + xe_validation_device_init(&xe->val); + init_waitqueue_head(&xe->ufence_wq); init_rwsem(&xe->usm.lock); @@ -530,7 +533,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe) * re-init and saving/restoring (or re-populating) the wiped memory. Since we * perform the FLR as the very last action before releasing access to the HW * during the driver release flow, we don't attempt recovery at all, because - * if/when a new instance of i915 is bound to the device it will do a full + * if/when a new instance of Xe is bound to the device it will do a full * re-init anyway. 
*/ static void __xe_driver_flr(struct xe_device *xe) @@ -682,16 +685,16 @@ static int wait_for_lmem_ready(struct xe_device *xe) } ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ -static void sriov_update_device_info(struct xe_device *xe) +static void vf_update_device_info(struct xe_device *xe) { + xe_assert(xe, IS_SRIOV_VF(xe)); /* disable features that are not available/applicable to VFs */ - if (IS_SRIOV_VF(xe)) { - xe->info.probe_display = 0; - xe->info.has_heci_cscfi = 0; - xe->info.has_heci_gscfi = 0; - xe->info.skip_guc_pc = 1; - xe->info.skip_pcode = 1; - } + xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; + xe->info.has_heci_gscfi = 0; + xe->info.has_late_bind = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; } static int xe_device_vram_alloc(struct xe_device *xe) @@ -732,7 +735,8 @@ int xe_device_probe_early(struct xe_device *xe) xe_sriov_probe_early(xe); - sriov_update_device_info(xe); + if (IS_SRIOV_VF(xe)) + vf_update_device_info(xe); err = xe_pcode_probe_early(xe); if (err || xe_survivability_mode_is_requested(xe)) { @@ -901,6 +905,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = xe_late_bind_init(&xe->late_bind); + if (err) + return err; + err = xe_oa_init(xe); if (err) return err; @@ -950,7 +958,7 @@ int xe_device_probe(struct xe_device *xe) xe_vsec_init(xe); - err = xe_sriov_late_init(xe); + err = xe_sriov_init_late(xe); if (err) goto err_unregister_display; diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 6ee422594b56..c5151c86a98a 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -71,6 +71,15 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static struct attribute *vram_attrs[] = { + &dev_attr_vram_d3cold_threshold.attr, + NULL +}; + +static const struct attribute_group vram_attr_group = { + .attrs = vram_attrs, +}; + static ssize_t lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -149,41 +158,16 @@ out: } static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); -static int late_bind_create_files(struct device *dev) -{ - struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); - struct xe_tile *root = xe_device_get_root_tile(xe); - u32 cap = 0; - int ret; - - xe_pm_runtime_get(xe); - - ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), - &cap, NULL); - if (ret) { - if (ret == -ENXIO) { - drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); - ret = 0; - } - goto out; - } - - if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { - ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); - if (ret) - goto out; - } - - if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) - ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); -out: - xe_pm_runtime_put(xe); - - return ret; -} +static struct attribute *late_bind_attrs[] = { + &dev_attr_lb_fan_control_version.attr, + &dev_attr_lb_voltage_regulator_version.attr, + NULL +}; -static void late_bind_remove_files(struct device *dev) +static umode_t late_bind_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) { + struct device *dev = kobj_to_dev(kobj); struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); struct xe_tile *root = xe_device_get_root_tile(xe); u32 cap = 0; @@ -193,18 +177,25 @@ static void late_bind_remove_files(struct device *dev) 
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); + xe_pm_runtime_put(xe); if (ret) - goto out; + return 0; - if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) - sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + if (attr == &dev_attr_lb_fan_control_version.attr && + REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + return attr->mode; + if (attr == &dev_attr_lb_voltage_regulator_version.attr && + REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + return attr->mode; - if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) - sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); -out: - xe_pm_runtime_put(xe); + return 0; } +static const struct attribute_group late_bind_attr_group = { + .attrs = late_bind_attrs, + .is_visible = late_bind_attr_is_visible, +}; + /** * DOC: PCIe Gen5 Limitations * @@ -278,24 +269,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); -static const struct attribute *auto_link_downgrade_attrs[] = { +static struct attribute *auto_link_downgrade_attrs[] = { &dev_attr_auto_link_downgrade_capable.attr, &dev_attr_auto_link_downgrade_status.attr, NULL }; -static void xe_device_sysfs_fini(void *arg) -{ - struct xe_device *xe = arg; - - if (xe->d3cold.capable) - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - - if (xe->info.platform == XE_BATTLEMAGE) { - sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); - late_bind_remove_files(xe->drm.dev); - } -} +static const struct attribute_group auto_link_downgrade_attr_group = { + .attrs = auto_link_downgrade_attrs, +}; int xe_device_sysfs_init(struct xe_device *xe) { @@ -303,20 +285,20 @@ int xe_device_sysfs_init(struct xe_device *xe) int ret; if (xe->d3cold.capable) { - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + ret = devm_device_add_group(dev, &vram_attr_group); if (ret) return ret; } - if (xe->info.platform == XE_BATTLEMAGE) { - ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { + ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group); if (ret) return ret; - ret = late_bind_create_files(dev); + ret = devm_device_add_group(dev, &late_bind_attr_group); if (ret) return ret; } - return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1e780f8a2a8c..74d7af830b85 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -14,6 +14,7 @@ #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" +#include "xe_late_bind_fw_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa_types.h" @@ -26,6 +27,7 @@ #include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" +#include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -183,9 +185,6 @@ struct xe_tile { struct { /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ struct xe_ggtt_node *ggtt_balloon[2]; - - /** @sriov.vf.ccs: CCS read and write contexts for VF. 
*/ - struct xe_tile_vf_ccs ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; } vf; } sriov; @@ -282,6 +281,8 @@ struct xe_device { u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @info.has_late_bind: Device has firmware late binding support */ + u8 has_late_bind:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; /** @info.has_mbx_power_limits: Device has support to manage power limits using @@ -535,6 +536,9 @@ struct xe_device { /** @nvm: discrete graphics non-volatile memory */ struct intel_dg_nvm_dev *nvm; + /** @late_bind: xe mei late bind interface */ + struct xe_late_bind late_bind; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -586,6 +590,8 @@ struct xe_device { */ atomic64_t global_total_pages; #endif + /** @val: The domain for exhaustive eviction, which is currently per device. */ + struct xe_validation_device val; /** @psmi: GPU debugging via additional validation HW */ struct { @@ -595,6 +601,13 @@ struct xe_device { u8 region_mask; } psmi; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + /** @g2g_test_array: for testing G2G communications */ + u32 *g2g_test_array; + /** @g2g_test_count: for testing G2G communications */ + atomic_t g2g_test_count; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 95d06bd65b0f..a7d67725c3ee 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -51,6 +51,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) struct drm_gem_object *obj = attach->dmabuf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = xe_bo_device(bo); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; int ret; /* @@ -63,7 +64,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return -EINVAL; } - ret = xe_bo_migrate(bo, XE_PL_TT); + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) drm_dbg(&xe->drm, @@ -72,7 +73,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return ret; } - ret = xe_bo_pin_external(bo, true); + ret = xe_bo_pin_external(bo, true, exec); xe_assert(xe, !ret); return 0; @@ -92,6 +93,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, struct dma_buf *dma_buf = attach->dmabuf; struct drm_gem_object *obj = dma_buf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; struct sg_table *sgt; int r = 0; @@ -100,9 +102,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, if (!xe_bo_is_pinned(bo)) { if (!attach->peer2peer) - r = xe_bo_migrate(bo, XE_PL_TT); + r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); else - r = xe_bo_validate(bo, NULL, false); + r = xe_bo_validate(bo, NULL, false, exec); if (r) return ERR_PTR(r); } @@ -161,15 +163,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, struct xe_bo *bo = gem_to_xe_bo(obj); bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!reads) return 0; /* Can we do interruptible lock here? 
*/ - xe_bo_lock(bo, false); - (void)xe_bo_migrate(bo, XE_PL_TT); - xe_bo_unlock(bo); + xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + } + /* If we failed, cpu-access takes place in current placement. */ return 0; } @@ -220,32 +233,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); + struct xe_validation_ctx ctx; + struct drm_gem_object *dummy_obj; + struct drm_exec exec; struct xe_bo *bo; - int ret; - - dma_resv_lock(resv, NULL); - bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, - 0, /* Will require 1way or 2way for vm_bind */ - ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto error; + int ret = 0; + + dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!dummy_obj) + return ERR_PTR(-ENOMEM); + + dummy_obj->resv = resv; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, dummy_obj); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + 0, /* Will require 1way or 2way for vm_bind */ + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } } - dma_resv_unlock(resv); - - return &bo->ttm.base; + drm_gem_object_put(dummy_obj); -error: - dma_resv_unlock(resv); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : &bo->ttm.base; } static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->importer_priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; - XE_WARN_ON(xe_bo_evict(bo)); + XE_WARN_ON(xe_bo_evict(bo, exec)); } static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index fdd514fec5ef..f5cfdf29fde3 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -617,9 +617,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, size = stream->per_xecore_buf_size * last_xecore; - bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL, - size, ~0ull, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64); + bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); if (IS_ERR(bo)) { kfree(stream->xecore_buf); return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 374c831e691b..7715e74bb945 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -19,6 +19,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_sync.h" +#include "xe_svm.h" #include "xe_vm.h" /** @@ -97,9 +98,13 @@ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); + int ret; /* The fence slot added here is intended for the exec sched job. 
*/ - return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, &vm_exec->exec); + ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, NULL); + return ret; } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -115,10 +120,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs, num_ufence = 0; + struct xe_validation_ctx ctx; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; - ktime_t end = 0; int err = 0; struct xe_hw_engine_group *group; enum xe_hw_engine_group_execution_mode mode, previous_mode; @@ -246,17 +251,12 @@ retry: if (err) goto err_unlock_list; - vm_exec.vm = &vm->gpuvm; - vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; - if (xe_vm_in_lr_mode(vm)) { - drm_exec_init(exec, vm_exec.flags, 0); - } else { - err = drm_gpuvm_exec_lock(&vm_exec); - if (err) { - if (xe_vm_validate_should_retry(exec, err, &end)) - err = -EAGAIN; + if (!xe_vm_in_lr_mode(vm)) { + vm_exec.vm = &vm->gpuvm; + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val); + if (err) goto err_unlock_list; - } } if (xe_vm_is_closed_or_banned(q->vm)) { @@ -303,7 +303,7 @@ retry: if (err) goto err_put_job; - err = down_read_interruptible(&vm->userptr.notifier_lock); + err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_put_job; @@ -345,12 +345,13 @@ retry: err_repin: if (!xe_vm_in_lr_mode(vm)) - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err_put_job: if (err) xe_sched_job_put(job); err_exec: - drm_exec_fini(exec); + if (!xe_vm_in_lr_mode(vm)) + xe_validation_ctx_fini(&ctx); err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 063c89d981e5..37b2b93b73d6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -199,6 +199,16 @@ err_lrc: return err; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -229,11 +239,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (xe_exec_queue_uses_pxp(q)) { err = xe_pxp_exec_queue_add(xe->pxp, q); if (err) - goto err_post_alloc; + goto err_post_init; } return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); @@ -331,13 +343,11 @@ void xe_exec_queue_destroy(struct kref *ref) xe_exec_queue_put(eq); } - q->ops->fini(q); + q->ops->destroy(q); } void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. 
@@ -346,9 +356,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index ba443a497b38..27b76cf9da89 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -181,8 +181,14 @@ struct xe_exec_queue_ops { int (*init)(struct xe_exec_queue *q); /** @kill: Kill inflight submissions for backend */ void (*kill)(struct xe_exec_queue *q); - /** @fini: Fini exec queue for submission backend */ + /** @fini: Undoes the init() for submission backend */ void (*fini)(struct xe_exec_queue *q); + /** + * @destroy: Destroy exec queue for submission backend. The backend + * function must call xe_exec_queue_fini() (which will in turn call the + * fini() backend function) to ensure the queue is properly cleaned up. + */ + void (*destroy)(struct xe_exec_queue *q); /** @set_priority: Set priority for exec queue */ int (*set_priority)(struct xe_exec_queue *q, enum xe_exec_queue_priority priority); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 788f56b066b6..f83d421ac9d3 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -385,10 +385,20 @@ err_free: return err; } -static void execlist_exec_queue_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_execlist_exec_queue *exl = q->execlist; + + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + + kfree(exl); +} + +static void execlist_exec_queue_destroy_async(struct work_struct *w) { struct xe_execlist_exec_queue *ee = - container_of(w, struct xe_execlist_exec_queue, fini_async); + container_of(w, struct xe_execlist_exec_queue, destroy_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; struct xe_device *xe = gt_to_xe(q->gt); @@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - drm_sched_entity_fini(&exl->entity); - drm_sched_fini(&exl->sched); - kfree(exl); - xe_exec_queue_fini(q); } @@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) /* NIY */ } -static void execlist_exec_queue_fini(struct xe_exec_queue *q) +static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); - queue_work(system_unbound_wq, &q->execlist->fini_async); + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); + queue_work(system_unbound_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, .fini = execlist_exec_queue_fini, + .destroy = execlist_exec_queue_destroy, .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 415140936f11..92c4ba52db0c 100644 --- 
a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -42,7 +42,7 @@ struct xe_execlist_exec_queue { bool has_run; - struct work_struct fini_async; + struct work_struct destroy_async; enum xe_exec_queue_priority active_priority; struct list_head active_link; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 71c7690a92b3..7fdd0a97a628 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -28,6 +28,7 @@ #include "xe_pm.h" #include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile_printk.h" #include "xe_tile_sriov_vf.h" #include "xe_tlb_inval.h" #include "xe_wa.h" @@ -269,7 +270,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) gsm_size = probe_gsm_size(pdev); if (gsm_size == 0) { - drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n"); return -ENOMEM; } @@ -466,8 +467,8 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); - xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", - node->start, node->start + node->size, buf, description); + xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", + node->start, node->start + node->size, buf, description); } } @@ -499,9 +500,8 @@ int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 err = drm_mm_reserve_node(&ggtt->mm, &node->base); - if (xe_gt_WARN(ggtt->tile->primary_gt, err, - "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) + if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; xe_ggtt_dump_node(ggtt, &node->base, "balloon"); @@ -731,7 +731,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; u8 tile_id = ggtt->tile->id; @@ -746,7 +746,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return 0; } - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -788,25 +788,28 @@ out: * @bo: the &xe_bo to be inserted * @start: address where it will be inserted * @end: end of the range where it will be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec); } /** * xe_ggtt_insert_bo - Insert BO into GGTT * @ggtt: the &xe_ggtt where bo will be inserted * @bo: the &xe_bo to be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, + struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec); } /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index fbe1e397d05d..75fc7a1efea7 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -10,6 +10,7 @@ struct drm_printer; struct xe_tile; +struct drm_exec; struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); int xe_ggtt_init_early(struct xe_ggtt *ggtt); @@ -31,9 +32,9 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, struct xe_bo *bo, u16 pat_index); void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end); + u64 start, u64 end, struct drm_exec *exec); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f5ae28af60d4..83d61bf8ec62 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -136,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc) u64 ggtt_offset; int err; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 34505a6d93ed..3e0ad7e5b5df 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -98,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index bf3a67b5951c..f253e2df4907 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -31,6 +31,7 @@ #include "xe_reg_whitelist.h" #include "xe_sa.h" #include "xe_sriov.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tuning.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -123,45 +124,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p) return ret; } -static int sa_info(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_tile *tile = gt_to_tile(gt); - - xe_pm_runtime_get(gt_to_xe(gt)); - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, - xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - -static int sa_info_vf_ccs(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_tile *tile = gt_to_tile(gt); - struct xe_sa_manager *bb_pool; - enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - - if (!IS_VF_CCS_READY(gt_to_xe(gt))) - return 0; - - xe_pm_runtime_get(gt_to_xe(gt)); - - for_each_ccs_rw_ctx(ctx_id) { - bb_pool = 
tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool; - if (!bb_pool) - break; - - drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); - drm_printf(p, "-------------------------\n"); - drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); - drm_puts(p, "\n"); - } - - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - static int topology(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -316,7 +278,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, @@ -327,17 +288,9 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, - {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; -/* - * only for GT debugfs files which are valid on VF. Not valid on PF. - */ -static const struct drm_info_list vf_only_debugfs_list[] = { - {"sa_info_vf_ccs", .show = xe_gt_debugfs_simple_show, .data = sa_info_vf_ccs}, -}; - /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, @@ -363,6 +316,24 @@ static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t return count; } +static ssize_t stats_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt); +} + +static int stats_show(struct seq_file *s, void *unused) +{ + struct drm_printer p = drm_seq_file_printer(s); + struct xe_gt *gt = s->private; + + return xe_gt_stats_print_info(gt, &p); +} +DEFINE_SHOW_STORE_ATTRIBUTE(stats); + static void force_reset(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -448,6 +419,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) root->d_inode->i_private = gt; /* VF safe */ + debugfs_create_file("stats", 0600, root, gt, &stats_fops); debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); @@ -459,11 +431,6 @@ void xe_gt_debugfs_register(struct xe_gt *gt) drm_debugfs_create_files(pf_only_debugfs_list, ARRAY_SIZE(pf_only_debugfs_list), root, minor); - else - drm_debugfs_create_files(vf_only_debugfs_list, - ARRAY_SIZE(vf_only_debugfs_list), - root, minor); - xe_uc_debugfs_register(>->uc, root); diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 60d9354e7dbf..4ff1b6b58d6b 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -227,6 +227,33 @@ static ssize_t max_freq_store(struct kobject *kobj, } static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); +static ssize_t power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, 
+ char *buff) +{ + struct device *dev = kobj_to_dev(kobj); + + xe_guc_pc_get_power_profile(dev_to_pc(dev), buff); + + return strlen(buff); +} + +static ssize_t power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_guc_pc *pc = dev_to_pc(dev); + int err; + + xe_pm_runtime_get(dev_to_xe(dev)); + err = xe_guc_pc_set_power_profile(pc, buff); + xe_pm_runtime_put(dev_to_xe(dev)); + + return err ?: count; +} +static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile); + static const struct attribute *freq_attrs[] = { &attr_act_freq.attr, &attr_cur_freq.attr, @@ -236,6 +263,7 @@ static const struct attribute *freq_attrs[] = { &attr_rpn_freq.attr, &attr_min_freq.attr, &attr_max_freq.attr, + &attr_power_profile.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 683ac021a06d..8fb1cae91724 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -362,7 +362,7 @@ fallback: * @group: pointer to storage for steering group ID * @instance: pointer to storage for steering instance ID */ -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index bc06520befab..283a1c9770e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, u8 *group, u8 *instance); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, + unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); /* diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index d02d22fb3659..a054d6010ae0 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -87,10 +87,8 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, if (!bo) return 0; - err = need_vram_move ? xe_bo_migrate(bo, vram->placement) : - xe_bo_validate(bo, vm, true); - - return err; + return need_vram_move ? 
xe_bo_migrate(bo, vram->placement, NULL, exec) : + xe_bo_validate(bo, vm, true, exec); } static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, @@ -98,9 +96,9 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, { struct xe_vm *vm = xe_vma_vm(vma); struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; struct drm_exec exec; struct dma_fence *fence; - ktime_t end = 0; int err, needs_vram; lockdep_assert_held_write(&vm->lock); @@ -129,22 +127,22 @@ retry_userptr: } /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); drm_exec_until_all_locked(&exec) { err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); drm_exec_retry_on_contention(&exec); - if (xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; + xe_validation_retry_on_oom(&ctx, &err); if (err) goto unlock_dma_resv; /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); + xe_vm_set_validation_exec(vm, &exec); fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { err = PTR_ERR(fence); - if (xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; + xe_validation_retry_on_oom(&ctx, &err); goto unlock_dma_resv; } } @@ -153,7 +151,7 @@ retry_userptr: dma_fence_put(fence); unlock_dma_resv: - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); if (err == -EAGAIN) goto retry_userptr; @@ -535,6 +533,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) { struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vm *vm; struct xe_vma *vma; @@ -564,15 +563,14 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) goto unlock_vm; /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); drm_exec_until_all_locked(&exec) { ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); drm_exec_retry_on_contention(&exec); - if (ret) - break; + xe_validation_retry_on_oom(&ctx, &ret); } - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); unlock_vm: up_read(&vm->lock); xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 11da0228cea7..1313d32862db 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -6,18 +6,22 @@ #ifndef _XE_GT_PRINTK_H_ #define _XE_GT_PRINTK_H_ -#include <drm/drm_print.h> - #include "xe_gt_types.h" +#include "xe_tile_printk.h" + +#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args #define xe_gt_printk(_gt, _level, _fmt, ...) \ - drm_##_level(>_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) + +#define xe_gt_err(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) #define xe_gt_err_once(_gt, _fmt, ...) \ xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__) -#define xe_gt_err(_gt, _fmt, ...) \ - xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) +#define xe_gt_err_ratelimited(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) #define xe_gt_warn(_gt, _fmt, ...) \ xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__) @@ -31,20 +35,20 @@ #define xe_gt_dbg(_gt, _fmt, ...) \ xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__) -#define xe_gt_err_ratelimited(_gt, _fmt, ...) 
\ - xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) +#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \ + xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__) #define xe_gt_WARN(_gt, _condition, _fmt, ...) \ - drm_WARN(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ - drm_WARN_ONCE(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ON(_gt, _condition) \ - xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) + xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ - xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) + xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) { @@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format * /* * The original xe_gt_dbg() callsite annotations are useless here, - * redirect to the tweaked drm_dbg_printer() instead. + * redirect to the tweaked xe_tile_dbg_printer() instead. */ - dbg = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg = xe_tile_dbg_printer((gt)->tile); dbg.origin = p->origin; - drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); + drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf)); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c8f0320d032f..6344b5205c08 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1478,23 +1478,16 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); - bo = xe_bo_create_locked(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_PINNED_LATE_RESTORE); + bo = xe_bo_create_pin_range_novm(xe, tile, + ALIGN(size, PAGE_SIZE), 0, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); - err = xe_bo_pin(bo); - xe_bo_unlock(bo); - if (unlikely(err)) { - xe_bo_put(bo); - return err; - } - config->lmem_obj = bo; if (xe_device_has_lmtt(xe)) { @@ -1636,7 +1629,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index c712111aa30d..44cc612b0a75 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -55,12 +55,12 @@ static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - 
ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(xe, tile, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -91,12 +91,12 @@ static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(xe, tile, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 30f942671c2b..5f74706bab81 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) atomic64_add(incr, &gt->stats.counters[id]); } +#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name + static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { - "svm_pagefault_count", - "tlb_inval_count", - "vma_pagefault_count", - "vma_pagefault_kb", + DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"), + DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), + DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), + DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), + DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"), + DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"), + DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"), + DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"), + DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"), + DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"), + DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"), + DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"), + DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"), + DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"), + DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"), + DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"), + DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"), + DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"), + DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"), + DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"), + DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"), + DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"), + DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"), + DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"), + DEF_STAT_STR(SVM_4K_BIND_US,
"svm_4K_bind_us"), + DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"), + DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"), }; /** @@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) return 0; } + +/** + * xe_gt_stats_clear - Clear the GT stats + * @gt: GT structure + * + * This clears (zeros) all the available GT stats. + */ +void xe_gt_stats_clear(struct xe_gt *gt) +{ + int id; + + for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) + atomic64_set(&gt->stats.counters[id], 0); +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 38325ef53617..e8aea32bc971 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -13,6 +13,7 @@ struct drm_printer; #ifdef CONFIG_DEBUG_FS int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else static inline void diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index be3244d7133c..d8348a8de2e1 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -9,8 +9,41 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, + XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, + XE_GT_STATS_ID_SVM_TLB_INVAL_US, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_4K_MIGRATE_US, + XE_GT_STATS_ID_SVM_64K_MIGRATE_US, + XE_GT_STATS_ID_SVM_2M_MIGRATE_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_CPU_COPY_US, + XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_4K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_64K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_2M_GET_PAGES_US, + XE_GT_STATS_ID_SVM_4K_BIND_US, + XE_GT_STATS_ID_SVM_64K_BIND_US, + XE_GT_STATS_ID_SVM_2M_BIND_US, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index a0baa560dd71..4e61c5e39bcb 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -12,6 +12,7 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_mmio.h" #include "xe_wa.h" @@ -122,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, } } +bool xe_gt_topology_report_l3(struct xe_gt *gt) +{ + /* + * No known userspace needs/uses the L3 bank mask reported by + * the media GT, and the hardware itself is known to report bogus + * values on several platforms. Only report L3 bank mask as part + * of the media GT's topology on pre-Xe3 platforms since that's + * already part of our ABI.
+ */ + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30) + return false; + + return true; +} + static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { @@ -129,16 +145,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) struct xe_mmio *mmio = >->mmio; u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); - /* - * PTL platforms with media version 30.00 do not provide proper values - * for the media GT's L3 bank registers. Skip the readout since we - * don't have any way to obtain real values. - * - * This may get re-described as an official workaround in the future, - * but there's no tracking number assigned yet so we use a custom - * OOB workaround descriptor. - */ - if (XE_GT_WA(gt, no_media_l3)) + if (!xe_gt_topology_report_l3(gt)) return; if (GRAPHICS_VER(xe) >= 30) { @@ -275,8 +282,9 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU type: %s\n", eu_type_to_str(gt->fuse_topo.eu_type)); - drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, - gt->fuse_topo.l3_bank_mask); + if (xe_gt_topology_report_l3(gt)) + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, + gt->fuse_topo.l3_bank_mask); } /* @@ -328,3 +336,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) { return test_bit(dss, gt->fuse_topo.c_dss_mask); } + +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt) +{ + unsigned int xecore; + int last_group = -1; + u16 group, instance; + + for_each_dss_steering(xecore, gt, group, instance) { + if (last_group != group) { + if (group - last_group > 1) + return true; + last_group = group; + } + } + return false; +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index c8140704ad4c..5e62f5949b7b 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -47,4 +47,8 @@ xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss); bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss); +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt); + +bool xe_gt_topology_report_l3(struct xe_gt *gt); + #endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 37d06c51180c..00789844ea4d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -74,8 +74,7 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) flags |= GUC_LOG_DISABLED; else - flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << - GUC_LOG_VERBOSITY_SHIFT; + flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level)); return flags; } @@ -122,22 +121,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | CAPTURE_FLAG | LOG_FLAG | - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << - GUC_LOG_CAPTURE_SHIFT) | - (offset << GUC_LOG_BUF_ADDR_SHIFT); + 
FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) | + FIELD_PREP(GUC_LOG_BUF_ADDR, offset); #undef LOG_UNIT #undef LOG_FLAG @@ -150,7 +141,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) static u32 guc_ctl_ads_flags(struct xe_guc *guc) { u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; - u32 flags = ads << GUC_ADS_ADDR_SHIFT; + u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads); return flags; } @@ -709,10 +700,6 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) if (ret) return ret; - ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo); - if (ret) - return ret; - return 0; } @@ -847,6 +834,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_ct_init_post_hwconfig(&guc->ct); + if (ret) + return ret; + guc_init_params_post_hwconfig(guc); ret = xe_guc_submit_init(guc, ~0); @@ -888,9 +879,7 @@ int xe_guc_post_load_init(struct xe_guc *guc) return ret; } - guc->submission_state.enabled = true; - - return 0; + return xe_guc_submit_enable(guc); } int xe_guc_reset(struct xe_guc *guc) @@ -1066,7 +1055,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) #endif #define GUC_LOAD_TIME_WARN_MS 200 -static void guc_wait_ucode(struct xe_guc *guc) +static int guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); struct xe_mmio *mmio = >->mmio; @@ -1173,7 +1162,7 @@ static void guc_wait_ucode(struct xe_guc *guc) break; } - xe_device_declare_wedged(gt_to_xe(gt)); + return -EPROTO; } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) { xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n", delta_ms, status, count); @@ -1185,7 +1174,10 @@ static void guc_wait_ucode(struct xe_guc *guc) delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), before_freq, status, count); } + + return 0; } +ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO); static int __xe_guc_upload(struct xe_guc *guc) { @@ -1217,14 +1209,16 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - guc_wait_ucode(guc); + ret = guc_wait_ucode(guc); + if (ret) + goto out; xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; out: xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - return 0 /* FIXME: ret, don't want to stop load currently */; + return ret; } static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) @@ -1602,7 +1596,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); - guc->submission_state.enabled = false; + xe_guc_submit_disable(guc); } int xe_guc_reset_prepare(struct xe_guc *guc) @@ -1695,3 +1689,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc) xe_guc_ct_stop(&guc->ct); xe_guc_submit_wedge(guc); } + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_g2g_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 22cf019a11bf..1cca05967e62 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -53,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); void xe_guc_declare_wedged(struct xe_guc *guc); +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); +#endif + static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { switch (class) { diff --git 
a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 5631722f34f5..58e0b0294a5b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -339,7 +339,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads) if (XE_GT_WA(gt, 13011645652)) { u32 data = 0xC40; - guc_waklv_enable(ads, &data, sizeof(data) / sizeof(u32), &offset, &remain, + guc_waklv_enable(ads, &data, 1, &offset, &remain, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); } @@ -355,7 +355,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0x0, 0xF, }; - guc_waklv_enable(ads, data, sizeof(data) / sizeof(u32), &offset, &remain, + guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain, GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 848065a25c44..18f6327bf552 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -39,6 +39,8 @@ static void receive_g2h(struct xe_guc_ct *ct); static void g2h_worker_func(struct work_struct *w); static void safe_mode_worker_func(struct work_struct *w); static void ct_exit_safe_mode(struct xe_guc_ct *ct); +static void guc_ct_change_state(struct xe_guc_ct *ct, + enum xe_guc_ct_state state); #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { @@ -252,6 +254,13 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) } ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ +static void guc_action_disable_ct(void *arg) +{ + struct xe_guc_ct *ct = arg; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); +} + int xe_guc_ct_init(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); @@ -268,10 +277,39 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) return PTR_ERR(bo); ct->bo = bo; - return 0; + + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ +/** + * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM + * @ct: the &xe_guc_ct + * + * Allocate a new BO in VRAM and free the previous BO that was allocated + * in system memory (SMEM). Applicable only for DGFX products. + * + * Return: 0 on success, or a negative errno on failure. 
+ */ +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + int ret; + + xe_assert(xe, !xe_guc_ct_enabled(ct)); + + if (IS_DGFX(xe)) { + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); + if (ret) + return ret; + } + + devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct); + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); +} + #define desc_read(xe_, guc_ctb__, field_) \ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ struct guc_ct_buffer_desc, field_) @@ -1040,11 +1078,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) return true; } +#define GUC_SEND_RETRY_LIMIT 50 +#define GUC_SEND_RETRY_MSLEEP 5 + static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer, bool no_fail) { struct xe_gt *gt = ct_to_gt(ct); struct g2h_fence g2h_fence; + unsigned int retries = 0; int ret = 0; /* @@ -1109,6 +1151,12 @@ retry_same_fence: xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); mutex_unlock(&ct->lock); + if (++retries > GUC_SEND_RETRY_LIMIT) { + xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n", + action[0], GUC_SEND_RETRY_LIMIT); + return -ELOOP; + } + msleep(GUC_SEND_RETRY_MSLEEP * retries); goto retry; } if (g2h_fence.fail) { @@ -1438,6 +1486,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_NOTIFY_EXCEPTION: ret = guc_crash_process_msg(ct, action); break; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + case XE_GUC_ACTION_TEST_G2G_RECV: + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); + break; +#endif default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 18d4225e6502..cf41210ab30a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -13,6 +13,7 @@ struct xe_device; int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 92e1f9f41b8c..2b99c1ebdd58 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc, struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; - metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size), - ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size), + ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, + false); if (IS_ERR(metadata_bo)) return PTR_ERR(metadata_bo); - bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size), + ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) { xe_bo_unpin_map_no_vm(metadata_bo); diff --git 
a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index a3f421e2adc0..c30c0e3ccbbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -35,8 +35,8 @@ struct xe_guc_exec_queue { struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; /** @lr_tdr: long running TDR worker */ struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; + /** @destroy_async: do final destroy async from this worker */ + struct work_struct destroy_async; /** @resume_time: time of last resume */ u64 resume_time; /** @state: GuC specific state for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 0508f1064178..50c4c2406132 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -15,6 +15,7 @@ #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 +#define G2H_LEN_DW_G2G_NOTIFY_MIN 3 #define GUC_ID_MAX 65535 #define GUC_ID_UNKNOWN 0xffffffff @@ -65,6 +66,7 @@ struct guc_ctxt_registration_info { u32 hwlrca_hi; }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { @@ -89,13 +91,10 @@ struct guc_update_exec_queue_policy { #define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) #define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) #define GUC_LOG_LOG_ALLOC_UNITS BIT(3) -#define GUC_LOG_CRASH_SHIFT 4 -#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DEBUG_SHIFT 6 -#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) -#define GUC_LOG_CAPTURE_SHIFT 10 -#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) -#define GUC_LOG_BUF_ADDR_SHIFT 12 +#define GUC_LOG_CRASH REG_GENMASK(5, 4) +#define GUC_LOG_DEBUG REG_GENMASK(9, 6) +#define GUC_LOG_CAPTURE REG_GENMASK(11, 10) +#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12) #define GUC_CTL_WA 1 #define GUC_WA_GAM_CREDITS BIT(10) @@ -117,21 +116,14 @@ struct guc_update_exec_queue_policy { #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 -#define GUC_LOG_VERBOSITY_SHIFT 0 -#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0) #define GUC_LOG_VERBOSITY_MAX 3 -#define GUC_LOG_VERBOSITY_MASK 0x0000000f -#define GUC_LOG_DESTINATION_MASK (3 << 4) -#define GUC_LOG_DISABLED (1 << 6) -#define GUC_PROFILE_ENABLED (1 << 7) +#define GUC_LOG_DESTINATION REG_GENMASK(5, 4) +#define GUC_LOG_DISABLED BIT(6) +#define GUC_PROFILE_ENABLED BIT(7) #define GUC_CTL_ADS 4 -#define GUC_ADS_ADDR_SHIFT 1 -#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) +#define GUC_ADS_ADDR REG_GENMASK(21, 1) #define GUC_CTL_DEVID 5 diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index f1e2b0be90a9..98a47ac42b08 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -17,7 +17,7 @@ struct xe_device; #define DEBUG_BUFFER_SIZE SZ_8M #define CAPTURE_BUFFER_SIZE SZ_2M #else -#define CRASH_BUFFER_SIZE SZ_8K +#define CRASH_BUFFER_SIZE SZ_16K #define DEBUG_BUFFER_SIZE SZ_64K #define CAPTURE_BUFFER_SIZE SZ_1M #endif diff --git 
a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 88557e86d637..53fdf59524c4 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -79,6 +79,11 @@ * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. * + * Power profiles add another level of control to SLPC. When power saving + * profile is chosen, SLPC will use conservative thresholds to ramp frequency, + * thus saving power. Base profile is default and ensures balanced performance + * for any workload. + * * Render-C States: * ================ * @@ -1171,6 +1176,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val) return ret; } +static const char *power_profile_to_string(struct xe_guc_pc *pc) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + return "base"; + case SLPC_POWER_PROFILE_POWER_SAVING: + return "power_saving"; + default: + return "invalid"; + } +} + +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + sprintf(profile, "[%s] %s\n", "base", "power_saving"); + break; + case SLPC_POWER_PROFILE_POWER_SAVING: + sprintf(profile, "%s [%s]\n", "base", "power_saving"); + break; + default: + sprintf(profile, "invalid"); + } +} + +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) +{ + int ret = 0; + u32 val; + + if (strncmp("base", buf, strlen("base")) == 0) + val = SLPC_POWER_PROFILE_BASE; + else if (strncmp("power_saving", buf, strlen("power_saving")) == 0) + val = SLPC_POWER_PROFILE_POWER_SAVING; + else + return -EINVAL; + + guard(mutex)(&pc->freq_lock); + xe_pm_runtime_get_noresume(pc_to_xe(pc)); + + ret = pc_action_set_param(pc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + pc->power_profile = val; + + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -1249,6 +1309,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + /* Set cached value of power_profile */ + ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc)); + if (unlikely(ret)) + xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + out: xe_force_wake_put(gt_to_fw(gt), fw_ref); return ret; @@ -1327,6 +1392,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) pc->bo = bo; + pc->power_profile = SLPC_POWER_PROFILE_BASE; + return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 52ecdd5ddbff..0e31396f103c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq); int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq); +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf); +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile); enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 
c02053948a57..5e4ea53fbee6 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -37,6 +37,8 @@ struct xe_guc_pc { struct mutex freq_lock; /** @freq_ready: Only handle freq changes, if they are really ready */ bool freq_ready; + /** @power_profile: Base or power_saving profile */ + u32 power_profile; }; #endif /* _XE_GUC_PC_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1185b23b1384..53024eb5670b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -32,6 +32,7 @@ #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. + */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC scheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -558,10 +624,8 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) info.engine_submit_mask = q->logical_mask; info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; - - if (ctx_type != GUC_CONTEXT_NORMAL) - info.flags |= BIT(ctx_type); + info.flags = CONTEXT_REGISTRATION_FLAG_KMD | + FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); @@ -1355,48 +1419,57 @@ rearm: return DRM_GPU_SCHED_STAT_NO_HANG; } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); + + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); + + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). 
+ */ + kfree_rcu(ge, rcu); +} + +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - /* - * RCU free due sched being exported via DRM scheduler fences - * (timeline name). - */ - kfree_rcu(ge, rcu); xe_exec_queue_fini(q); + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(xe->destroy_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1405,7 +1478,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. 
*/ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) @@ -1419,7 +1492,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) if (exec_queue_registered(q)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) @@ -1652,14 +1725,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static void guc_exec_queue_destroy(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); } static int guc_exec_queue_set_priority(struct xe_exec_queue *q, @@ -1789,6 +1862,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .init = guc_exec_queue_init, .kill = guc_exec_queue_kill, .fini = guc_exec_queue_fini, + .destroy = guc_exec_queue_destroy, .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, @@ -1810,7 +1884,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2029,7 +2103,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); if (unlikely(!q)) { - xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); + xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); return NULL; } @@ -2139,7 +2213,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) @@ -2528,7 +2602,7 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) } /** - * xe_guc_register_exec_queue - Register exec queue for a given context type. + * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. * @q: Execution queue * @ctx_type: Type of the context * @@ -2539,15 +2613,17 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) * * Returns - None. 
*/ -void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type) +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); - xe_assert(xe, IS_SRIOV_VF(xe)); - xe_assert(xe, !IS_DGFX(xe)); - xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL && - ctx_type < GUC_CONTEXT_COUNT)); + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + xe_gt_assert(gt, !IS_DGFX(xe)); + xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || + ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); + xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); register_exec_queue(q, ctx_type); enable_scheduling(q); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 6b5df5d0956b..78c3f07e31a0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,6 +13,8 @@ struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); +int xe_guc_submit_enable(struct xe_guc *guc); +void xe_guc_submit_disable(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); @@ -46,7 +48,7 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); -void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type); +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type); int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch); diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c deleted file mode 100644 index 57b71956ddf4..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ /dev/null @@ -1,325 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <linux/scatterlist.h> -#include <linux/mmu_notifier.h> -#include <linux/dma-mapping.h> -#include <linux/memremap.h> -#include <linux/swap.h> -#include <linux/hmm.h> -#include <linux/mm.h> -#include "xe_hmm.h" -#include "xe_vm.h" -#include "xe_bo.h" - -static u64 xe_npages_in_range(unsigned long start, unsigned long end) -{ - return (end - start) >> PAGE_SHIFT; -} - -static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, - struct hmm_range *range, struct rw_semaphore *notifier_sem) -{ - unsigned long i, npages, hmm_pfn; - unsigned long num_chunks = 0; - int ret; - - /* HMM docs says this is needed. */ - ret = down_read_interruptible(notifier_sem); - if (ret) - return ret; - - if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { - up_read(notifier_sem); - return -EAGAIN; - } - - npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages;) { - unsigned long len; - - hmm_pfn = range->hmm_pfns[i]; - xe_assert(xe, hmm_pfn & HMM_PFN_VALID); - - len = 1UL << hmm_pfn_to_map_order(hmm_pfn); - - /* If order > 0 the page may extend beyond range->start */ - len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); - i += len; - num_chunks++; - } - up_read(notifier_sem); - - return sg_alloc_table(st, num_chunks, GFP_KERNEL); -} - -/** - * xe_build_sg() - build a scatter gather table for all the physical pages/pfn - * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table - * and will be used to program GPU page table later. 
- * @xe: the xe device who will access the dma-address in sg table - * @range: the hmm range that we build the sg table from. range->hmm_pfns[] - * has the pfn numbers of pages that back up this hmm address range. - * @st: pointer to the sg table. - * @notifier_sem: The xe notifier lock. - * @write: whether we write to this range. This decides dma map direction - * for system pages. If write we map it bi-diretional; otherwise - * DMA_TO_DEVICE - * - * All the contiguous pfns will be collapsed into one entry in - * the scatter gather table. This is for the purpose of efficiently - * programming GPU page table. - * - * The dma_address in the sg table will later be used by GPU to - * access memory. So if the memory is system memory, we need to - * do a dma-mapping so it can be accessed by GPU/DMA. - * - * FIXME: This function currently only support pages in system - * memory. If the memory is GPU local memory (of the GPU who - * is going to access memory), we need gpu dpa (device physical - * address), and there is no need of dma-mapping. This is TBD. - * - * FIXME: dma-mapping for peer gpu device to access remote gpu's - * memory. Add this when you support p2p - * - * This function allocates the storage of the sg table. It is - * caller's responsibility to free it calling sg_free_table. - * - * Returns 0 if successful; -ENOMEM if fails to allocate memory - */ -static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, - struct rw_semaphore *notifier_sem, - bool write) -{ - unsigned long npages = xe_npages_in_range(range->start, range->end); - struct device *dev = xe->drm.dev; - struct scatterlist *sgl; - struct page *page; - unsigned long i, j; - - lockdep_assert_held(notifier_sem); - - i = 0; - for_each_sg(st->sgl, sgl, st->nents, j) { - unsigned long hmm_pfn, size; - - hmm_pfn = range->hmm_pfns[i]; - page = hmm_pfn_to_page(hmm_pfn); - xe_assert(xe, !is_device_private_page(page)); - - size = 1UL << hmm_pfn_to_map_order(hmm_pfn); - size -= page_to_pfn(page) & (size - 1); - i += size; - - if (unlikely(j == st->nents - 1)) { - xe_assert(xe, i >= npages); - if (i > npages) - size -= (i - npages); - - sg_mark_end(sgl); - } else { - xe_assert(xe, i < npages); - } - - sg_set_page(sgl, page, size << PAGE_SHIFT, 0); - } - - return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); -} - -static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - lockdep_assert_held_write(&vm->lock); - lockdep_assert_held(&vm->userptr.notifier_lock); - - mutex_lock(&userptr->unmap_mutex); - xe_assert(vm->xe, !userptr->mapped); - userptr->mapped = true; - mutex_unlock(&userptr->unmap_mutex); -} - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && - !lockdep_is_held_type(&vm->lock, 0) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - /* Don't unmap in exec critical section. */ - xe_vm_assert_held(vm); - /* Don't unmap while mapping the sg. */ - lockdep_assert_held(&vm->lock); - } - - mutex_lock(&userptr->unmap_mutex); - if (userptr->sg && userptr->mapped) - dma_unmap_sgtable(xe->drm.dev, userptr->sg, - write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); - userptr->mapped = false; - mutex_unlock(&userptr->unmap_mutex); -} - -/** - * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * @uvma: the userptr vma which hold the scatter gather table - * - * With function xe_userptr_populate_range, we allocate storage of - * the userptr sg table. This is a helper function to free this - * sg table, and dma unmap the address in the table. - */ -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - - xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); - xe_hmm_userptr_unmap(uvma); - sg_free_table(userptr->sg); - userptr->sg = NULL; -} - -/** - * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual - * address range - * - * @uvma: userptr vma which has information of the range to populate. - * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. - * - * This function populate the physical pages of a virtual - * address range. The populated physical pages is saved in - * userptr's sg table. It is similar to get_user_pages but call - * hmm_range_fault. - * - * This function also read mmu notifier sequence # ( - * mmu_interval_read_begin), for the purpose of later - * comparison (through mmu_interval_read_retry). - * - * This must be called with mmap read or write lock held. - * - * This function allocates the storage of the userptr sg table. - * It is caller's responsibility to free it calling sg_free_table. - * - * returns: 0 for success; negative error no on failure - */ -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, - bool is_mm_mmap_locked) -{ - unsigned long timeout = - jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns; - struct xe_userptr *userptr; - struct xe_vma *vma = &uvma->vma; - u64 userptr_start = xe_vma_userptr(vma); - u64 userptr_end = userptr_start + xe_vma_size(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range = { - .pfn_flags_mask = 0, /* ignore pfns */ - .default_flags = HMM_PFN_REQ_FAULT, - .start = userptr_start, - .end = userptr_end, - .notifier = &uvma->userptr.notifier, - .dev_private_owner = vm->xe, - }; - bool write = !xe_vma_read_only(vma); - unsigned long notifier_seq; - u64 npages; - int ret; - - userptr = &uvma->userptr; - - if (is_mm_mmap_locked) - mmap_assert_locked(userptr->notifier.mm); - - if (vma->gpuva.flags & XE_VMA_DESTROYED) - return 0; - - notifier_seq = mmu_interval_read_begin(&userptr->notifier); - if (notifier_seq == userptr->notifier_seq) - return 0; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - npages = xe_npages_in_range(userptr_start, userptr_end); - pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) - return -ENOMEM; - - if (write) - hmm_range.default_flags |= HMM_PFN_REQ_WRITE; - - if (!mmget_not_zero(userptr->notifier.mm)) { - ret = -EFAULT; - goto free_pfns; - } - - hmm_range.hmm_pfns = pfns; - - while (true) { - hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); - - if (!is_mm_mmap_locked) - mmap_read_lock(userptr->notifier.mm); - - ret = hmm_range_fault(&hmm_range); - - if (!is_mm_mmap_locked) - mmap_read_unlock(userptr->notifier.mm); - - if (ret == -EBUSY) { - if (time_after(jiffies, timeout)) - break; - - continue; - } - break; - } - - mmput(userptr->notifier.mm); - - if (ret) - goto free_pfns; - - ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); - if (ret) - goto free_pfns; - - ret = 
down_read_interruptible(&vm->userptr.notifier_lock); - if (ret) - goto free_st; - - if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { - ret = -EAGAIN; - goto out_unlock; - } - - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, - &vm->userptr.notifier_lock, write); - if (ret) - goto out_unlock; - - userptr->sg = &userptr->sgt; - xe_hmm_userptr_set_mapped(uvma); - userptr->notifier_seq = hmm_range.notifier_seq; - up_read(&vm->userptr.notifier_lock); - kvfree(pfns); - return 0; - -out_unlock: - up_read(&vm->userptr.notifier_lock); -free_st: - sg_free_table(&userptr->sgt); -free_pfns: - kvfree(pfns); - return ret; -} diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h deleted file mode 100644 index 0ea98d8e7bbc..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Copyright © 2024 Intel Corporation - */ - -#ifndef _XE_HMM_H_ -#define _XE_HMM_H_ - -#include <linux/types.h> - -struct xe_userptr_vma; - -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); - -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); -#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 58bee3ffe881..fa4db5f23342 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -213,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group err = q->ops->suspend_wait(q); if (err) - goto err_suspend; + return err; } if (need_resume) xe_hw_engine_group_resume_faulting_lr_jobs(group); return 0; - -err_suspend: - up_write(&group->mode_sem); - return err; } /** diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 32a76ae6e9dc..b6790589e623 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -286,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg */ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val = 0, min, max; + u32 reg_val = 0; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -294,7 +294,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -304,19 +304,21 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe /* Check if PL limits are disabled. */ if (!(reg_val & PWR_LIM_EN)) { *value = PL_DISABLE; - drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; /* For platforms with mailbox power limit support clamping would be done by pcode. 
*/ if (!hwmon->xe->info.has_mbx_power_limits) { - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + u64 pkg_pwr, min, max; + + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); if (min && max) @@ -493,8 +495,8 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at { struct xe_hwmon *hwmon = dev_get_drvdata(dev); struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; + u32 reg_val, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR; @@ -505,23 +507,24 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); if (ret) { drm_err(&hwmon->xe->drm, - "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", - channel, power_attr, r, ret); - r = 0; + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", + channel, power_attr, reg_val, ret); + reg_val = 0; } } else { - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)); } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PWR_LIM_TIME_X, r); - y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); /* * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) @@ -1294,13 +1297,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } -static void xe_hwmon_mutex_destroy(void *arg) -{ - struct xe_hwmon *hwmon = arg; - - mutex_destroy(&hwmon->hwmon_lock); -} - int xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -1319,8 +1315,7 @@ int xe_hwmon_register(struct xe_device *xe) if (!hwmon) return -ENOMEM; - mutex_init(&hwmon->hwmon_lock); - ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon); + ret = devm_mutex_init(dev, &hwmon->hwmon_lock); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 044dda517b7c..48dfcb41fa08 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -259,7 +259,7 @@ void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) return; if (d3cold) - xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c new file mode 100644 index 000000000000..768442ca7da6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 
Intel Corporation + */ + +#include <linux/component.h> +#include <linux/delay.h> +#include <linux/firmware.h> + +#include <drm/drm_managed.h> +#include <drm/intel/i915_component.h> +#include <drm/intel/intel_lb_mei_interface.h> +#include <drm/drm_print.h> + +#include "xe_device.h" +#include "xe_late_bind_fw.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" +#include "xe_pm.h" + +/* + * The component should load quite quickly in most cases, but it could take + * a bit. Using a very big timeout just to cover the worst case scenario + */ +#define LB_INIT_TIMEOUT_MS 20000 + +/* + * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for + * other OS components to release the MEI CL handle + */ +#define LB_FW_LOAD_RETRY_MAXCOUNT 30 +#define LB_FW_LOAD_RETRY_PAUSE_MS 200 + +static const u32 fw_id_to_type[] = { + [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL, + }; + +static const char * const fw_id_to_name[] = { + [XE_LB_FW_FAN_CONTROL] = "fan_control", + }; + +static struct xe_device * +late_bind_to_xe(struct xe_late_bind *late_bind) +{ + return container_of(late_bind, struct xe_device, late_bind); +} + +static struct xe_device * +late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw) +{ + return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]); +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_cpd_header(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *manifest_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct gsc_cpd_header_v2 *header = data; + const struct gsc_manifest_header *manifest; + const struct gsc_cpd_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* manifest_entry is mandatory */ + xe_assert(xe, manifest_entry); + + if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER) + return -ENOENT; + + if (header->header_length < sizeof(struct gsc_cpd_header_v2)) { + drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the manifest first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, manifest_entry) == 0) + offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_manifest_header); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! 
%zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + manifest = data + offset; + + lb_fw->version = manifest->fw_version; + + return 0; +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_lb_layout(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *fpt_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct csc_fpt_header *header = data; + const struct csc_fpt_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* fpt_entry is mandatory */ + xe_assert(xe, fpt_entry); + + if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER) + return -ENOENT; + + if (header->header_length < sizeof(struct csc_fpt_header)) { + drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the cpd header first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, fpt_entry) == 0) + offset = entry->offset; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_cpd_header_v2); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man"); +} + +static const char *xe_late_bind_parse_status(uint32_t status) +{ + switch (status) { + case INTEL_LB_STATUS_SUCCESS: + return "success"; + case INTEL_LB_STATUS_4ID_MISMATCH: + return "4Id Mismatch"; + case INTEL_LB_STATUS_ARB_FAILURE: + return "ARB Failure"; + case INTEL_LB_STATUS_GENERAL_ERROR: + return "General Error"; + case INTEL_LB_STATUS_INVALID_PARAMS: + return "Invalid Params"; + case INTEL_LB_STATUS_INVALID_SIGNATURE: + return "Invalid Signature"; + case INTEL_LB_STATUS_INVALID_PAYLOAD: + return "Invalid Payload"; + case INTEL_LB_STATUS_TIMEOUT: + return "Timeout"; + default: + return "Unknown error"; + } +} + +static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(root_tile, + PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL); +} + +void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_late_bind_fw *lbfw; + int fw_id; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + lbfw = &late_bind->late_bind_fw[fw_id]; + if (lbfw->payload && late_bind->wq) { + drm_dbg(&xe->drm, "Flush work: load %s firmware\n", + fw_id_to_name[lbfw->id]); + flush_work(&lbfw->work); + } + } +} + +static void xe_late_bind_work(struct work_struct *work) +{ + struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work); + struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind, + late_bind_fw[lbfw->id]); + struct xe_device *xe = late_bind_to_xe(late_bind); + int retry = 
LB_FW_LOAD_RETRY_MAXCOUNT;
+ int ret;
+ int slept;
+
+ xe_device_assert_mem_access(xe);
+
+ /* we can queue this before the component is bound */
+ for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+ if (late_bind->component.ops)
+ break;
+ msleep(100);
+ }
+
+ if (!late_bind->component.ops) {
+ drm_err(&xe->drm, "Late bind component not bound\n");
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+ goto out;
+ }
+
+ drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
+ do {
+ ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
+ lbfw->type,
+ lbfw->flags,
+ lbfw->payload,
+ lbfw->payload_size);
+ if (!ret)
+ break;
+ msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+ } while (--retry && ret == -EBUSY);
+
+ if (!ret) {
+ drm_dbg(&xe->drm, "Load %s firmware successful\n",
+ fw_id_to_name[lbfw->id]);
+ goto out;
+ }
+
+ if (ret > 0)
+ drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
+ fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
+ else
+ drm_err(&xe->drm, "Load %s firmware failed with err %d",
+ fw_id_to_name[lbfw->id], ret);
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+
+out:
+ xe_pm_runtime_put(xe);
+}
+
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_late_bind_fw *lbfw;
+ int fw_id;
+
+ if (!late_bind->component_added)
+ return -ENODEV;
+
+ if (late_bind->disable)
+ return 0;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ lbfw = &late_bind->late_bind_fw[fw_id];
+ if (lbfw->payload) {
+ xe_pm_runtime_get_noresume(xe);
+ queue_work(late_bind->wq, &lbfw->work);
+ }
+ }
+ return 0;
+}
+
+static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_late_bind_fw *lb_fw;
+ const struct firmware *fw;
+ u32 num_fans;
+ int ret;
+
+ if (fw_id >= XE_LB_FW_MAX_ID)
+ return -EINVAL;
+
+ lb_fw = &late_bind->late_bind_fw[fw_id];
+
+ lb_fw->id = fw_id;
+ lb_fw->type = fw_id_to_type[lb_fw->id];
+ lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
+
+ if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
+ ret = xe_late_bind_fw_num_fans(late_bind, &num_fans);
+ if (ret) {
+ drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret);
+ return 0; /* Not a fatal error, continue without fan control */
+ }
+ drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
+ if (!num_fans)
+ return 0;
+ }
+
+ snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
+ fw_id_to_name[lb_fw->id], pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device);
+
+ drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
+ ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
+ if (ret) {
+ drm_dbg(&xe->drm, "%s late binding fw not available for current device",
+ fw_id_to_name[lb_fw->id]);
+ return 0;
+ }
+
+ if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) {
+ drm_err(&xe->drm, "Firmware %s size %zu is larger than max payload size %u\n",
+ lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE);
+ release_firmware(fw);
+ return -ENODATA;
+ }
+
+ ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES");
+ if (ret)
+ return ret;
+
+ lb_fw->payload_size = fw->size;
+ lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL);
+ if (!lb_fw->payload) {
+ release_firmware(fw);
+
return -ENOMEM; + } + + drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n", + fw_id_to_name[lb_fw->id], lb_fw->blob_path, + lb_fw->version.major, lb_fw->version.minor, + lb_fw->version.hotfix, lb_fw->version.build); + + memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size); + release_firmware(fw); + INIT_WORK(&lb_fw->work, xe_late_bind_work); + + return 0; +} + +static int xe_late_bind_fw_init(struct xe_late_bind *late_bind) +{ + int ret; + int fw_id; + + late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0); + if (!late_bind->wq) + return -ENOMEM; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + ret = __xe_late_bind_fw_init(late_bind, fw_id); + if (ret) + return ret; + } + + return 0; +} + +static int xe_late_bind_component_bind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + late_bind->component.ops = data; + late_bind->component.mei_dev = mei_kdev; + + return 0; +} + +static void xe_late_bind_component_unbind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component.ops = NULL; +} + +static const struct component_ops xe_late_bind_component_ops = { + .bind = xe_late_bind_component_bind, + .unbind = xe_late_bind_component_unbind, +}; + +static void xe_late_bind_remove(void *arg) +{ + struct xe_late_bind *late_bind = arg; + struct xe_device *xe = late_bind_to_xe(late_bind); + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component_added = false; + + component_del(xe->drm.dev, &xe_late_bind_component_ops); + if (late_bind->wq) { + destroy_workqueue(late_bind->wq); + late_bind->wq = NULL; + } +} + +/** + * xe_late_bind_init() - add xe mei late binding component + * @late_bind: pointer to late bind structure. + * + * Return: 0 if the initialization was successful, a negative errno otherwise. 
+ */
+int xe_late_bind_init(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int err;
+
+ if (!xe->info.has_late_bind)
+ return 0;
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ drm_info(&xe->drm, "Can't init xe mei late bind, missing mei component\n");
+ return 0;
+ }
+
+ err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops,
+ INTEL_COMPONENT_LB);
+ if (err < 0) {
+ drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ late_bind->component_added = true;
+
+ err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind);
+ if (err)
+ return err;
+
+ err = xe_late_bind_fw_init(late_bind);
+ if (err)
+ return err;
+
+ return xe_late_bind_fw_load(late_bind);
+}
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
new file mode 100644
index 000000000000..07e437390539
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_FW_H_
+#define _XE_LATE_BIND_FW_H_
+
+#include <linux/types.h>
+
+struct xe_late_bind;
+
+int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
new file mode 100644
index 000000000000..0f5da89ce98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_TYPES_H_
+#define _XE_LATE_BIND_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_abi.h"
+
+#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K
+
+/**
+ * xe_late_bind_fw_id - enum to determine late binding fw index
+ */
+enum xe_late_bind_fw_id {
+ XE_LB_FW_FAN_CONTROL = 0,
+ XE_LB_FW_MAX_ID
+};
+
+/**
+ * struct xe_late_bind_fw
+ */
+struct xe_late_bind_fw {
+ /** @id: firmware index */
+ u32 id;
+ /** @blob_path: firmware binary path */
+ char blob_path[PATH_MAX];
+ /** @type: firmware type */
+ u32 type;
+ /** @flags: firmware flags */
+ u32 flags;
+ /** @payload: to store the late binding blob */
+ const u8 *payload;
+ /** @payload_size: late binding blob payload_size */
+ size_t payload_size;
+ /** @work: worker to upload latebind blob */
+ struct work_struct work;
+ /** @version: late binding blob manifest version */
+ struct gsc_version version;
+};
+
+/**
+ * struct xe_late_bind_component - Late Binding services component
+ * @mei_dev: device that provides Late Binding service.
+ * @ops: Ops implemented by Late Binding driver, used by Xe driver.
+ * + * Communication between Xe and MEI drivers for Late Binding services + */ +struct xe_late_bind_component { + struct device *mei_dev; + const struct intel_lb_component_ops *ops; +}; + +/** + * struct xe_late_bind + */ +struct xe_late_bind { + /** @component: struct for communication with mei component */ + struct xe_late_bind_component component; + /** @late_bind_fw: late binding firmware array */ + struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID]; + /** @wq: workqueue to submit request to download late bind blob */ + struct workqueue_struct *wq; + /** @component_added: whether the component has been added */ + bool component_added; + /** @disable: to block late binding reload during pm resume flow*/ + bool disable; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index f2bfbfa3efa1..62fc5a1a332d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -67,12 +67,12 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level goto out; } - bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL, - PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * - lmtt->ops->lmtt_pte_num(level)), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_FLAG_NEEDS_64K); + bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), + PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_FLAG_NEEDS_64K, false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8f6c3ba47882..47e9df775072 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -8,6 +8,7 @@ #include <generated/xe_wa_oob.h> #include <linux/ascii85.h> +#include <linux/panic.h> #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" @@ -16,6 +17,7 @@ #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue_types.h" @@ -75,11 +77,17 @@ lrc_to_xe(struct xe_lrc *lrc) static bool gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = gt_to_xe(gt); + if (XE_GT_WA(gt, 16010904313) && (class == XE_ENGINE_CLASS_RENDER || class == XE_ENGINE_CLASS_COMPUTE)) return true; + if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + class, NULL)) + return true; + return false; } @@ -1102,6 +1110,64 @@ static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return cmd - batch; } +static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. 
Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_len) @@ -1203,6 +1269,7 @@ int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe { .setup = setup_timestamp_wa }, { .setup = setup_invalidate_state_cache_wa }, { .setup = setup_utilization_wa }, + { .setup = setup_configfs_post_ctx_restore_bb }, }; struct bo_setup_state state = { .lrc = lrc, @@ -1249,8 +1316,12 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) static int setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { - static struct bo_setup rcs_funcs[] = { + static const struct bo_setup rcs_funcs[] = { { .setup = setup_timestamp_wa }, + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + static const struct bo_setup xcs_funcs[] = { + { .setup = setup_configfs_mid_ctx_restore_bb }, }; struct bo_setup_state state = { .lrc = lrc, @@ -1268,6 +1339,9 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) hwe->class == XE_ENGINE_CLASS_COMPUTE) { state.funcs = rcs_funcs; state.num_funcs = ARRAY_SIZE(rcs_funcs); + } else { + state.funcs = xcs_funcs; + state.num_funcs = ARRAY_SIZE(xcs_funcs); } if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) @@ -1294,14 +1368,15 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) finish_bo(&state); kfree(state.buffer); + /* + * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it + * varies per engine class, but the default is good enough + */ xe_lrc_write_ctx_reg(lrc, CTX_CS_INDIRECT_CTX, (xe_bo_ggtt_addr(lrc->bo) + state.offset) | /* Size in CLs. 
*/ (state.written * sizeof(u32) / 64)); - xe_lrc_write_ctx_reg(lrc, - CTX_CS_INDIRECT_CTX_OFFSET, - CTX_INDIRECT_CTX_OFFSET_DEFAULT); return 0; } @@ -1340,9 +1415,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, - ttm_bo_type_kernel, - bo_flags); + lrc->bo = xe_bo_create_pin_map_novm(xe, tile, + bo_size, + ttm_bo_type_kernel, + bo_flags, false); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9643442ef101..1d667fa36cf3 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -35,6 +35,7 @@ #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_trace_bo.h" +#include "xe_validation.h" #include "xe_vm.h" #include "xe_vram.h" @@ -173,7 +174,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, } static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(tile); u16 pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -200,7 +201,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PAGETABLE); + XE_BO_FLAG_PAGETABLE, exec); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -393,6 +394,24 @@ struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile) return m; } +static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + err = xe_migrate_prepare_vm(tile, m, vm, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + } + + return err; +} + /** * xe_migrate_init() - Initialize a migrate context * @m: The migration context @@ -413,11 +432,9 @@ int xe_migrate_init(struct xe_migrate *m) if (IS_ERR(vm)) return PTR_ERR(vm); - xe_vm_lock(vm, false); - err = xe_migrate_prepare_vm(tile, m, vm); - xe_vm_unlock(vm); + err = xe_migrate_lock_prepare_vm(tile, m, vm); if (err) - goto err_out; + return err; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, @@ -842,11 +859,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, avail_pts); - - pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; - batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, - &dst_L0_ofs, &dst_L0_pt, 0, - avail_pts, avail_pts); + if (copy_only_ccs) { + dst_L0_ofs = src_L0_ofs; + } else { + pte_flags = dst_is_vram ? 
PTE_UPDATE_FLAG_IS_VRAM : 0; + batch_size += pte_update_size(m, pte_flags, dst, + &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, + 0, avail_pts, avail_pts); + } if (copy_system_ccs) { xe_assert(xe, type_device); @@ -876,7 +897,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); - else + else if (!copy_only_ccs) emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, &dst_it, src_L0, dst); @@ -908,7 +929,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (!fence) { err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); - if (!err && src_bo != dst_bo) + if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv) err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); if (err) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 9ca4a5ae1693..33f4ac82fc80 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -35,6 +35,10 @@ static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { static void xe_nvm_release_dev(struct device *dev) { + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); + + kfree(nvm); } static bool xe_nvm_non_posted_erase(struct xe_device *xe) @@ -162,6 +166,5 @@ void xe_nvm_fini(struct xe_device *xe) auxiliary_device_delete(&nvm->aux_dev); auxiliary_device_uninit(&nvm->aux_dev); - kfree(nvm); xe->nvm = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a188bad172ad..a4894eb0d7f3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -883,9 +883,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, + size, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 046d330bad34..be91343829dd 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -334,6 +334,7 @@ static const struct xe_device_desc bmg_desc = { .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_late_bind = true, .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, @@ -510,6 +511,26 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, *revid = REG_FIELD_GET(GMD_ID_REVID, val); } +static const struct xe_ip *find_graphics_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) + if (graphics_ips[i].verx100 == verx100) + return &graphics_ips[i]; + return NULL; +} + +static const struct xe_ip *find_media_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(media_ips); i++) + if (media_ips[i].verx100 == verx100) + return &media_ips[i]; + return NULL; +} + /* * Read IP version from hardware and select graphics/media IP descriptors * based on the result. 
@@ -527,14 +548,7 @@ static void handle_gmdid(struct xe_device *xe, read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - if (ver == graphics_ips[i].verx100) { - *graphics_ip = &graphics_ips[i]; - - break; - } - } - + *graphics_ip = find_graphics_ip(ver); if (!*graphics_ip) { drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", ver / 100, ver % 100); @@ -545,14 +559,7 @@ static void handle_gmdid(struct xe_device *xe, if (ver == 0) return; - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - if (ver == media_ips[i].verx100) { - *media_ip = &media_ips[i]; - - break; - } - } - + *media_ip = find_media_ip(ver); if (!*media_ip) { drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", ver / 100, ver % 100); @@ -581,6 +588,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; + xe->info.has_late_bind = desc->has_late_bind; xe->info.has_llc = desc->has_llc; xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = desc->has_sriov; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index b63002fc0f67..9b9766a3baa3 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -39,6 +39,7 @@ struct xe_device_desc { u8 has_gsc_nvm:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; + u8 has_late_bind:1; u8 has_llc:1; u8 has_mbx_power_limits:1; u8 has_pxp:1; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 6eea4190bbd2..2c5a44377994 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -21,6 +21,7 @@ #include "xe_gt_idle.h" #include "xe_i2c.h" #include "xe_irq.h" +#include "xe_late_bind_fw.h" #include "xe_pcode.h" #include "xe_pxp.h" #include "xe_sriov_vf_ccs.h" @@ -129,6 +130,8 @@ int xe_pm_suspend(struct xe_device *xe) if (err) goto err; + xe_late_bind_wait_for_worker_completion(&xe->late_bind); + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); @@ -198,7 +201,7 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; - xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_i2c_pm_resume(xe, true); xe_irq_resume(xe); @@ -213,9 +216,11 @@ int xe_pm_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); - if (IS_SRIOV_VF(xe)) + if (IS_VF_CCS_READY(xe)) xe_sriov_vf_ccs_register_context(xe); + xe_late_bind_fw_load(&xe->late_bind); + drm_dbg(&xe->drm, "Device resumed\n"); return 0; err: @@ -598,9 +603,12 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); - if (IS_SRIOV_VF(xe)) + if (IS_VF_CCS_READY(xe)) xe_sriov_vf_ccs_register_context(xe); + if (xe->d3cold.allowed) + xe_late_bind_fw_load(&xe->late_bind); + out: xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h new file mode 100644 index 000000000000..c5be2385aa95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_printk.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PRINTK_H_ +#define _XE_PRINTK_H_ + +#include <drm/drm_print.h> + +#include "xe_device_types.h" + +#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args + +#define xe_printk(_xe, _level, _fmt, ...) \ + drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_err(_xe, _fmt, ...) 
\ + xe_printk((_xe), err, _fmt, ##__VA_ARGS__) + +#define xe_err_once(_xe, _fmt, ...) \ + xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__) + +#define xe_err_ratelimited(_xe, _fmt, ...) \ + xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_warn(_xe, _fmt, ...) \ + xe_printk((_xe), warn, _fmt, ##__VA_ARGS__) + +#define xe_notice(_xe, _fmt, ...) \ + xe_printk((_xe), notice, _fmt, ##__VA_ARGS__) + +#define xe_info(_xe, _fmt, ...) \ + xe_printk((_xe), info, _fmt, ##__VA_ARGS__) + +#define xe_dbg(_xe, _fmt, ...) \ + xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__) + +#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \ + drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__) + +#define xe_WARN(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ON(_xe, _condition) \ + xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_WARN_ON_ONCE(_xe, _condition) \ + xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_err(xe, "%pV", vaf); +} + +static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_info(xe, "%pV", vaf); +} + +static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + struct drm_printer ddp; + + /* + * The original xe_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. + */ + ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); + ddp.origin = p->origin; + + drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf)); +} + +/** + * xe_err_printer - Construct a &drm_printer that outputs to xe_err() + * @xe: the &xe_device pointer to use in xe_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_err_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_err, + .arg = xe, + }; + return p; +} + +/** + * xe_info_printer - Construct a &drm_printer that outputs to xe_info() + * @xe: the &xe_device pointer to use in xe_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_info_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_info, + .arg = xe, + }; + return p; +} + +/** + * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg() + * @xe: the &xe_device pointer to use in xe_dbg() + * + * Return: The &drm_printer object. 
+ */ +static inline struct drm_printer xe_dbg_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_dbg, + .arg = xe, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c index a2c9ff5bfd59..45d142191d60 100644 --- a/drivers/gpu/drm/xe/xe_psmi.c +++ b/drivers/gpu/drm/xe/xe_psmi.c @@ -68,9 +68,7 @@ static void psmi_cleanup(struct xe_device *xe) static struct xe_bo *psmi_alloc_object(struct xe_device *xe, unsigned int id, size_t bo_size) { - struct xe_bo *bo = NULL; struct xe_tile *tile; - int err; if (!id || !bo_size) return NULL; @@ -78,22 +76,12 @@ static struct xe_bo *psmi_alloc_object(struct xe_device *xe, tile = &xe->tiles[id - 1]; /* VRAM: Allocate GEM object for the capture buffer */ - bo = xe_bo_create_locked(xe, tile, NULL, bo_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_PINNED_LATE_RESTORE | - XE_BO_FLAG_NEEDS_CPU_ACCESS); - - if (!IS_ERR(bo)) { - /* Buffer written by HW, ensure stays resident */ - err = xe_bo_pin(bo); - if (err) - bo = ERR_PTR(err); - xe_bo_unlock(bo); - } - - return bo; + return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_NEEDS_CPU_ACCESS); } /* diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c129048a9a09..a1c88f9a6c76 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -13,17 +13,17 @@ #include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt.h" -#include "xe_tlb_inval_job.h" #include "xe_migrate.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" #include "xe_res_cursor.h" #include "xe_sched_job.h" -#include "xe_sync.h" #include "xe_svm.h" +#include "xe_sync.h" #include "xe_tlb_inval_job.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_userptr.h" #include "xe_vm.h" struct xe_pt_dir { @@ -89,6 +89,7 @@ static void xe_pt_free(struct xe_pt *pt) * @vm: The vm to create for. * @tile: The tile to create for. * @level: The page-table level. + * @exec: The drm_exec object used to lock the vm. * * Allocate and initialize a single struct xe_pt metadata structure. Also * create the corresponding page-table bo, but don't initialize it. If the @@ -100,7 +101,7 @@ static void xe_pt_free(struct xe_pt *pt) * error. 
*/ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) + unsigned int level, struct drm_exec *exec) { struct xe_pt *pt; struct xe_bo *bo; @@ -124,9 +125,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; pt->level = level; + + drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec)); bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - bo_flags); + bo_flags, exec); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -590,7 +593,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (covers || !*child) { u64 flags = 0; - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); + xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1, + xe_vm_validation_exec(vm)); if (IS_ERR(xe_child)) return PTR_ERR(xe_child); @@ -729,7 +733,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { - xe_res_first_dma(range->base.dma_addr, 0, + xe_res_first_dma(range->base.pages.dma_addr, 0, range->base.itree.last + 1 - range->base.itree.start, &curs); xe_svm_range_debug(range, "BIND PREPARE - MIXED"); @@ -760,8 +764,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma) && !range) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); + xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -914,7 +918,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) if (xe_vma_bo(vma)) xe_bo_assert_held(xe_vma_bo(vma)); else if (xe_vma_is_userptr(vma)) - lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); + lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock); if (!(pt_mask & BIT(tile->id))) return false; @@ -1049,7 +1053,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_pt_commit_prepare_locks_assert(vma); if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); } static void xe_pt_commit(struct xe_vma *vma, @@ -1376,6 +1380,7 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) pt_update_ops, rftree); } +#if IS_ENABLED(CONFIG_DRM_GPUSVM) #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) @@ -1406,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, struct xe_userptr_vma *uvma; unsigned long notifier_seq; - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); if (!xe_vma_is_userptr(vma)) return 0; @@ -1415,7 +1420,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, if (xe_pt_userptr_inject_eagain(uvma)) xe_vma_userptr_force_invalidate(uvma); - notifier_seq = uvma->userptr.notifier_seq; + notifier_seq = uvma->userptr.pages.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) @@ -1431,12 +1436,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; } -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) +static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_vm_pgtable_update_ops *pt_update) { int err = 0; - 
lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -1454,9 +1459,40 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, case DRM_GPUVA_OP_UNMAP: break; case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); + if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) { + struct xe_svm_range *range = op->map_range.range; + unsigned long i; + + xe_assert(vm->xe, + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); + xa_for_each(&op->prefetch_range.range, i, range) { + xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + return -ENODATA; + } + } + } else { + err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update); + } + break; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + case DRM_GPUVA_OP_DRIVER: + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { + struct xe_svm_range *range = op->map_range.range; + + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); + + xe_svm_range_debug(range, "PRE-COMMIT"); + + if (!xe_svm_range_pages_valid(range)) { + xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); + return -EAGAIN; + } + } break; +#endif default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } @@ -1464,7 +1500,7 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, return err; } -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) +static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_vm *vm = pt_update->vops->vm; struct xe_vma_ops *vops = pt_update->vops; @@ -1477,69 +1513,18 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (err) return err; - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); + err = op_check_svm_userptr(vm, op, pt_update_ops); if (err) { - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); break; } } return err; } - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vma_op *op; - unsigned long i; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - xe_svm_notifier_lock(vm); - - list_for_each_entry(op, &vops->list, link) { - struct xe_svm_range *range = NULL; - - if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) - continue; - - if (op->base.op == DRM_GPUVA_OP_PREFETCH) { - xe_assert(vm->xe, - xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); - xa_for_each(&op->prefetch_range.range, i, range) { - xe_svm_range_debug(range, "PRE-COMMIT"); - - if (!xe_svm_range_pages_valid(range)) { - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); - return -ENODATA; - } - } - } else { - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); - range = op->map_range.range; - - xe_svm_range_debug(range, "PRE-COMMIT"); - - if (!xe_svm_range_pages_valid(range)) { - xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); - return -EAGAIN; - } - } - } - - return 0; -} #endif struct xe_pt_stage_unbind_walk { @@ -1843,7 +1828,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile 
*tile, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); /* * If rebind, we have to invalidate TLB on !LR vms to invalidate @@ -1951,7 +1936,7 @@ static int unbind_op_prepare(struct xe_tile *tile, xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); pt_update_ops->needs_invalidation = true; xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); @@ -2199,7 +2184,7 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, vma->tile_invalidated & ~BIT(tile->id)); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); to_userptr_vma(vma)->userptr.initial_bind = true; } @@ -2235,7 +2220,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); spin_lock(&vm->userptr.invalidated_lock); list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); @@ -2338,20 +2323,14 @@ static const struct xe_migrate_pt_update_ops migrate_ops = { .pre_commit = xe_pt_pre_commit, }; -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static const struct xe_migrate_pt_update_ops svm_migrate_ops = { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = { .populate = xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_svm_pre_commit, + .pre_commit = xe_pt_svm_userptr_pre_commit, }; #else -static const struct xe_migrate_pt_update_ops svm_migrate_ops; +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops; #endif static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q, @@ -2389,9 +2368,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) int err = 0, i; struct xe_migrate_pt_update update = { .ops = pt_update_ops->needs_svm_lock ? - &svm_migrate_ops : - pt_update_ops->needs_userptr_lock ? 
- &userptr_migrate_ops : + &svm_userptr_migrate_ops : &migrate_ops, .vops = vops, .tile_id = tile->id, @@ -2533,8 +2510,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) if (pt_update_ops->needs_svm_lock) xe_svm_notifier_unlock(vm); - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); xe_tlb_inval_job_put(mjob); xe_tlb_inval_job_put(ijob); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 5ecf003d513c..4daeebaab5a1 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -10,6 +10,7 @@ #include "xe_pt_types.h" struct dma_fence; +struct drm_exec; struct xe_bo; struct xe_device; struct xe_exec_queue; @@ -29,7 +30,7 @@ struct xe_vma_ops; unsigned int xe_pt_shift(unsigned int level); struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level); + unsigned int level, struct drm_exec *exec); void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 17cdd7c7e9f5..881f01e14db8 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -105,8 +105,6 @@ struct xe_vm_pgtable_update_ops { u32 current_op; /** @needs_svm_lock: Needs SVM lock */ bool needs_svm_lock; - /** @needs_userptr_lock: Needs userptr lock */ - bool needs_userptr_lock; /** @needs_invalidation: Needs invalidation */ bool needs_invalidation; /** diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 3d62008c99f1..bdbdbbf6a678 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -688,6 +688,7 @@ start: return ret; } +ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO); static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) { diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index ca95f2a4d4ef..e60526e30030 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp) * Each termination is 16 DWORDS, so 4K is enough to contain a * termination for each sessions. 
*/ - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT, + false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_queue; @@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, { struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; struct xe_hw_engine *hwe; + struct drm_exec exec; struct xe_vm *vm; struct xe_bo *bo; struct xe_exec_queue *q; @@ -106,15 +109,26 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, return PTR_ERR(vm); /* We allocate a single object for the batch and the in/out memory */ - xe_vm_lock(vm, false); - bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC); - xe_vm_unlock(vm); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto vm_out; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | + XE_BO_FLAG_NEEDS_UC, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } + if (err) + goto vm_out; fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4dbe5732cb7f..2e9ff33ed2fe 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -21,6 +21,7 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_topology.h" #include "xe_guc_hwconfig.h" #include "xe_macros.h" #include "xe_mmio.h" @@ -275,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[0].instance = 0; mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; - if (perfmon_capable()) - mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); mem_regions->num_mem_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { @@ -292,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = man->size; - if (perfmon_capable()) { - xe_ttm_vram_get_used(man, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].used, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].cpu_visible_used); - } + xe_ttm_vram_get_used(man, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = xe_ttm_vram_get_cpu_visible_size(man); @@ -477,7 +475,7 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); /* L3bank mask may not be available for some GTs */ - if (!XE_GT_WA(gt, no_media_l3)) + if (xe_gt_topology_report_l3(gt)) query_size += sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); } @@ -540,7 +538,7 @@ static int 
query_gt_topology(struct xe_device *xe, * mask, then it's better to omit L3 from the query rather than * reporting bogus or zeroed information to userspace. */ - if (!XE_GT_WA(gt, no_media_l3)) { + if (xe_gt_topology_report_l3(gt)) { topo.type = DRM_XE_TOPO_L3_BANK; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, sizeof(gt->fuse_topo.l3_bank_mask)); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 47ea1521dc80..b5f430d59f80 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -370,3 +370,9 @@ bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, { return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev)); } + +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_has_discontiguous_dss_groups(gt); +} diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 7951fefdbe04..ac12ddf6cde6 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -480,4 +480,7 @@ bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 87911fb4eea7..7d2d6de2aabf 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -160,19 +160,15 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) } /** - * xe_sriov_late_init() - SR-IOV late initialization functions. + * xe_sriov_init_late() - SR-IOV late initialization functions. * @xe: the &xe_device to initialize * - * On VF this function will initialize code for CCS migration. - * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_sriov_late_init(struct xe_device *xe) +int xe_sriov_init_late(struct xe_device *xe) { - int err = 0; - - if (IS_VF_CCS_INIT_NEEDED(xe)) - err = xe_sriov_vf_ccs_init(xe); + if (IS_SRIOV_VF(xe)) + return xe_sriov_vf_init_late(xe); - return err; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 0e0c1abf2d14..6db45df55615 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -18,7 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); -int xe_sriov_late_init(struct xe_device *xe); +int xe_sriov_init_late(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 5de81f213d83..cdd9f8e78b2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -3,6 +3,7 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" @@ -10,6 +11,7 @@ #include "xe_gt.h" #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" +#include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_irq.h" @@ -18,6 +20,7 @@ #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tile_sriov_vf.h" /** @@ -127,16 +130,66 @@ * | | | */ -static bool vf_migration_supported(struct xe_device *xe) +/** + * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is + * supported or not. + * @xe: the &xe_device to check + * + * Returns: true if VF migration is supported, false otherwise. + */ +bool xe_sriov_vf_migration_supported(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + return xe->sriov.vf.migration.enabled; +} + +static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) +{ + struct va_format vaf; + va_list va_args; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + va_start(va_args, fmt); + vaf.fmt = fmt; + vaf.va = &va_args; + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); + va_end(va_args); + + xe->sriov.vf.migration.enabled = false; +} + +static void migration_worker_func(struct work_struct *w); + +static void vf_migration_init_early(struct xe_device *xe) { /* * TODO: Add conditions to allow specific platforms, when they're * supported at production quality. 
*/ - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); -} + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + return vf_disable_migration(xe, + "experimental feature not available on production builds"); + + if (GRAPHICS_VER(xe) < 20) + return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", + GRAPHICS_VER(xe)); + + if (!IS_DGFX(xe)) { + struct xe_uc_fw_version guc_version; + + xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) + return vf_disable_migration(xe, + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", + guc_version.major, guc_version.minor); + } -static void migration_worker_func(struct work_struct *w); + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); + + xe->sriov.vf.migration.enabled = true; + xe_sriov_dbg(xe, "migration support enabled\n"); +} /** * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. @@ -144,10 +197,7 @@ static void migration_worker_func(struct work_struct *w); */ void xe_sriov_vf_init_early(struct xe_device *xe) { - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); - - if (!vf_migration_supported(xe)) - xe_sriov_info(xe, "migration not supported by this module version\n"); + vf_migration_init_early(xe); } /** @@ -302,8 +352,8 @@ static void vf_post_migration_recovery(struct xe_device *xe) xe_pm_runtime_get(xe); vf_post_migration_shutdown(xe); - if (!vf_migration_supported(xe)) { - xe_sriov_err(xe, "migration not supported by this module version\n"); + if (!xe_sriov_vf_migration_supported(xe)) { + xe_sriov_err(xe, "migration is not supported\n"); err = -ENOTRECOVERABLE; goto fail; } @@ -378,3 +428,48 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress"); } + +/** + * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. + * @xe: the &xe_device to initialize + * + * This function initializes code for CCS migration. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vf_init_late(struct xe_device *xe) +{ + int err = 0; + + if (xe_sriov_vf_migration_supported(xe)) + err = xe_sriov_vf_ccs_init(xe); + + return err; +} + +static int sa_info_vf_ccs(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct xe_device *xe = to_xe_device(node->minor->dev); + struct drm_printer p = drm_seq_file_printer(m); + + xe_sriov_vf_ccs_print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, +}; + +/** + * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the VF. 
+ */ +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), + root, xe->drm.primary); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h index 7b8622cff2b7..9e752105ec2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -6,9 +6,15 @@ #ifndef _XE_SRIOV_VF_H_ #define _XE_SRIOV_VF_H_ +#include <linux/types.h> + +struct dentry; struct xe_device; void xe_sriov_vf_init_early(struct xe_device *xe); +int xe_sriov_vf_init_late(struct xe_device *xe); void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); +bool xe_sriov_vf_migration_supported(struct xe_device *xe); +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 4872e43eb440..8dec616c37c9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -13,8 +13,10 @@ #include "xe_guc_submit.h" #include "xe_lrc.h" #include "xe_migrate.h" +#include "xe_pm.h" #include "xe_sa.h" #include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" #include "xe_sriov_vf_ccs_types.h" @@ -135,7 +137,7 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe) return round_up(bb_pool_size * 2, SZ_1M); } -static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx) +static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) { struct xe_device *xe = tile_to_xe(tile); struct xe_sa_manager *sa_manager; @@ -167,7 +169,7 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx) return 0; } -static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx) +static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) { u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); @@ -184,9 +186,8 @@ static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx) xe_lrc_set_ring_tail(lrc, lrc->ring.tail); } -static int register_save_restore_context(struct xe_tile_vf_ccs *ctx) +static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) { - int err = -EINVAL; int ctx_type; switch (ctx->ctx_id) { @@ -197,10 +198,10 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx) ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; break; default: - return err; + return -EINVAL; } - xe_guc_register_exec_queue(ctx->mig_q, ctx_type); + xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type); return 0; } @@ -215,16 +216,14 @@ static int register_save_restore_context(struct xe_tile_vf_ccs *ctx) */ int xe_sriov_vf_ccs_register_context(struct xe_device *xe) { - struct xe_tile *tile = xe_device_get_root_tile(xe); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_tile_vf_ccs *ctx; + struct xe_sriov_vf_ccs_ctx *ctx; int err; - if (!IS_VF_CCS_READY(xe)) - return 0; + xe_assert(xe, IS_VF_CCS_READY(xe)); for_each_ccs_rw_ctx(ctx_id) { - ctx = &tile->sriov.vf.ccs[ctx_id]; + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; err = register_save_restore_context(ctx); if (err) return err; @@ -235,7 +234,7 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe) static void xe_sriov_vf_ccs_fini(void *arg) { - struct xe_tile_vf_ccs *ctx = arg; + struct xe_sriov_vf_ccs_ctx *ctx = arg; struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); /* @@ -259,17 +258,19 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe) { struct xe_tile *tile = 
xe_device_get_root_tile(xe); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_tile_vf_ccs *ctx; + struct xe_sriov_vf_ccs_ctx *ctx; struct xe_exec_queue *q; u32 flags; int err; xe_assert(xe, IS_SRIOV_VF(xe)); - xe_assert(xe, !IS_DGFX(xe)); - xe_assert(xe, xe_device_has_flat_ccs(xe)); + xe_assert(xe, xe_sriov_vf_migration_supported(xe)); + + if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)) + return 0; for_each_ccs_rw_ctx(ctx_id) { - ctx = &tile->sriov.vf.ccs[ctx_id]; + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; ctx->ctx_id = ctx_id; flags = EXEC_QUEUE_FLAG_KERNEL | @@ -324,13 +325,12 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_tile_vf_ccs *ctx; + struct xe_sriov_vf_ccs_ctx *ctx; struct xe_tile *tile; struct xe_bb *bb; int err = 0; - if (!IS_VF_CCS_READY(xe)) - return 0; + xe_assert(xe, IS_VF_CCS_READY(xe)); tile = xe_device_get_root_tile(xe); @@ -339,7 +339,7 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) /* bb should be NULL here. Assert if not NULL */ xe_assert(xe, !bb); - ctx = &tile->sriov.vf.ccs[ctx_id]; + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); } return err; @@ -361,7 +361,9 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) enum xe_sriov_vf_ccs_rw_ctxs ctx_id; struct xe_bb *bb; - if (!IS_VF_CCS_READY(xe)) + xe_assert(xe, IS_VF_CCS_READY(xe)); + + if (!xe_bo_has_valid_ccs_bb(bo)) return 0; for_each_ccs_rw_ctx(ctx_id) { @@ -375,3 +377,34 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) } return 0; } + +/** + * xe_sriov_vf_ccs_print - Print VF CCS details. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) +{ + struct xe_sa_manager *bb_pool; + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(xe)) + return; + + xe_pm_runtime_get(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; + if (!bb_pool) + break; + + drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? 
"write" : "read"); + drm_printf(p, "-------------------------\n"); + drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + drm_puts(p, "\n"); + } + + xe_pm_runtime_put(xe); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h index 1f1baf685fec..0745c0ff0228 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -6,6 +6,11 @@ #ifndef _XE_SRIOV_VF_CCS_H_ #define _XE_SRIOV_VF_CCS_H_ +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_vf_ccs_types.h" + +struct drm_printer; struct xe_device; struct xe_bo; @@ -13,5 +18,17 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe); int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); int xe_sriov_vf_ccs_register_context(struct xe_device *xe); +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); + +static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + return xe->sriov.vf.ccs.initialized; +} + +#define IS_VF_CCS_READY(xe) ({ \ + struct xe_device *xe__ = (xe); \ + IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \ + }) #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h index 93435a6f4cb6..22c499943d2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -6,48 +6,46 @@ #ifndef _XE_SRIOV_VF_CCS_TYPES_H_ #define _XE_SRIOV_VF_CCS_TYPES_H_ +#include <linux/types.h> + #define for_each_ccs_rw_ctx(id__) \ for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++) -#define IS_VF_CCS_READY(xe) ({ \ - struct xe_device *___xe = (xe); \ - xe_assert(___xe, IS_SRIOV_VF(___xe)); \ - ___xe->sriov.vf.ccs.initialized; \ - }) - -#define IS_VF_CCS_INIT_NEEDED(xe) ({\ - struct xe_device *___xe = (xe); \ - IS_SRIOV_VF(___xe) && !IS_DGFX(___xe) && \ - xe_device_has_flat_ccs(___xe) && GRAPHICS_VER(___xe) >= 20; \ - }) - enum xe_sriov_vf_ccs_rw_ctxs { XE_SRIOV_VF_CCS_READ_CTX, XE_SRIOV_VF_CCS_WRITE_CTX, XE_SRIOV_VF_CCS_CTX_COUNT }; -#define IS_VF_CCS_BB_VALID(xe, bo) ({ \ - struct xe_device *___xe = (xe); \ - struct xe_bo *___bo = (bo); \ - IS_SRIOV_VF(___xe) && \ - ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \ - ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \ - }) - struct xe_migrate; struct xe_sa_manager; -struct xe_tile_vf_ccs { - /** @id: Id to which context it belongs to */ +/** + * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. + */ +struct xe_sriov_vf_ccs_ctx { + /** @ctx_id: Id to which context it belongs to */ enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + /** @mig_q: exec queues used for migration */ struct xe_exec_queue *mig_q; + /** @mem: memory data */ struct { - /** @ccs_bb_pool: Pool from which batch buffers are allocated. */ + /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ struct xe_sa_manager *ccs_bb_pool; } mem; }; +/** + * struct xe_sriov_vf_ccs - The VF CCS migration support data. + */ +struct xe_sriov_vf_ccs { + /** @contexts: CCS read and write contexts for VF. */ + struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT]; + + /** @initialized: Initialization of VF CCS is completed or not. 
*/ + bool initialized; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h index 24a873c50c49..426cc5841958 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -9,6 +9,8 @@ #include <linux/types.h> #include <linux/workqueue_types.h> +#include "xe_sriov_vf_ccs_types.h" + /** * struct xe_sriov_vf_relay_version - PF ABI version details. */ @@ -35,13 +37,15 @@ struct xe_device_vf { struct work_struct worker; /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ unsigned long gt_flags; + /** + * @migration.enabled: flag indicating if migration support + * was enabled or not due to missing prerequisites + */ + bool enabled; } migration; /** @ccs: VF CCS state data */ - struct { - /** @ccs.initialized: Initilalization of VF CCS is completed or not */ - bool initialized; - } ccs; + struct xe_sriov_vf_ccs ccs; }; #endif diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 7999cc5262a5..1662bfddd4bc 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -289,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) u32 data; bool survivability_mode; - if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) return false; survivability_mode = xe_configfs_get_survivability_mode(pdev); - - if (xe->info.platform < XE_BATTLEMAGE) { - if (survivability_mode) { - dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); - xe_configfs_clear_survivability_mode(pdev); - } - return false; - } - /* Enable survivability mode if set via configfs */ if (survivability_mode) return true; diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 76c6d74c1208..7e2db71ff34e 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -6,6 +6,7 @@ #include <drm/drm_drv.h> #include "xe_bo.h" +#include "xe_exec_queue_types.h" #include "xe_gt_stats.h" #include "xe_migrate.h" #include "xe_module.h" @@ -25,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) * memory. */ - struct drm_gpusvm_range_flags flags = { + struct drm_gpusvm_pages_flags flags = { /* Pairs with WRITE_ONCE in drm_gpusvm.c */ - .__flags = READ_ONCE(range->base.flags.__flags), + .__flags = READ_ONCE(range->base.pages.flags.__flags), }; return flags.has_devmem_pages; @@ -49,15 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r) return gpusvm_to_vm(r->gpusvm); } -#define range_debug(r__, operaton__) \ +#define range_debug(r__, operation__) \ vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ "start=0x%014lx, end=0x%014lx, size=%lu", \ - (operaton__), range_to_vm(&(r__)->base)->usm.asid, \ + (operation__), range_to_vm(&(r__)->base)->usm.asid, \ (r__)->base.gpusvm, \ xe_svm_range_in_vram((r__)) ? 1 : 0, \ xe_svm_range_has_vram_binding((r__)) ? 
1 : 0, \ - (r__)->base.notifier_seq, \ + (r__)->base.pages.notifier_seq, \ xe_svm_range_start((r__)), xe_svm_range_end((r__)), \ xe_svm_range_size((r__))) @@ -66,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) range_debug(range, operation); } -static void *xe_svm_devm_owner(struct xe_device *xe) -{ - return xe; -} - static struct drm_gpusvm_range * xe_svm_range_alloc(struct drm_gpusvm *gpusvm) { @@ -112,6 +108,11 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, &vm->svm.garbage_collector.work); } +static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) +{ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1); +} + static u8 xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, const struct mmu_notifier_range *mmu_range, @@ -128,7 +129,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, range_debug(range, "NOTIFIER"); /* Skip if already unmapped or if no binding exist */ - if (range->base.flags.unmapped || !range->tile_present) + if (range->base.pages.flags.unmapped || !range->tile_present) return 0; range_debug(range, "NOTIFIER - EXECUTE"); @@ -144,13 +145,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, */ for_each_tile(tile, xe, id) if (xe_pt_zap_ptes_range(tile, vm, range)) { - tile_mask |= BIT(id); /* * WRITE_ONCE pairs with READ_ONCE in * xe_vm_has_valid_gpu_mapping() */ WRITE_ONCE(range->tile_invalidated, range->tile_invalidated | BIT(id)); + + if (!(tile_mask & BIT(id))) { + xe_svm_tlb_inval_count_stats_incr(tile->primary_gt); + if (tile->media_gt) + xe_svm_tlb_inval_count_stats_incr(tile->media_gt); + tile_mask |= BIT(id); + } } return tile_mask; @@ -170,6 +177,24 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r, mmu_range); } +static s64 xe_svm_stats_ktime_us_delta(ktime_t start) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? + ktime_us_delta(ktime_get(), start) : 0; +} + +static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta); +} + +static ktime_t xe_svm_stats_ktime_get(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? 
ktime_get() : 0; +} + static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, const struct mmu_notifier_range *mmu_range) @@ -177,8 +202,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct xe_vm *vm = gpusvm_to_vm(gpusvm); struct xe_device *xe = vm->xe; struct drm_gpusvm_range *r, *first; + struct xe_tile *tile; + ktime_t start = xe_svm_stats_ktime_get(); u64 adj_start = mmu_range->start, adj_end = mmu_range->end; - u8 tile_mask = 0; + u8 tile_mask = 0, id; long err; xe_svm_assert_in_notifier(vm); @@ -231,6 +258,13 @@ range_notifier_event_end: r = first; drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) xe_svm_range_notifier_event_end(vm, r, mmu_range); + for_each_tile(tile, xe, id) { + if (tile_mask & BIT(id)) { + xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start); + if (tile->media_gt) + xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start); + } + } } static int __xe_svm_garbage_collector(struct xe_vm *vm, @@ -308,8 +342,8 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) if (xe_vm_is_closed_or_banned(vm)) return -ENOENT; - spin_lock(&vm->svm.garbage_collector.lock); for (;;) { + spin_lock(&vm->svm.garbage_collector.lock); range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list, typeof(*range), garbage_collector_link); @@ -338,8 +372,6 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) else return err; } - - spin_lock(&vm->svm.garbage_collector.lock); } spin_unlock(&vm->svm.garbage_collector.lock); @@ -384,11 +416,66 @@ enum xe_svm_copy_dir { XE_SVM_COPY_TO_SRAM, }; +static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + int kb) +{ + if (dir == XE_SVM_COPY_TO_VRAM) + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb); + else + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb); +} + +static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + unsigned long npages, + ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + if (dir == XE_SVM_COPY_TO_VRAM) { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + us_delta); + } else { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US, + us_delta); + } +} + static int xe_svm_copy(struct page **pages, struct drm_pagemap_addr *pagemap_addr, unsigned long npages, const enum xe_svm_copy_dir dir) { struct xe_vram_region *vr = NULL; + struct xe_gt *gt = NULL; struct xe_device *xe; struct dma_fence *fence = NULL; unsigned long i; @@ -396,6 +483,7 @@ static int xe_svm_copy(struct page **pages, u64 vram_addr = XE_VRAM_ADDR_INVALID; int err = 0, pos = 0; bool sram = dir == XE_SVM_COPY_TO_SRAM; + ktime_t start = xe_svm_stats_ktime_get(); /* * This flow is complex: it locates physically contiguous device pages, @@ -422,6 +510,7 @@ static int xe_svm_copy(struct page **pages, if (!vr && spage) { vr = page_to_vr(spage); + gt = 
xe_migrate_exec_queue(vr->migrate)->gt; xe = vr->xe; } XE_WARN_ON(spage && page_to_vr(spage) != vr); @@ -461,6 +550,9 @@ static int xe_svm_copy(struct page **pages, int incr = (match && last) ? 1 : 0; if (vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (i - pos + incr) * + (PAGE_SIZE / SZ_1K)); if (sram) { vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", @@ -499,6 +591,8 @@ static int xe_svm_copy(struct page **pages, /* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (PAGE_SIZE / SZ_1K)); if (sram) { vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", @@ -532,6 +626,14 @@ err_out: dma_fence_put(fence); } + /* + * XXX: We can't derive the GT here (or anywhere in this function), but + * compute always uses the primary GT, so accumulate stats on the likely + * GT of the fault. + */ + if (gt) + xe_svm_copy_us_stats_incr(gt, dir, npages, start); + return err; #undef XE_MIGRATE_CHUNK_SIZE #undef XE_VRAM_ADDR_INVALID @@ -630,22 +732,25 @@ int xe_svm_init(struct xe_vm *vm) { int err; - spin_lock_init(&vm->svm.garbage_collector.lock); - INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); - INIT_WORK(&vm->svm.garbage_collector.work, - xe_svm_garbage_collector_work_func); - - err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, - current->mm, xe_svm_devm_owner(vm->xe), 0, - vm->size, xe_modparam.svm_notifier_size * SZ_1M, - &gpusvm_ops, fault_chunk_sizes, - ARRAY_SIZE(fault_chunk_sizes)); - if (err) - return err; - - drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) { + spin_lock_init(&vm->svm.garbage_collector.lock); + INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); + INIT_WORK(&vm->svm.garbage_collector.work, + xe_svm_garbage_collector_work_func); + + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, + current->mm, 0, vm->size, + xe_modparam.svm_notifier_size * SZ_1M, + &gpusvm_ops, fault_chunk_sizes, + ARRAY_SIZE(fault_chunk_sizes)); + drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + } else { + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", + &vm->xe->drm, NULL, 0, 0, 0, NULL, + NULL, 0); + } - return 0; + return err; } /** @@ -716,7 +821,7 @@ bool xe_svm_range_validate(struct xe_vm *vm, xe_svm_notifier_lock(vm); ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && - (devmem_preferred == range->base.flags.has_devmem_pages); + (devmem_preferred == range->base.pages.flags.has_devmem_pages); xe_svm_notifier_unlock(vm); @@ -755,49 +860,48 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; struct drm_buddy_block *block; + struct xe_validation_ctx vctx; struct list_head *blocks; + struct drm_exec exec; struct xe_bo *bo; - ktime_t time_end = 0; - int err, idx; + int err = 0, idx; if (!drm_dev_enter(&xe->drm, &idx)) return -ENODEV; xe_pm_runtime_get(xe); - retry: - bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start, - ttm_bo_type_device, - (IS_DGFX(xe) ?
XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | - XE_BO_FLAG_CPU_ADDR_MIRROR); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &time_end)) - goto retry; - goto out_pm_put; - } - - drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, - &dpagemap_devmem_ops, dpagemap, end - start); + xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = xe_bo_create_locked(xe, NULL, NULL, end - start, + ttm_bo_type_device, + (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | + XE_BO_FLAG_CPU_ADDR_MIRROR, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&vctx, &err); + break; + } - blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; - list_for_each_entry(block, blocks, link) - block->private = vr; + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, dpagemap, end - start); - xe_bo_get(bo); + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; + list_for_each_entry(block, blocks, link) + block->private = vr; - /* Ensure the device has a pm ref while there are device pages active. */ - xe_pm_runtime_get_noresume(xe); - err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, - start, end, timeslice_ms, - xe_svm_devm_owner(xe)); - if (err) - xe_svm_devmem_release(&bo->devmem_allocation); + xe_bo_get(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - -out_pm_put: + /* Ensure the device has a pm ref while there are device pages active. */ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); + if (err) + xe_svm_devmem_release(&bo->devmem_allocation); + xe_bo_unlock(bo); + xe_bo_put(bo); + } xe_pm_runtime_put(xe); drm_dev_exit(idx); @@ -827,17 +931,17 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = xe_svm_range_size(range); - if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) + if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram) return false; xe_assert(vm->xe, IS_DGFX(vm->xe)); - if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { + if (xe_svm_range_in_vram(range)) { drm_info(&vm->xe->drm, "Range is already in VRAM\n"); return false; } - if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } @@ -845,27 +949,78 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm return true; } +#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \ +static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range) \ +{ \ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE) + +#define DECL_SVM_RANGE_US_STATS(elem, stat) \ +static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range, \ + 
ktime_t start) \ +{ \ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); \ +\ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \ + us_delta); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_US_STATS(migrate, MIGRATE) +DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES) +DECL_SVM_RANGE_US_STATS(bind, BIND) +DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT) + static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct xe_gt *gt, u64 fault_addr, bool need_vram) { + int devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), - .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), - .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, - .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), - .timeslice_ms = need_vram && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? + .devmem_possible = devmem_possible, + .check_pages_threshold = devmem_possible ? SZ_64K : 0, + .devmem_only = need_vram && devmem_possible, + .timeslice_ms = need_vram && devmem_possible ? vm->xe->atomic_svm_timeslice_ms : 0, + .device_private_page_owner = xe_svm_devm_owner(vm->xe), }; + struct xe_validation_ctx vctx; + struct drm_exec exec; struct xe_svm_range *range; struct dma_fence *fence; struct drm_pagemap *dpagemap; struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 3 : 1; - ktime_t end = 0; + ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start; int err; lockdep_assert_held_write(&vm->lock); @@ -884,23 +1039,34 @@ retry: if (IS_ERR(range)) return PTR_ERR(range); - if (ctx.devmem_only && !range->base.flags.migrate_devmem) - return -EACCES; + xe_svm_range_fault_count_stats_incr(gt, range); - if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) - return 0; + if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { + err = -EACCES; + goto out; + } + + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) { + xe_svm_range_valid_fault_count_stats_incr(gt, range); + range_debug(range, "PAGE FAULT - VALID"); + goto out; + } range_debug(range, "PAGE FAULT"); dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { + ktime_t migrate_start = xe_svm_stats_ktime_get(); + /* TODO : For multi-device dpagemap will be used to find the * remote tile and remote device. Will need to modify * xe_svm_alloc_vram to use dpagemap for future multi-device * support. 
*/ + xe_svm_range_migrate_count_stats_incr(gt, range); err = xe_svm_alloc_vram(tile, range, &ctx); + xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { @@ -917,6 +1083,8 @@ retry: } } + get_pages_start = xe_svm_stats_ktime_get(); + range_debug(range, "GET PAGES"); err = xe_svm_range_get_pages(vm, range, &ctx); /* Corner where CPU mappings have changed */ @@ -936,32 +1104,45 @@ retry: } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); - goto err_out; + goto out; } + xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); range_debug(range, "PAGE FAULT - BIND"); -retry_bind: - xe_vm_lock(vm, false); - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); - if (IS_ERR(fence)) { - xe_vm_unlock(vm); - err = PTR_ERR(fence); - if (err == -EAGAIN) { - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ - range_debug(range, "PAGE FAULT - RETRY BIND"); - goto retry; + bind_start = xe_svm_stats_ktime_get(); + xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + + xe_vm_set_validation_exec(vm, &exec); + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); + if (IS_ERR(fence)) { + drm_exec_retry_on_contention(&exec); + err = PTR_ERR(fence); + xe_validation_retry_on_oom(&vctx, &err); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + break; } - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry_bind; - goto err_out; } - xe_vm_unlock(vm); + if (err) + goto err_out; dma_fence_wait(fence, false); dma_fence_put(fence); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + +out: + xe_svm_range_fault_us_stats_incr(gt, range, start); + return 0; err_out: + if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + range_debug(range, "PAGE FAULT - RETRY BIND"); + goto retry; + } return err; } @@ -1089,7 +1270,7 @@ struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), xe_vma_start(vma), xe_vma_end(vma), ctx); if (IS_ERR(r)) - return ERR_PTR(PTR_ERR(r)); + return ERR_CAST(r); return to_xe_range(r); } @@ -1221,7 +1402,7 @@ int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, { struct drm_pagemap *dpagemap; - xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem); range_debug(range, "ALLOCATE VRAM"); dpagemap = tile_local_pagemap(tile); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 9d6a8840a8b7..0955d2ac8d74 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,6 +6,20 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +struct xe_device; + +/** + * xe_svm_devm_owner() - Return the owner of device private memory + * @xe: The xe device. 
+ * + * Return: The owner of this device's device private memory to use in + * hmm_range_fault()- + */ +static inline void *xe_svm_devm_owner(struct xe_device *xe) +{ + return xe; +} + #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) #include <drm/drm_pagemap.h> @@ -105,7 +119,7 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) { lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; + return range->base.pages.flags.has_dma_mapping; } /** @@ -155,19 +169,11 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) return drm_gpusvm_range_size(&range->base); } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) - -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) - -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) - void xe_svm_flush(struct xe_vm *vm); #else #include <linux/interval_tree.h> +#include "xe_vm.h" struct drm_pagemap_addr; struct drm_gpusvm_ctx; @@ -184,7 +190,9 @@ struct xe_vram_region; struct xe_svm_range { struct { struct interval_tree_node itree; - const struct drm_pagemap_addr *dma_addr; + struct { + const struct drm_pagemap_addr *dma_addr; + } pages; } base; u32 tile_present; u32 tile_invalidated; @@ -204,12 +212,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) static inline int xe_svm_init(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm, + NULL, NULL, 0, 0, 0, NULL, NULL, 0); +#else return 0; +#endif } static inline void xe_svm_fini(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + xe_assert(vm->xe, xe_vm_is_closed(vm)); + drm_gpusvm_fini(&vm->svm.gpusvm); +#endif } static inline @@ -326,19 +343,47 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t return NULL; } -#define xe_svm_assert_in_notifier(...) do {} while (0) +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} #define xe_svm_range_has_dma_mapping(...) false +#endif /* CONFIG_DRM_XE_GPUSVM */ + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */ +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_assert_held_read(vm__) \ + lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_lock_interruptible(vm__) \ + down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + +#else +#define xe_svm_assert_in_notifier(...) 
do {} while (0) + +static inline void xe_svm_assert_held_read(struct xe_vm *vm) +{ +} static inline void xe_svm_notifier_lock(struct xe_vm *vm) { } -static inline void xe_svm_notifier_unlock(struct xe_vm *vm) +static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm) { + return 0; } -static inline void xe_svm_flush(struct xe_vm *vm) +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) { } -#endif +#endif /* CONFIG_DRM_GPUSVM */ + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c new file mode 100644 index 000000000000..5523874cba7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> + +#include "xe_pm.h" +#include "xe_sa.h" +#include "xe_tile_debugfs.h" + +static struct xe_tile *node_to_tile(struct drm_info_node *node) +{ + return node->dent->d_parent->d_inode->i_private; +} + +/** + * tile_debugfs_simple_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_tile specific. + * + * It is assumed that those debugfs files will be created on directory entry + * which struct dentry d_inode->i_private points to &xe_tile. + * + * /sys/kernel/debug/dri/0/ + * ├── tile0/ # tile = dentry->d_inode->i_private + * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_tile *, struct drm_printer *) + * + * Example:: + * + * int tile_id(struct xe_tile *tile, struct drm_printer *p) + * { + * drm_printf(p, "%u\n", tile->id); + * return 0; + * } + * + * static const struct drm_info_list info[] = { + * { name = "id", .show = tile_debugfs_simple_show, .data = tile_id }, + * }; + * + * dir = debugfs_create_dir("tile0", parent); + * dir->d_inode->i_private = tile; + * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +static int tile_debugfs_simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data; + + return print(tile, &p); +} + +/** + * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * Similar to tile_debugfs_simple_show() but implicitly takes a RPM ref. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + xe_pm_runtime_get(xe); + ret = tile_debugfs_simple_show(m, data); + xe_pm_runtime_put(xe); + + return ret; +} + +static int sa_info(struct xe_tile *tile, struct drm_printer *p) +{ + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, + xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); + + return 0; +} + +/* only for debugfs files which can be safely used on the VF */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info }, +}; + +/** + * xe_tile_debugfs_register - Register tile's debugfs attributes + * @tile: the &xe_tile to register + * + * Create debugfs sub-directory with a name that includes a tile ID and + * then creates set of debugfs files (attributes) specific to this tile. + */ +void xe_tile_debugfs_register(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + char name[8]; + + snprintf(name, sizeof(name), "tile%u", tile->id); + tile->debugfs = debugfs_create_dir(name, root); + if (IS_ERR(tile->debugfs)) + return; + + /* + * Store the xe_tile pointer as private data of the tile/ directory + * node so other tile specific attributes under that directory may + * refer to it by looking at its parent node private data. + */ + tile->debugfs->d_inode->i_private = tile; + + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + tile->debugfs, minor); +} diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h new file mode 100644 index 000000000000..0e5f724de37f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_DEBUGFS_H_ +#define _XE_TILE_DEBUGFS_H_ + +struct xe_tile; + +void xe_tile_debugfs_register(struct xe_tile *tile); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h new file mode 100644 index 000000000000..63640a42685d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_printk.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _xe_tile_printk_H_ +#define _xe_tile_printk_H_ + +#include "xe_printk.h" + +#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args + +#define xe_tile_printk(_tile, _level, _fmt, ...) \ + xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_err(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_once(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_ratelimited(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_tile_warn(_tile, _fmt, ...) \ + xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__) + +#define xe_tile_notice(_tile, _fmt, ...) \ + xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__) + +#define xe_tile_info(_tile, _fmt, ...) \ + xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__) + +#define xe_tile_dbg(_tile, _fmt, ...) 
\ + xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__) + +#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \ + xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__) + +#define xe_tile_WARN(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ON(_tile, _condition) \ + xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_tile_WARN_ON_ONCE(_tile, _condition) \ + xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_err(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_info(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + struct drm_printer dbg; + + /* + * The original xe_tile_dbg() callsite annotations are useless here, + * redirect to the tweaked xe_dbg_printer() instead. + */ + dbg = xe_dbg_printer(tile->xe); + dbg.origin = p->origin; + + drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf)); +} + +/** + * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err() + * @tile: the &xe_tile pointer to use in xe_tile_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_err, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info() + * @tile: the &xe_tile pointer to use in xe_tile_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_info, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg() + * @tile: the &xe_tile pointer to use in xe_tile_dbg() + * + * Return: The &drm_printer object. 
+ */ +static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_dbg, + .arg = tile, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index b804234a6551..9e1236a9ec67 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile) kt->tile = tile; err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); - if (err) { - kobject_put(&kt->base); - return err; - } + if (err) + goto err_object; tile->sysfs = &kt->base; err = xe_vram_freq_sysfs_init(tile); if (err) - return err; + goto err_object; return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); + +err_object: + kobject_put(&kt->base); + return err; } diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index e6e97b5a7b5c..918a59e686ea 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -10,11 +10,10 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_gt_stats.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_tlb_inval.h" -#include "xe_gt_stats.h" -#include "xe_tlb_inval.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_tlb_inval.h" diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 9bbdde604923..622b76078567 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,8 +115,8 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ @@ -328,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } -static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; @@ -343,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) return -EINVAL; } - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); - compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version); - uc_fw->private_data_size = css->private_data_size; + uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info); + uc_fw->private_data_size = guc_info->private_data_size; return 0; } @@ -416,8 +417,8 @@ 
static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t css = (struct uc_css_header *)fw_data; /* Check integrity of size values inside CSS header */ - size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - - css->exponent_size_dw) * sizeof(u32); + size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw - + css->rsa_info.exponent_size_dw) * sizeof(u32); if (unlikely(size != sizeof(struct uc_css_header))) { drm_warn(&xe->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", @@ -430,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. */ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + @@ -443,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t } /* Get version numbers from the CSS header */ - release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); - release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version); + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version); + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) - return guc_read_css_info(uc_fw, css); + return guc_read_css_info(uc_fw, &css->guc_info); return 0; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index 87ade41209d0..3c9a63d13032 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -44,6 +44,39 @@ * in fw. So driver will load a truncated firmware in this case. 
*/ +struct uc_css_rsa_info { + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; +} __packed; + +struct uc_css_guc_info { + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_TIME_MIN (0xFF << 8) +#define CSS_TIME_SEC (0xFFFF << 16) + u32 reserved0[5]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 submission_version; + u32 reserved1[11]; + u32 header_info; +#define CSS_HEADER_INFO_SVN (0xFF) +#define CSS_HEADER_INFO_COPY_VALID (0x1 << 31) + u32 private_data_size; + u32 ukernel_info; +#define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16) +#define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8) +#define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2) +#define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0 +#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1 +#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2 +#define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1) +#define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0) +} __packed; + struct uc_css_header { u32 module_type; /* @@ -52,36 +85,21 @@ struct uc_css_header { */ u32 header_size_dw; u32 header_version; - u32 module_id; + u32 reserved0; u32 module_vendor; u32 date; -#define CSS_DATE_DAY (0xFF << 0) -#define CSS_DATE_MONTH (0xFF << 8) -#define CSS_DATE_YEAR (0xFFFF << 16) +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) u32 size_dw; /* uCode plus header_size_dw */ - u32 key_size_dw; - u32 modulus_size_dw; - u32 exponent_size_dw; - u32 time; -#define CSS_TIME_HOUR (0xFF << 0) -#define CSS_DATE_MIN (0xFF << 8) -#define CSS_DATE_SEC (0xFFFF << 16) - char username[8]; - char buildnumber[12]; - u32 sw_version; -#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) union { - u32 submission_version; /* only applies to GuC */ - u32 reserved2; + u32 reserved1[3]; + struct uc_css_rsa_info rsa_info; }; - u32 reserved0[12]; union { - u32 private_data_size; /* only applies to GuC */ - u32 reserved1; + u32 reserved2[22]; + struct uc_css_guc_info guc_info; }; - u32 header_info; } __packed; static_assert(sizeof(struct uc_css_header) == 128); @@ -318,4 +336,70 @@ struct gsc_manifest_header { u32 exponent_size; /* in dwords */ } __packed; +/** + * DOC: Late binding Firmware Layout + * + * The Late binding binary starts with FPT header, which contains locations + * of various partitions of the binary. Here we're interested in finding out + * manifest version. To the manifest version, we need to locate CPD header + * one of the entry in CPD header points to manifest header. Manifest header + * contains the version. + * + * +================================================+ + * | FPT Header | + * +================================================+ + * | FPT entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES" | + * | ... | + * | offset >-----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | CPD Header |<-----o + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES.man" | + * | ... | + * | offset >----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | Manifest Header |<-----o + * | ... | + * | FW version | + * | ... 
| + * +================================================+ + */ + +/* FPT Headers */ +struct csc_fpt_header { + u32 header_marker; +#define CSC_FPT_HEADER_MARKER 0x54504624 + u32 num_of_entries; + u8 header_version; + u8 entry_version; + u8 header_length; /* in bytes */ + u8 flags; + u16 ticks_to_add; + u16 tokens_to_add; + u32 uma_size; + u32 crc32; + struct gsc_version fitc_version; +} __packed; + +struct csc_fpt_entry { + u8 name[4]; /* partition name */ + u32 reserved1; + u32 offset; /* offset from beginning of CSE region */ + u32 length; /* partition length in bytes */ + u32 reserved2[3]; + u32 partition_flags; +} __packed; + #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 914026015019..77a1dcf8b4ed 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -147,6 +147,9 @@ struct xe_uc_fw { /** @private_data_size: size of private data found in uC css header */ u32 private_data_size; + + /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */ + u32 build_type; }; #endif diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c new file mode 100644 index 000000000000..f16e92cd8090 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_userptr.h" + +#include <linux/mm.h> + +#include "xe_trace_bo.h" + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @uvma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and the function can be called + * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +{ + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq) ? + -EAGAIN : 0; +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the svm.gpusvm.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. + */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ?
0 : -EAGAIN; +} + +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) +{ + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + struct drm_gpusvm_ctx ctx = { + .read_only = xe_vma_read_only(vma), + .device_private_page_owner = NULL, + }; + + lockdep_assert_held(&vm->lock); + xe_assert(xe, xe_vma_is_userptr(vma)); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + uvma->userptr.notifier.mm, + &uvma->userptr.notifier, + xe_vma_userptr(vma), + xe_vma_userptr(vma) + xe_vma_size(vma), + &ctx); +} + +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct drm_gpusvm_ctx ctx = { + .in_notifier = true, + .read_only = xe_vma_read_only(vma), + }; + long err; + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&userptr->invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKMARK fences + * to the vm. + */ + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(vma) >> PAGE_SHIFT, &ctx); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->svm.gpusvm.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->svm.gpusvm.notifier_lock); + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. 
+ */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq)) + uvma->userptr.pages.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_userptr_vma *uvma, *next; + int err = 0; + + xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&uvma->userptr.invalidate_link); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to bind list */ + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); + if (err == -EFAULT) { + list_del_init(&uvma->userptr.repin_link); + /* + * We might have already done the pin once already, but + * then had to retry before the re-bind happened, due + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); + + /* Wait for pending binds */ + xe_vm_lock(vm, false); + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + down_read(&vm->svm.gpusvm.notifier_lock); + err = xe_vm_invalidate_vma(&uvma->vma); + up_read(&vm->svm.gpusvm.notifier_lock); + xe_vm_unlock(vm); + if (err) + break; + } else { + if (err) + break; + + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, + &vm->rebind_list); + } + } + + if (err) { + down_write(&vm->svm.gpusvm.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->svm.gpusvm.notifier_lock); + } + return err; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning. + * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; +} + +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range) +{ + struct xe_userptr *userptr = &uvma->userptr; + int err; + + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); + + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, + start, range, + &vma_userptr_notifier_ops); + if (err) + return err; + + userptr->pages.notifier_seq = LONG_MAX; + + return 0; +} + +void xe_userptr_remove(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + struct xe_userptr *userptr = &uvma->userptr; + + drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(&uvma->vma) >> PAGE_SHIFT); + + /* + * Since userptr pages are not pinned, we can't remove + * the notifier until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&userptr->notifier); +} + +void xe_userptr_destroy(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link)); + list_del(&uvma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); +} diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h new file mode 100644 index 000000000000..ef801234991e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_USERPTR_H_ +#define _XE_USERPTR_H_ + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/scatterlist.h> +#include <linux/spinlock.h> + +#include <drm/drm_gpusvm.h> + +struct xe_vm; +struct xe_vma; +struct xe_userptr_vma; + +/** struct xe_userptr_vm - User pointer VM level state */ +struct xe_userptr_vm { + /** + * @userptr.repin_list: list of VMAs which are user pointers, + * and needs repinning. Protected by @lock. + */ + struct list_head repin_list; + /** + * @userptr.invalidated_lock: Protects the + * @userptr.invalidated list. + */ + spinlock_t invalidated_lock; + /** + * @userptr.invalidated: List of invalidated userptrs, not yet + * picked + * up for revalidation. Protected from access with the + * @invalidated_lock. Removing items from the list + * additionally requires @lock in write mode, and adding + * items to the list requires either the @svm.gpusvm.notifier_lock in + * write mode, OR @lock in write mode. + */ + struct list_head invalidated; +}; + +/** struct xe_userptr - User pointer */ +struct xe_userptr { + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** @userptr: link into VM repin list if userptr. */ + struct list_head repin_link; + /** + * @pages: gpusvm pages for this user pointer. + */ + struct drm_gpusvm_pages pages; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held. + * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held. 
+ */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif +}; + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +void xe_userptr_remove(struct xe_userptr_vma *uvma); +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range); +void xe_userptr_destroy(struct xe_userptr_vma *uvma); + +int xe_vm_userptr_pin(struct xe_vm *vm); +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); +int xe_vm_userptr_check_repin(struct xe_vm *vm); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); +#else +static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {} + +static inline int xe_userptr_setup(struct xe_userptr_vma *uvma, + unsigned long start, unsigned long range) +{ + return -ENODEV; +} + +static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {} + +static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; } +static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; } +static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }; +#endif + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c new file mode 100644 index 000000000000..826cd09966ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ +#include "xe_bo.h" +#include <drm/drm_exec.h> +#include <drm/drm_gem.h> +#include <drm/drm_gpuvm.h> + +#include "xe_assert.h" +#include "xe_validation.h" + +#ifdef CONFIG_DRM_XE_DEBUG +/** + * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable + * for validation. + * @xe: Pointer to the xe device. + * @exec: The drm_exec pointer to check. + * @obj: Pointer to the object subject to validation. + * + * NULL exec pointers are not allowed. + * For XE_VALIDATION_UNIMPLEMENTED, no checking. + * For XE_VLIDATION_OPT_OUT, check that the caller is a kunit test + * For XE_VALIDATION_UNSUPPORTED, check that the object subject to + * validation is a dma-buf, for which support for ww locking is + * not in place in the dma-buf layer. 
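 *
 * Illustrative sketch only, not part of this patch: a call chain that has
 * not yet been converted to a full drm_exec transaction is expected to pass
 * one of the special pointer values above down to the validation helpers,
 * for example (bo and vm here are placeholder variables):
 *
 *	err = xe_bo_validate(bo, vm, false, XE_VALIDATION_UNIMPLEMENTED);
 *
 * and this helper then asserts that such a special value is only used in a
 * context where it is allowed.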
+ */ +void xe_validation_assert_exec(const struct xe_device *xe, + const struct drm_exec *exec, + const struct drm_gem_object *obj) +{ + xe_assert(xe, exec); + if (IS_ERR(exec)) { + switch (PTR_ERR(exec)) { + case __XE_VAL_UNIMPLEMENTED: + break; + case __XE_VAL_UNSUPPORTED: + xe_assert(xe, !!obj->dma_buf); + break; +#if IS_ENABLED(CONFIG_KUNIT) + case __XE_VAL_OPT_OUT: + xe_assert(xe, current->kunit_test); + break; +#endif + default: + xe_assert(xe, false); + } + } +} +#endif + +static int xe_validation_lock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + int ret = 0; + + if (ctx->val_flags.interruptible) { + if (ctx->request_exclusive) + ret = down_write_killable(&val->lock); + else + ret = down_read_interruptible(&val->lock); + } else { + if (ctx->request_exclusive) + down_write(&val->lock); + else + down_read(&val->lock); + } + + if (!ret) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return ret; +} + +static int xe_validation_trylock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + bool locked; + + if (ctx->request_exclusive) + locked = down_write_trylock(&val->lock); + else + locked = down_read_trylock(&val->lock); + + if (locked) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return locked ? 0 : -EWOULDBLOCK; +} + +static void xe_validation_unlock(struct xe_validation_ctx *ctx) +{ + if (!ctx->lock_held) + return; + + if (ctx->lock_held_exclusive) + up_write(&ctx->val->lock); + else + up_read(&ctx->val->lock); + + ctx->lock_held = false; +} + +/** + * xe_validation_ctx_init() - Initialize an xe_validation_ctx + * @ctx: The xe_validation_ctx to initialize. + * @val: The xe_validation_device representing the validation domain. + * @exec: The struct drm_exec to use for the transaction. May be NULL. + * @flags: The flags to use for initialization. + * + * Initialize and lock a an xe_validation transaction using the validation domain + * represented by @val. Also initialize the drm_exec object forwarding parts of + * @flags to the drm_exec initialization. The @flags.exclusive flag should + * typically be set to false to avoid locking out other validators from the + * domain until an OOM is hit. For testing- or final attempt purposes it can, + * however, be set to true. + * + * Return: %0 on success, %-EINTR if interruptible initial locking failed with a + * signal pending. If @flags.no_block is set to true, a failed trylock + * returns %-EWOULDBLOCK. + */ +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags) +{ + int ret; + + ctx->exec = exec; + ctx->val = val; + ctx->lock_held = false; + ctx->lock_held_exclusive = false; + ctx->request_exclusive = flags.exclusive; + ctx->val_flags = flags; + ctx->exec_flags = 0; + ctx->nr = 0; + + if (flags.no_block) + ret = xe_validation_trylock(ctx); + else + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + if (exec) { + if (flags.interruptible) + ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT; + if (flags.exec_ignore_duplicates) + ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES; + drm_exec_init(exec, ctx->exec_flags, ctx->nr); + } + + return 0; +} + +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH +/* + * This abuses both drm_exec and ww_mutex internals and should be + * replaced by checking for -EDEADLK when we can make TTM + * stop converting -EDEADLK to -ENOMEM. 
+ * An alternative is to not have exhaustive eviction with + * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens. + */ +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return !!exec->ticket.contending_lock; +} + +#else + +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return false; +} + +#endif + +static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret) +{ + if (ret == -ENOMEM && + ((ctx->request_exclusive && + xe_validation_contention_injected(ctx->exec)) || + !ctx->request_exclusive)) { + ctx->request_exclusive = true; + return true; + } + + return false; +} + +/** + * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation + * transaction. + * @ctx: An uninitialized xe_validation_ctx. + * @vm_exec: An initialized struct vm_exec. + * @val: The validation domain. + * + * The drm_gpuvm_exec_lock() function internally initializes its drm_exec + * transaction and therefore doesn't lend itself very well to be using + * xe_validation_ctx_init(). Provide a helper that takes an uninitialized + * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry. + * + * Return: %0 on success, negative error code on failure. + */ +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, + struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val) +{ + int ret; + + memset(ctx, 0, sizeof(*ctx)); + ctx->exec = &vm_exec->exec; + ctx->exec_flags = vm_exec->flags; + ctx->val = val; + if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT) + ctx->val_flags.interruptible = 1; + if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES) + ctx->val_flags.exec_ignore_duplicates = 1; +retry: + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + ret = drm_gpuvm_exec_lock(vm_exec); + if (ret) { + xe_validation_unlock(ctx); + if (__xe_validation_should_retry(ctx, ret)) + goto retry; + } + + return ret; +} + +/** + * xe_validation_ctx_fini() - Finalize a validation transaction + * @ctx: The Validation transaction to finalize. + * + * Finalize a validation transaction and its related drm_exec transaction. + */ +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx) +{ + if (ctx->exec) + drm_exec_fini(ctx->exec); + xe_validation_unlock(ctx); +} + +/** + * xe_validation_should_retry() - Determine if a validation transaction should retry + * @ctx: The validation transaction. + * @ret: Pointer to a return value variable. + * + * Determines whether a validation transaction should retry based on the + * internal transaction state and the return value pointed to by @ret. + * If a validation should be retried, the transaction is prepared for that, + * and the validation locked might be re-locked in exclusive mode, and *@ret + * is set to %0. If the re-locking errors, typically due to interruptible + * locking with signal pending, *@ret is instead set to -EINTR and the + * function returns %false. + * + * Return: %true if validation should be retried, %false otherwise. 
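 *
 * For context, this function also backs the xe_validation_guard() and
 * xe_validation_retry_on_oom() helpers. A minimal sketch of that calling
 * pattern, mirroring the call sites converted in this series (vm and bo are
 * placeholder variables, error handling trimmed):
 *
 *	struct xe_validation_ctx ctx;
 *	struct drm_exec exec;
 *	int err = 0;
 *
 *	xe_validation_guard(&ctx, &vm->xe->val, &exec,
 *			    (struct xe_val_flags) {.interruptible = true}, err) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (!err)
 *			err = xe_bo_validate(bo, vm, false, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		xe_validation_retry_on_oom(&ctx, &err);
 *	}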
+ */ +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret) +{ + if (__xe_validation_should_retry(ctx, *ret)) { + drm_exec_fini(ctx->exec); + *ret = 0; + if (ctx->request_exclusive != ctx->lock_held_exclusive) { + xe_validation_unlock(ctx); + *ret = xe_validation_lock(ctx); + } + drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr); + return !*ret; + } + + return false; +} diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h new file mode 100644 index 000000000000..fec331d791e7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_VALIDATION_H_ +#define _XE_VALIDATION_H_ + +#include <linux/dma-resv.h> +#include <linux/types.h> +#include <linux/rwsem.h> + +struct drm_exec; +struct drm_gem_object; +struct drm_gpuvm_exec; +struct xe_device; + +#ifdef CONFIG_PROVE_LOCKING +/** + * xe_validation_lockdep() - Assert that a drm_exec locking transaction can + * be initialized at this point. + */ +static inline void xe_validation_lockdep(void) +{ + struct ww_acquire_ctx ticket; + + ww_acquire_init(&ticket, &reservation_ww_class); + ww_acquire_fini(&ticket); +} +#else +static inline void xe_validation_lockdep(void) +{ +} +#endif + +/* + * Various values of the drm_exec pointer where we've not (yet) + * implemented full ww locking. + * + * XE_VALIDATION_UNIMPLEMENTED means implementation is pending. + * A lockdep check is made to assure that a drm_exec locking + * transaction can actually take place where the macro is + * used. If this asserts, the exec pointer needs to be assigned + * higher up in the callchain and passed down. + * + * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where + * the dma-buf layer doesn't support WW locking. + * + * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where + * exhaustive eviction isn't necessary. + */ +#define __XE_VAL_UNIMPLEMENTED -EINVAL +#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED)) + +#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP +#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED)) + +#define __XE_VAL_OPT_OUT -ENOMEM +#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT)) +#ifdef CONFIG_DRM_XE_DEBUG +void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec, + const struct drm_gem_object *obj); +#else +#define xe_validation_assert_exec(_xe, _exec, _obj) \ + do { \ + (void)_xe; (void)_exec; (void)_obj; \ + } while (0) +#endif + +/** + * struct xe_validation_device - The domain for exhaustive eviction + * @lock: The lock used to exclude other processes from allocating graphics memory + * + * The struct xe_validation_device represents the domain for which we want to use + * exhaustive eviction. The @lock is typically grabbed in read mode for allocations + * but when graphics memory allocation fails, it is retried with the write mode held. + */ +struct xe_validation_device { + struct rw_semaphore lock; +}; + +/** + * struct xe_val_flags - Flags for xe_validation_ctx_init(). + * @exclusive: Start the validation transaction by locking out all other validators. + * @no_block: Don't block on initialization. + * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec + * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag. 
+ * @exec_ignore_duplicates: Initialize the drm_exec context with the + * DRM_EXEC_IGNORE_DUPLICATES flag. + */ +struct xe_val_flags { + u32 exclusive :1; + u32 no_block :1; + u32 interruptible :1; + u32 exec_ignore_duplicates :1; +}; + +/** + * struct xe_validation_ctx - A struct drm_exec subclass with support for + * exhaustive eviction + * @exec: The drm_exec object base class. Note that we use a pointer instead of + * embedding to avoid diamond inheritance. + * @val: The exhaustive eviction domain. + * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init. + * @lock_held: Whether The domain lock is currently held. + * @lock_held_exclusive: Whether the domain lock is held in exclusive mode. + * @request_exclusive: Whether to lock exclusively (write mode) the next time + * the domain lock is locked. + * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. + * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton. + */ +struct xe_validation_ctx { + struct drm_exec *exec; + struct xe_validation_device *val; + struct xe_val_flags val_flags; + bool lock_held; + bool lock_held_exclusive; + bool request_exclusive; + u32 exec_flags; + unsigned int nr; +}; + +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags); + +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val); + +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx); + +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret); + +/** + * xe_validation_retry_on_oom() - Retry on oom in an xe_validaton transaction + * @_ctx: Pointer to the xe_validation_ctx + * @_ret: The current error value possibly holding -ENOMEM + * + * Use this in way similar to drm_exec_retry_on_contention(). + * If @_ret contains -ENOMEM the tranaction is restarted once in a way that + * blocks other transactions and allows exhastive eviction. If the transaction + * was already restarted once, Just return the -ENOMEM. May also set + * _ret to -EINTR if not retrying and waits are interruptible. + * May only be used within a drm_exec_until_all_locked() loop. + */ +#define xe_validation_retry_on_oom(_ctx, _ret) \ + do { \ + if (xe_validation_should_retry(_ctx, _ret)) \ + goto *__drm_exec_retry_ptr; \ + } while (0) + +/** + * xe_validation_device_init - Initialize a struct xe_validation_device + * @val: The xe_validation_device to init. + */ +static inline void +xe_validation_device_init(struct xe_validation_device *val) +{ + init_rwsem(&val->lock); +} + +/* + * Make guard() and scoped_guard() work with xe_validation_ctx + * so that we can exit transactions without caring about the + * cleanup. + */ +DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, + if (_T) xe_validation_ctx_fini(_T);, + ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + _ret ? NULL : _ctx; }), + struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, + struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); +static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) +{return *_T; } +#define class_xe_validation_is_conditional true + +/** + * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction + * @_ctx: The xe_validation_ctx. + * @_val: The xe_validation_device. + * @_exec: The struct drm_exec object + * @_flags: Flags for the xe_validation_ctx initialization. 
+ * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called. + * + * This macro is will initiate a drm_exec transaction with additional support for + * exhaustive eviction. + */ +#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + drm_exec_until_all_locked(_exec) + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c00a5ff31817..027e6ce648c5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -41,7 +41,6 @@ #include "xe_tlb_inval.h" #include "xe_trace_bo.h" #include "xe_wa.h" -#include "xe_hmm.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { @@ -49,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) } /** - * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @uvma: The userptr vma + * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction + * @vm: The vm whose resv is to be locked. + * @exec: The drm_exec transaction. * - * Check if the userptr vma has been invalidated since last successful - * repin. The check is advisory only and can the function can be called - * without the vm->userptr.notifier_lock held. There is no guarantee that the - * vma userptr will remain valid after a lockless check, so typically - * the call needs to be followed by a proper check under the notifier_lock. + * Helper to lock the vm's resv as part of a drm_exec transaction. * - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + * Return: %0 on success. See drm_exec_lock_obj() for error codes. */ -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) { - return mmu_interval_check_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq) ? 
- -EAGAIN : 0; -} - -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) -{ - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - lockdep_assert_held(&vm->lock); - xe_assert(xe, xe_vma_is_userptr(vma)); - - return xe_hmm_userptr_populate_range(uvma, false); + return drm_exec_lock_obj(exec, xe_vm_obj(vm)); } static bool preempt_fences_waiting(struct xe_vm *vm) @@ -228,6 +210,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) .num_fences = 1, }; struct drm_exec *exec = &vm_exec.exec; + struct xe_validation_ctx ctx; struct dma_fence *pfence; int err; bool wait; @@ -235,14 +218,14 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); down_write(&vm->lock); - err = drm_gpuvm_exec_lock(&vm_exec); + err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); if (err) goto out_up_write; pfence = xe_preempt_fence_create(q, q->lr.context, ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; + if (IS_ERR(pfence)) { + err = PTR_ERR(pfence); goto out_fini; } @@ -250,7 +233,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) ++vm->preempt.num_exec_queues; q->lr.pfence = pfence; - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); @@ -264,10 +247,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) if (wait) dma_fence_enable_sw_signaling(pfence); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_fini: - drm_exec_fini(exec); + xe_validation_ctx_fini(&ctx); out_up_write: up_write(&vm->lock); @@ -300,25 +283,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_write(&vm->lock); } -/** - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs - * that need repinning. - * @vm: The VM. - * - * This function checks for whether the VM has userptrs that need repinning, - * and provides a release-type barrier on the userptr.notifier_lock after - * checking. - * - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. - */ -int __xe_vm_userptr_needs_repin(struct xe_vm *vm) -{ - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - return (list_empty(&vm->userptr.repin_list) && - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 /** @@ -350,39 +314,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked) /* TODO: Inform user the VM is banned */ } -/** - * xe_vm_validate_should_retry() - Whether to retry after a validate error. - * @exec: The drm_exec object used for locking before validation. - * @err: The error returned from ttm_bo_validate(). - * @end: A ktime_t cookie that should be set to 0 before first use and - * that should be reused on subsequent calls. - * - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Check if that is necessary, and - * if so unlock the drm_exec's objects while keeping the ticket to prepare - * for a rerun. - * - * Return: true if a retry after drm_exec_init() is recommended; - * false otherwise. 
- */ -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) -{ - ktime_t cur; - - if (err != -ENOMEM) - return false; - - cur = ktime_get(); - *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (!ktime_before(cur, *end)) - return false; - - msleep(20); - return true; -} - static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); @@ -397,7 +328,7 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) if (!try_wait_for_completion(&vm->xe->pm_block)) return -EAGAIN; - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); if (ret) return ret; @@ -513,10 +444,10 @@ void xe_vm_resume_rebind_worker(struct xe_vm *vm) static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct xe_validation_ctx ctx; struct drm_exec exec; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); - ktime_t end = 0; int err = 0; long wait; int __maybe_unused tries = 0; @@ -544,18 +475,19 @@ retry: goto out_unlock_outer; } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}); + if (err) + goto out_unlock_outer; drm_exec_until_all_locked(&exec) { bool done = false; err = xe_preempt_work_begin(&exec, vm, &done); drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); if (err || done) { - drm_exec_fini(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - + xe_validation_ctx_fini(&ctx); goto out_unlock_outer; } } @@ -564,7 +496,9 @@ retry: if (err) goto out_unlock; + xe_vm_set_validation_exec(vm, &exec); err = xe_vm_rebind(vm, true); + xe_vm_set_validation_exec(vm, NULL); if (err) goto out_unlock; @@ -582,9 +516,9 @@ retry: (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ __xe_vm_userptr_needs_repin(__vm)) - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); if (retry_required(tries, vm)) { - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err = -EAGAIN; goto out_unlock; } @@ -598,10 +532,10 @@ retry: /* Point of no return. */ arm_preempt_fences(vm, &preempt_fences); resume_and_reinstall_preempt_fences(vm, &exec); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_unlock: - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); @@ -619,203 +553,6 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - /* - * Tell exec and rebind worker they need to repin and rebind this - * userptr. - */ - if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&userptr->invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - } - - /* - * Preempt fences turn into schedule disables, pipeline these. - * Note that even in fault mode, we need to wait for binds and - * unbinds to complete, and those are attached as BOOKMARK fences - * to the vm. 
- */ - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { - err = xe_vm_invalidate_vma(vma); - XE_WARN_ON(err); - } - - xe_hmm_userptr_unmap(uvma); -} - -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - __vma_userptr_invalidate(vm, uvma); - up_write(&vm->userptr.notifier_lock); - trace_xe_vma_userptr_invalidate_complete(vma); - - return true; -} - -static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { - .invalidate = vma_userptr_invalidate, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -/** - * xe_vma_userptr_force_invalidate() - force invalidate a userptr - * @uvma: The userptr vma to invalidate - * - * Perform a forced userptr invalidation for testing purposes. - */ -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) -{ - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - /* Protect against concurrent userptr pinning */ - lockdep_assert_held(&vm->lock); - /* Protect against concurrent notifiers */ - lockdep_assert_held(&vm->userptr.notifier_lock); - /* - * Protect against concurrent instances of this function and - * the critical exec sections - */ - xe_vm_assert_held(vm); - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq)) - uvma->userptr.notifier_seq -= 2; - __vma_userptr_invalidate(vm, uvma); -} -#endif - -int xe_vm_userptr_pin(struct xe_vm *vm) -{ - struct xe_userptr_vma *uvma, *next; - int err = 0; - - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); - lockdep_assert_held_write(&vm->lock); - - /* Collect invalidated userptrs */ - spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, - userptr.invalidate_link) { - list_del_init(&uvma->userptr.invalidate_link); - list_add_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); - } - spin_unlock(&vm->userptr.invalidated_lock); - - /* Pin and move to bind list */ - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - err = xe_vma_userptr_pin_pages(uvma); - if (err == -EFAULT) { - list_del_init(&uvma->userptr.repin_link); - /* - * We might have already done the pin once already, but - * then had to retry before the re-bind happened, due - * some other condition in the caller, but in the - * meantime the userptr got dinged by the notifier such - * that we need to revalidate here, but this time we hit - * the EFAULT. In such a case make sure we remove - * ourselves from the rebind list to avoid going down in - * flames. 
- */ - if (!list_empty(&uvma->vma.combined_links.rebind)) - list_del_init(&uvma->vma.combined_links.rebind); - - /* Wait for pending binds */ - xe_vm_lock(vm, false); - dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - - down_read(&vm->userptr.notifier_lock); - err = xe_vm_invalidate_vma(&uvma->vma); - up_read(&vm->userptr.notifier_lock); - xe_vm_unlock(vm); - if (err) - break; - } else { - if (err) - break; - - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, - &vm->rebind_list); - } - } - - if (err) { - down_write(&vm->userptr.notifier_lock); - spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - } - spin_unlock(&vm->userptr.invalidated_lock); - up_write(&vm->userptr.notifier_lock); - } - return err; -} - -/** - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs - * that need repinning. - * @vm: The VM. - * - * This function does an advisory check for whether the VM has userptrs that - * need repinning. - * - * Return: 0 if there are no indications of userptrs needing repinning, - * -EAGAIN if there are. - */ -int xe_vm_userptr_check_repin(struct xe_vm *vm) -{ - return (list_empty_careful(&vm->userptr.repin_list) && - list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) { int i; @@ -1280,25 +1017,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + struct xe_userptr_vma *uvma = to_userptr_vma(vma); u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&userptr->invalidate_link); - INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - mutex_init(&userptr->unmap_mutex); - err = mmu_interval_notifier_insert(&userptr->notifier, - current->mm, - xe_vma_userptr(vma), size, - &vma_userptr_notifier_ops); + err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); if (err) { xe_vma_free(vma); return ERR_PTR(err); } - - userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -1318,18 +1047,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma) if (xe_vma_is_userptr(vma)) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); - struct xe_userptr *userptr = &uvma->userptr; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - /* - * Since userptr pages are not pinned, we can't remove - * the notifier until we're sure the GPU is not accessing - * them anymore - */ - mmu_interval_notifier_remove(&userptr->notifier); - mutex_destroy(&userptr->unmap_mutex); + xe_userptr_remove(uvma); xe_vm_put(vm); } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { xe_vm_put(vm); @@ -1366,11 +1085,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) if (xe_vma_is_userptr(vma)) { xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); - - spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); - list_del(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); + xe_userptr_destroy(to_userptr_vma(vma)); } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { 
xe_bo_assert_held(xe_vma_bo(vma)); @@ -1418,20 +1133,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) static void xe_vma_destroy_unlocked(struct xe_vma *vma) { + struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_validation_ctx ctx; struct drm_exec exec; - int err; + int err = 0; - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; + xe_vma_destroy(vma, NULL); } - - xe_vma_destroy(vma, NULL); - - drm_exec_fini(&exec); + xe_assert(xe, !err); } struct xe_vma * @@ -1656,6 +1370,7 @@ static void vm_destroy_work_func(struct work_struct *w); * @xe: xe device. * @tile: tile to set up for. * @vm: vm to set up for. + * @exec: The struct drm_exec object used to lock the vm resv. * * Sets up a pagetable tree with one page-table per level and a single * leaf PTE. All pagetable entries point to the single page-table or, @@ -1665,20 +1380,19 @@ static void vm_destroy_work_func(struct work_struct *w); * Return: 0 on success, negative error code on error. */ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { u8 id = tile->id; int i; for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); if (IS_ERR(vm->scratch_pt[id][i])) { int err = PTR_ERR(vm->scratch_pt[id][i]); vm->scratch_pt[id][i] = NULL; return err; } - xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } @@ -1706,9 +1420,26 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } +static void xe_vm_pt_destroy(struct xe_vm *vm) +{ + struct xe_tile *tile; + u8 id; + + xe_vm_assert_held(vm); + + for_each_tile(tile, vm->xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } +} + struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm; int err, number_tiles = 0; struct xe_tile *tile; @@ -1752,7 +1483,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_LIST_HEAD(&vm->userptr.repin_list); INIT_LIST_HEAD(&vm->userptr.invalidated); - init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); ttm_lru_bulk_move_init(&vm->lru_bulk_move); @@ -1779,11 +1509,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_LIST_HEAD(&vm->preempt.pm_activate_link); } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_no_resv; - } + err = xe_svm_init(vm); + if (err) + goto err_no_resv; vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1796,49 +1524,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) drm_gem_object_put(vm_resv_obj); - err = xe_vm_lock(vm, true); - if (err) - goto err_close; - - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAG_64K; - - for_each_tile(tile, xe, id) { - if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_TILE_ID(flags)) - continue; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + err = xe_vm_drm_exec_lock(vm, &exec); + 
drm_exec_retry_on_contention(&exec); - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); - if (IS_ERR(vm->pt_root[id])) { - err = PTR_ERR(vm->pt_root[id]); - vm->pt_root[id] = NULL; - goto err_unlock_close; - } - } + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAG_64K; - if (xe_vm_has_scratch(vm)) { for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) + if (flags & XE_VM_FLAG_MIGRATION && + tile->id != XE_VM_FLAG_TILE_ID(flags)) continue; - err = xe_vm_create_scratch(xe, tile, vm); + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, + &exec); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + if (err) + break; + + if (xe_vm_has_scratch(vm)) { + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; + + err = xe_vm_create_scratch(xe, tile, vm, &exec); + if (err) { + xe_vm_free_scratch(vm); + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } if (err) - goto err_unlock_close; + break; + vm->batch_invalidate_tlb = true; } - vm->batch_invalidate_tlb = true; - } - if (vm->flags & XE_VM_FLAG_LR_MODE) - vm->batch_invalidate_tlb = false; + if (vm->flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->batch_invalidate_tlb = false; + } - /* Fill pt_root after allocating scratch tables */ - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; + /* Fill pt_root after allocating scratch tables */ + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + } } - xe_vm_unlock(vm); + if (err) + goto err_close; /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1871,7 +1618,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) &xe->usm.next_asid, GFP_KERNEL); up_write(&xe->usm.lock); if (err < 0) - goto err_unlock_close; + goto err_close; vm->usm.asid = asid; } @@ -1880,8 +1627,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) return vm; -err_unlock_close: - xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); @@ -1988,9 +1733,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) vma = gpuva_to_vma(gpuva); if (xe_vma_has_no_bo(vma)) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); } xe_vm_remove_vma(vm, vma); @@ -2014,13 +1759,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) * destroy the pagetables immediately. 
*/ xe_vm_free_scratch(vm); - - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } + xe_vm_pt_destroy(vm); xe_vm_unlock(vm); /* @@ -2034,8 +1773,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vma_destroy_unlocked(vma); } - if (xe_vm_in_fault_mode(vm)) - xe_svm_fini(vm); + xe_svm_fini(vm); up_write(&vm->lock); @@ -2328,6 +2066,8 @@ int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_ err = copy_to_user(attrs_user, mem_attrs, args->sizeof_mem_range_attr * args->num_mem_ranges); + if (err) + err = -EFAULT; free_mem_attrs: kvfree(mem_attrs); @@ -2376,9 +2116,9 @@ static const u32 region_to_mem_type[] = { static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, bool post_commit) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_remove_vma(vm, vma); } @@ -2639,6 +2379,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, struct xe_vma_mem_attr *attr, unsigned int flags) { struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; + struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vma *vma; int err = 0; @@ -2646,9 +2387,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, lockdep_assert_held_write(&vm->lock); if (bo) { - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = 0; + err = 0; + xe_validation_guard(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}, err) { if (!bo->vm) { err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); drm_exec_retry_on_contention(&exec); @@ -2657,27 +2398,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, err = drm_exec_lock_obj(&exec, &bo->ttm.base); drm_exec_retry_on_contention(&exec); } - if (err) { - drm_exec_fini(&exec); + if (err) return ERR_PTR(err); - } - } - } - vma = xe_vma_create(vm, bo, op->gem.offset, - op->va.addr, op->va.addr + - op->va.range - 1, attr, flags); - if (IS_ERR(vma)) - goto err_unlock; - if (xe_vma_is_userptr(vma)) - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - else if (!xe_vma_has_no_bo(vma) && !bo->vm) - err = add_preempt_fences(vm, bo); + vma = xe_vma_create(vm, bo, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; -err_unlock: - if (bo) - drm_exec_fini(&exec); + if (!bo->vm) { + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy(vma, NULL); + } + } + } + if (err) + return ERR_PTR(err); + } else { + vma = xe_vma_create(vm, NULL, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; + if (xe_vma_is_userptr(vma)) + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + } if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -3021,9 +2770,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -3042,9 +2791,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, 
xe_vma_destroy_unlocked(op->remap.next); } if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -3094,7 +2843,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm)); + !xe_vm_in_preempt_fence_mode(vm), exec); } return err; @@ -3132,6 +2881,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = devmem_possible; ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; + ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); /* TODO: Threading the migration */ xa_for_each(&op->prefetch_range.range, i, svm_range) { @@ -3212,7 +2962,9 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), - region_to_mem_type[region]); + region_to_mem_type[region], + NULL, + exec); break; } default: @@ -3475,35 +3227,37 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { + struct xe_validation_ctx ctx; struct drm_exec exec; struct dma_fence *fence; - int err; + int err = 0; lockdep_assert_held_write(&vm->lock); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &vm->xe->val, &exec, + ((struct xe_val_flags) { + .interruptible = true, + .exec_ignore_duplicates = true, + }), err) { err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); drm_exec_retry_on_contention(&exec); - if (err) { - fence = ERR_PTR(err); - goto unlock; - } + xe_validation_retry_on_oom(&ctx, &err); + if (err) + return ERR_PTR(err); + xe_vm_set_validation_exec(vm, &exec); fence = ops_execute(vm, vops); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { if (PTR_ERR(fence) == -ENODATA) vm_bind_ioctl_ops_fini(vm, vops, NULL); - goto unlock; + return fence; } vm_bind_ioctl_ops_fini(vm, vops, fence); } -unlock: - drm_exec_fini(&exec); - return fence; + return err ? 
ERR_PTR(err) : fence; } ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); @@ -3619,6 +3373,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && + !IS_ENABLED(CONFIG_DRM_GPUSVM)) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, prefetch_region && @@ -4054,10 +3810,14 @@ release_vm_lock: */ int xe_vm_lock(struct xe_vm *vm, bool intr) { + int ret; + if (intr) - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + else + ret = dma_resv_lock(xe_vm_resv(vm), NULL); - return dma_resv_lock(xe_vm_resv(vm), NULL); + return ret; } /** @@ -4164,13 +3924,13 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { - lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || - (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && lockdep_is_held(&xe_vm_resv(vm)->lock.base))); WARN_ON_ONCE(!mmu_interval_check_retry (&to_userptr_vma(vma)->userptr.notifier, - to_userptr_vma(vma)->userptr.notifier_seq)); + to_userptr_vma(vma)->userptr.pages.notifier_seq)); WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index d631c4b25c51..ef8a5019574e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -220,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); -int xe_vm_userptr_pin(struct xe_vm *vm); - -int __xe_vm_userptr_needs_repin(struct xe_vm *vm); - -int xe_vm_userptr_check_repin(struct xe_vm *vm); - int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask); @@ -266,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); - -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); - -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); - int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, @@ -302,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked); */ #define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) #define vm_dbg drm_dbg #else @@ -331,7 +321,7 @@ static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) if (vm && !allow_res_evict) { xe_vm_assert_held(vm); /* Pairs with READ_ONCE in xe_vm_is_validating() */ - WRITE_ONCE(vm->validating, current); + WRITE_ONCE(vm->validation.validating, current); } } @@ -349,7 +339,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict { if (vm && !allow_res_evict) { /* Pairs with READ_ONCE in xe_vm_is_validating() */ - WRITE_ONCE(vm->validating, NULL); + WRITE_ONCE(vm->validation.validating, NULL); } } @@ -367,7 
+357,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict static inline bool xe_vm_is_validating(struct xe_vm *vm) { /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ - if (READ_ONCE(vm->validating) == current) { + if (READ_ONCE(vm->validation.validating) == current) { xe_vm_assert_held(vm); return true; } @@ -375,6 +365,34 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm) } /** + * xe_vm_set_validation_exec() - Accessor to set the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * @exec: The exec object we want to register. + * + * Set the drm_exec object used to lock the vm's resv. + */ +static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec) +{ + xe_vm_assert_held(vm); + xe_assert(vm->xe, !!exec ^ !!vm->validation._exec); + vm->validation._exec = exec; +} + +/** + * xe_vm_set_validation_exec() - Accessor to read the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * + * Return: The drm_exec object used to lock the vm's resv. The value + * is a valid pointer, %NULL, or one of the special values defined in + * xe_validation.h. + */ +static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm) +{ + xe_vm_assert_held(vm); + return vm->validation._exec; +} + +/** * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has * a valid GPU mapping * @tile: The tile which the GPU mapping belongs to @@ -393,11 +411,4 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm) #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); -#else -static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) -{ -} -#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 09c5783ee523..cad3cf627c3f 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -18,9 +18,8 @@ struct xe_vmas_in_madvise_range { u64 range; struct xe_vma **vmas; int num_vmas; - bool has_svm_vmas; bool has_bo_vmas; - bool has_userptr_vmas; + bool has_svm_userptr_vmas; }; static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) @@ -46,10 +45,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_r if (xe_vma_bo(vma)) madvise_range->has_bo_vmas = true; - else if (xe_vma_is_cpu_addr_mirror(vma)) - madvise_range->has_svm_vmas = true; - else if (xe_vma_is_userptr(vma)) - madvise_range->has_userptr_vmas = true; + else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma)) + madvise_range->has_svm_userptr_vmas = true; if (madvise_range->num_vmas == max_vmas) { max_vmas <<= 1; @@ -127,8 +124,6 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, vmas[i]->attr.atomic_access = op->atomic.val; } - vmas[i]->attr.atomic_access = op->atomic.val; - bo = xe_vma_bo(vmas[i]); if (!bo || bo->attr.atomic_access == op->atomic.val) continue; @@ -201,12 +196,12 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) if (xe_pt_zap_ptes(tile, vma)) { tile_mask |= BIT(id); - /* - * WRITE_ONCE pairs with READ_ONCE - * in xe_vm_has_valid_gpu_mapping() - */ - WRITE_ONCE(vma->tile_invalidated, - vma->tile_invalidated | BIT(id)); + /* + * WRITE_ONCE pairs with READ_ONCE + * in 
xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(id)); } } } @@ -256,7 +251,7 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) return false; - if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) return false; break; } @@ -409,29 +404,20 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } } - if (madvise_range.has_userptr_vmas) { - err = down_read_interruptible(&vm->userptr.notifier_lock); + if (madvise_range.has_svm_userptr_vmas) { + err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_fini; } - if (madvise_range.has_svm_vmas) { - err = down_read_interruptible(&vm->svm.gpusvm.notifier_lock); - if (err) - goto unlock_userptr; - } - attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); - if (madvise_range.has_svm_vmas) + if (madvise_range.has_svm_userptr_vmas) xe_svm_notifier_unlock(vm); -unlock_userptr: - if (madvise_range.has_userptr_vmas) - up_read(&vm->userptr.notifier_lock); err_fini: if (madvise_range.has_bo_vmas) drm_exec_fini(&exec); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index e1a786db5f89..da39940501d8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -17,6 +17,7 @@ #include "xe_device_types.h" #include "xe_pt_types.h" #include "xe_range_fence.h" +#include "xe_userptr.h" struct xe_bo; struct xe_svm_range; @@ -46,37 +47,6 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) -/** struct xe_userptr - User pointer */ -struct xe_userptr { - /** @invalidate_link: Link for the vm::userptr.invalidated list */ - struct list_head invalidate_link; - /** @userptr: link into VM repin list if userptr. */ - struct list_head repin_link; - /** - * @notifier: MMU notifier for user pointer (invalidation call back) - */ - struct mmu_interval_notifier notifier; - /** @sgt: storage for a scatter gather table */ - struct sg_table sgt; - /** @sg: allocated scatter gather table */ - struct sg_table *sg; - /** @notifier_seq: notifier sequence number */ - unsigned long notifier_seq; - /** @unmap_mutex: Mutex protecting dma-unmapping */ - struct mutex unmap_mutex; - /** - * @initial_bind: user pointer has been bound at least once. - * write: vm->userptr.notifier_lock in read mode and vm->resv held. - * read: vm->userptr.notifier_lock in write mode or vm->resv held. - */ - bool initial_bind; - /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ - bool mapped; -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) - u32 divisor; -#endif -}; - /** * struct xe_vma_mem_attr - memory attributes associated with vma */ @@ -140,10 +110,10 @@ struct xe_vma { /** * @tile_invalidated: Tile mask of binding are invalidated for this VMA. - * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in - * write mode for writing or vm->userptr.notifier_lock in read mode and + * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in + * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and * the vm->resv. 
For stable reading, BO's resv or userptr - * vm->userptr.notifier_lock in read mode is required. Can be + * vm->svm.gpusvm.notifier_lock in read mode is required. Can be * opportunistically read with READ_ONCE outside of locks. */ u8 tile_invalidated; @@ -154,7 +124,7 @@ struct xe_vma { /** * @tile_present: Tile mask of binding are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, - * vm->userptr.notifier_lock for writing. Needs either for reading, + * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held * in write mode. */ @@ -289,33 +259,7 @@ struct xe_vm { const struct xe_pt_ops *pt_ops; /** @userptr: user pointer state */ - struct { - /** - * @userptr.repin_list: list of VMAs which are user pointers, - * and needs repinning. Protected by @lock. - */ - struct list_head repin_list; - /** - * @notifier_lock: protects notifier in write mode and - * submission in read mode. - */ - struct rw_semaphore notifier_lock; - /** - * @userptr.invalidated_lock: Protects the - * @userptr.invalidated list. - */ - spinlock_t invalidated_lock; - /** - * @userptr.invalidated: List of invalidated userptrs, not yet - * picked - * up for revalidation. Protected from access with the - * @invalidated_lock. Removing items from the list - * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifier_lock in - * write mode, OR @lock in write mode. - */ - struct list_head invalidated; - } userptr; + struct xe_userptr_vm userptr; /** @preempt: preempt state */ struct { @@ -363,18 +307,34 @@ struct xe_vm { } error_capture; /** + * @validation: Validation data only valid with the vm resv held. + * Note: This is really task state of the task holding the vm resv, + * and moving forward we should + * come up with a better way of passing this down the call- + * chain. + */ + struct { + /** + * @validation.validating: The task that is currently making bos resident. + * for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; + /** + * @validation.exec The drm_exec context used when locking the vm resv. + * Protected by the vm's resv. + */ + struct drm_exec *_exec; + } validation; + + /** * @tlb_flush_seqno: Required TLB flush seqno for the next exec. * protected by the vm resv. */ u64 tlb_flush_seqno; - /** - * @validating: The task that is currently making bos resident for this vm. - * Protected by the VM's resv for writing. Opportunistic reading can be done - * using READ_ONCE. Note: This is a workaround for the - * TTM eviction_valuable() callback not being passed a struct - * ttm_operation_context(). Future work might want to address this. - */ - struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 52c7df4c3afd..cd03891654a1 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -39,7 +39,8 @@ * Register Immediate commands) once when initializing the device and saved in * the default context. 
That default context is then used on every context * creation to have a "primed golden context", i.e. a context image that - * already contains the changes needed to all the registers. + * already contains the changes needed to all the registers. See + * drivers/gpu/drm/xe/xe_lrc.c for default context handling. * * - Engine workarounds: the list of these WAs is applied whenever the specific * engine is reset. It's also possible that a set of engine classes share a @@ -48,10 +49,10 @@ * them need to keeep the workaround programming: the approach taken in the * driver is to tie those workarounds to the first compute/render engine that * is registered. When executing with GuC submission, engine resets are - * outside of kernel driver control, hence the list of registers involved in + * outside of kernel driver control, hence the list of registers involved is * written once, on engine initialization, and then passed to GuC, that * saves/restores their values before/after the reset takes place. See - * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference. + * drivers/gpu/drm/xe/xe_guc_ads.c for reference. * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -66,21 +67,39 @@ * hardware on every HW context restore. These buffers are created and * programmed in the default context so the hardware always go through those * programming sequences when switching contexts. The support for workaround - * batchbuffers is enabled these hardware mechanisms: + * batchbuffers is enabled via these hardware mechanisms: * - * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default - * context, pointing the hardware to jump to that location when that offset - * is reached in the context restore. Workaround batchbuffer in the driver - * currently uses this mechanism for all platforms. + * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer + * and an offset are provided in the default context, pointing the hardware + * to jump to that location when that offset is reached in the context + * restore. When a context is being restored, this is executed after the + * ring context, in the middle (or beginning) of the engine context image. * - * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context, - * pointing the hardware to a buffer to continue executing after the - * engine registers are restored in a context restore sequence. This is - * currently not used in the driver. + * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A + * batchbuffer is provided in the default context, pointing the hardware to + * a buffer to continue executing after the engine registers are restored + * in a context restore sequence. + * + * Below is the timeline for a context restore sequence: + * + * .. code:: + * + * INDIRECT_CTX_OFFSET + * |----------->| + * .------------.------------.-------------.------------.--------------.-----------. + * |Ring | Engine | Mid-context | Engine | Post-context | Ring | + * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution | + * `------------'------------'-------------'------------'--------------'-----------' * * - Other/OOB: There are WAs that, due to their nature, cannot be applied from * a central place. Those are peppered around the rest of the code, as needed. - * Workarounds related to the display IP are the main example. 
+ * There's a central place to control which workarounds are enabled: + * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and + * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds. + * These files only record which workarounds are enabled: during early device + * initialization those rules are evaluated and recorded by the driver. Then + * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to + * implement them. * * .. [1] Technically, some registers are powercontext saved & restored, so they * survive a suspend/resume. In practice, writing them again is not too @@ -612,6 +631,13 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* Xe2_LPM */ @@ -672,6 +698,13 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -879,6 +912,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 338c344dcd7d..f3a6d5d239ce 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -49,7 +49,6 @@ 16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) -no_media_l3 MEDIA_VERSION_RANGE(3000, 3002) 14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) 16021333562 GRAPHICS_VERSION_RANGE(1200, 1274) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 274989ea1fb4..edc72052e27a 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*, sizes::SZ_16M, sync::Arc}; +use kernel::{ + auxiliary, c_str, + device::Core, + pci, + pci::{Class, ClassMask, Vendor}, + prelude::*, + sizes::SZ_16M, + sync::Arc, +}; use crate::gpu::Gpu; @@ -18,10 +26,25 @@ kernel::pci_device_table!( PCI_TABLE, MODULE_PCI_TABLE, <NovaCore as pci::Driver>::IdInfo, - [( - pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as u32), - () - )] + [ + // Modern NVIDIA GPUs will show up as either VGA or 3D controllers. 
+ ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_VGA, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_3D, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ] ); impl pci::Driver for NovaCore { @@ -34,14 +57,19 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); - let bar = Arc::pin_init( + let devres_bar = Arc::pin_init( pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), GFP_KERNEL, )?; + // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of + // `devres_bar`. + let bar_clone = Arc::clone(&devres_bar); + let bar = bar_clone.access(pdev.as_ref())?; + let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, devres_bar, bar), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), @@ -54,4 +82,8 @@ impl pci::Driver for NovaCore { Ok(this) } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } } diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 50437c67c14a..37e6298195e4 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -4,16 +4,17 @@ use core::ops::Deref; use hal::FalconHal; -use kernel::bindings; use kernel::device; +use kernel::dma::DmaAddress; use kernel::prelude::*; +use kernel::sync::aref::ARef; use kernel::time::Delta; -use kernel::types::ARef; use crate::dma::DmaObject; use crate::driver::Bar0; use crate::gpu::Chipset; use crate::regs; +use crate::regs::macros::RegisterBase; use crate::util; pub(crate) mod gsp; @@ -274,14 +275,25 @@ impl From<bool> for FalconFbifMemType { } } -/// Trait defining the parameters of a given Falcon instance. -pub(crate) trait FalconEngine: Sync { - /// Base I/O address for the falcon, relative from which its registers are accessed. - const BASE: usize; +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used +/// to identify a given Falcon instance with register I/O methods. +pub(crate) trait FalconEngine: + Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized +{ + /// Singleton of the engine, used to identify it with register I/O methods. + const ID: Self; } /// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconLoadTarget { /// Offset from the start of the source object to copy from. pub(crate) src_start: u32, @@ -292,7 +304,7 @@ pub(crate) struct FalconLoadTarget { } /// Parameters for the falcon boot ROM. -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct FalconBromParams { /// Offset in `DMEM`` of the firmware's signature. pub(crate) pkc_data_offset: u32, @@ -343,13 +355,13 @@ impl<E: FalconEngine + 'static> Falcon<E> { bar: &Bar0, need_riscv: bool, ) -> Result<Self> { - let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, E::BASE); + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); // Check that the revision and security model contain valid values. 
let _ = hwcfg1.core_rev()?; let _ = hwcfg1.security_model()?; if need_riscv { - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if !hwcfg2.riscv() { dev_err!( dev, @@ -369,7 +381,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. util::wait_on(Delta::from_millis(20), || { - if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { Some(()) } else { None @@ -379,12 +391,12 @@ impl<E: FalconEngine + 'static> Falcon<E> { /// Reset the falcon engine. fn reset_eng(&self, bar: &Bar0) -> Result { - let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. let _ = util::wait_on(Delta::from_micros(150), || { - let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); if r.reset_ready() { Some(()) } else { @@ -392,13 +404,13 @@ impl<E: FalconEngine + 'static> Falcon<E> { } }); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. let _: Result = util::wait_on(Delta::from_micros(10), || None); - regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false)); + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); self.reset_wait_mem_scrubbing(bar)?; @@ -413,7 +425,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { regs::NV_PFALCON_FALCON_RM::default() .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -443,7 +455,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { fw.dma_handle_with_offset(load_offsets.src_start as usize)?, ), }; - if dma_start % bindings::dma_addr_t::from(DMA_LEN) > 0 { + if dma_start % DmaAddress::from(DMA_LEN) > 0 { dev_err!( self.dev, "DMA transfer start addresses must be a multiple of {}", @@ -451,44 +463,57 @@ impl<E: FalconEngine + 'static> Falcon<E> { ); return Err(EINVAL); } - if load_offsets.len % DMA_LEN > 0 { - dev_err!( - self.dev, - "DMA transfer length must be a multiple of {}", - DMA_LEN - ); - return Err(EINVAL); - } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow"); + return Err(EOVERFLOW); + } + Some(upper_bound) if upper_bound as usize > fw.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); + return Err(EINVAL); + } + Some(_) => (), + }; // Set up the base source DMA address. 
regs::NV_PFALCON_FALCON_DMATRFBASE::default() .set_base((dma_start >> 8) as u32) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFBASE1::default() .set_base((dma_start >> 40) as u16) - .write(bar, E::BASE); + .write(bar, &E::ID); let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() .set_size(DmaTrfCmdSize::Size256B) .set_imem(target_mem == FalconMem::Imem) .set_sec(if sec { 1 } else { 0 }); - for pos in (0..load_offsets.len).step_by(DMA_LEN as usize) { + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { // Perform a transfer of size `DMA_LEN`. regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() .set_offs(load_offsets.dst_start + pos) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() .set_offs(src_start + pos) - .write(bar, E::BASE); - cmd.write(bar, E::BASE); + .write(bar, &E::ID); + cmd.write(bar, &E::ID); // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); if r.idle() { Some(()) } else { @@ -502,9 +527,9 @@ impl<E: FalconEngine + 'static> Falcon<E> { /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { - regs::NV_PFALCON_FBIF_CTL::alter(bar, E::BASE, |v| v.set_allow_phys_no_ctx(true)); - regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, E::BASE); - regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, E::BASE, |v| { + regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { v.set_target(FalconFbifTarget::CoherentSysmem) .set_mem_type(FalconFbifMemType::Physical) }); @@ -517,7 +542,7 @@ impl<E: FalconEngine + 'static> Falcon<E> { // Set `BootVec` to start of non-secure code. regs::NV_PFALCON_FALCON_BOOTVEC::default() .set_value(fw.boot_addr()) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } @@ -538,27 +563,27 @@ impl<E: FalconEngine + 'static> Falcon<E> { if let Some(mbox0) = mbox0 { regs::NV_PFALCON_FALCON_MAILBOX0::default() .set_value(mbox0) - .write(bar, E::BASE); + .write(bar, &E::ID); } if let Some(mbox1) = mbox1 { regs::NV_PFALCON_FALCON_MAILBOX1::default() .set_value(mbox1) - .write(bar, E::BASE); + .write(bar, &E::ID); } - match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE).alias_en() { + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), false => regs::NV_PFALCON_FALCON_CPUCTL::default() .set_startcpu(true) - .write(bar, E::BASE), + .write(bar, &E::ID), } // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. 
util::wait_on(Delta::from_secs(2), || { - let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE); + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); if r.halted() { Some(()) } else { @@ -567,8 +592,8 @@ impl<E: FalconEngine + 'static> Falcon<E> { })?; let (mbox0, mbox1) = ( - regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, E::BASE).value(), - regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, E::BASE).value(), + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), ); Ok((mbox0, mbox1)) diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs index d622e9a64470..f17599cb49fa 100644 --- a/drivers/gpu/nova-core/falcon/gsp.rs +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -2,23 +2,31 @@ use crate::{ driver::Bar0, - falcon::{Falcon, FalconEngine}, - regs, + falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, + regs::{self, macros::RegisterBase}, }; /// Type specifying the `Gsp` falcon engine. Cannot be instantiated. pub(crate) struct Gsp(()); -impl FalconEngine for Gsp { +impl RegisterBase<PFalconBase> for Gsp { const BASE: usize = 0x00110000; } +impl RegisterBase<PFalcon2Base> for Gsp { + const BASE: usize = 0x00111000; +} + +impl FalconEngine for Gsp { + const ID: Self = Gsp(()); +} + impl Falcon<Gsp> { /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to /// allow GSP to signal CPU for processing new messages in message queue. pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { regs::NV_PFALCON_FALCON_IRQSCLR::default() .set_swgen0(true) - .write(bar, Gsp::BASE); + .write(bar, &Gsp::ID); } } diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index b233bc365882..bba288455617 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -13,7 +13,7 @@ mod ga102; /// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] /// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing /// registers. -pub(crate) trait FalconHal<E: FalconEngine>: Sync { +pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync { /// Activates the Falcon core if the engine is a risvc/falcon dual engine. fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { Ok(()) diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 52c33d3f22a8..0b1cbe7853b3 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -16,15 +16,15 @@ use crate::util; use super::FalconHal; fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { - let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { regs::NV_PRISCV_RISCV_BCR_CTRL::default() .set_core_select(PeregrineCoreSelect::Falcon) - .write(bar, E::BASE); + .write(bar, &E::ID); // TIMEOUT: falcon core should take less than 10ms to report being enabled. 
util::wait_on(Delta::from_millis(10), || { - let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); if r.valid() { Some(()) } else { @@ -42,50 +42,47 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result<u32> { - // TODO[REGA]: The ucode fuse versions are contained in the - // FUSE_OPT_FPF_<ENGINE>_UCODE<X>_VERSION registers, which are an array. Our register - // definition macros do not allow us to manage them properly, so we need to hardcode their - // addresses for now. Clean this up once we support register arrays. + const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; // Each engine has 16 ucode version registers numbered from 1 to 16. - if ucode_id == 0 || ucode_id > 16 { - dev_err!(dev, "invalid ucode id {:#x}", ucode_id); - return Err(EINVAL); - } + let ucode_idx = match ucode_id { + 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + _ => { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + }; - // Base address of the FUSE registers array corresponding to the engine. - let reg_fuse_base = if engine_id_mask & 0x0001 != 0 { - regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::OFFSET + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid + // at build-time. + let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0004 != 0 { - regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() } else if engine_id_mask & 0x0400 != 0 { - regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::OFFSET + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() } else { dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); return Err(EINVAL); }; - // Read `reg_fuse_base[ucode_id - 1]`. - let reg_fuse_version = - bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::<u32>())); - // TODO[NUMM]: replace with `last_set_bit` once it lands. - Ok(u32::BITS - reg_fuse_version.leading_zeros()) + Ok(u16::BITS - reg_fuse_version.leading_zeros()) } fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() .set_value(params.pkc_data_offset) - .write(bar, E::BASE); + .write(bar, &E::ID, 0); regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() .set_value(u32::from(params.engine_id_mask)) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() .set_ucode_id(params.ucode_id) - .write(bar, E::BASE); + .write(bar, &E::ID); regs::NV_PFALCON2_FALCON_MOD_SEL::default() .set_algo(FalconModSelAlgo::Rsa3k) - .write(bar, E::BASE); + .write(bar, &E::ID); Ok(()) } diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs index 5147d9e2a7fe..815786c8480d 100644 --- a/drivers/gpu/nova-core/falcon/sec2.rs +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -1,10 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 -use crate::falcon::FalconEngine; +use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; +use crate::regs::macros::RegisterBase; /// Type specifying the `Sec2` falcon engine. Cannot be instantiated. 
pub(crate) struct Sec2(()); -impl FalconEngine for Sec2 { +impl RegisterBase<PFalconBase> for Sec2 { const BASE: usize = 0x00840000; } + +impl RegisterBase<PFalcon2Base> for Sec2 { + const BASE: usize = 0x00841000; +} + +impl FalconEngine for Sec2 { + const ID: Self = Sec2(()); +} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 4a702525fff4..27d9edab8347 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -3,8 +3,9 @@ use core::ops::Range; use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; use kernel::sizes::*; -use kernel::types::ARef; +use kernel::sync::aref::ARef; use kernel::{dev_warn, device}; use crate::dma::DmaObject; @@ -130,10 +131,9 @@ impl FbLayout { }; let frts = { - const FRTS_DOWN_ALIGN: u64 = SZ_128K as u64; + const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>(); const FRTS_SIZE: u64 = SZ_1M as u64; - // TODO[NUMM]: replace with `align_down` once it lands. - let frts_base = (vga_workspace.start & !(FRTS_DOWN_ALIGN - 1)) - FRTS_SIZE; + let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; frts_base..frts_base + FRTS_SIZE }; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 2931912ddba0..4179a74a2342 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -4,48 +4,36 @@ //! to be loaded into a given execution unit. use core::marker::PhantomData; +use core::mem::size_of; use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use kernel::transmute::FromBytes; use crate::dma::DmaObject; use crate::falcon::FalconFirmware; use crate::gpu; -use crate::gpu::Chipset; +pub(crate) mod booter; pub(crate) mod fwsec; - -pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; - -/// Structure encapsulating the firmware blobs required for the GPU to operate. -#[expect(dead_code)] -pub(crate) struct Firmware { - booter_load: firmware::Firmware, - booter_unload: firmware::Firmware, - bootloader: firmware::Firmware, - gsp: firmware::Firmware, -} - -impl Firmware { - pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result<Firmware> { - let mut chip_name = CString::try_from_fmt(fmt!("{chipset}"))?; - chip_name.make_ascii_lowercase(); - let chip_name = &*chip_name; - - let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name_}-{ver}.bin")) - .and_then(|path| firmware::Firmware::request(&path, dev)) - }; - - Ok(Firmware { - booter_load: request("booter_load")?, - booter_unload: request("booter_unload")?, - bootloader: request("bootloader")?, - gsp: request("gsp")?, - }) - } +pub(crate) mod gsp; +pub(crate) mod riscv; + +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; + +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. +fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result<firmware::Firmware> { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) } /// Structure used to describe some firmwares, notably FWSEC-FRTS. @@ -150,6 +138,65 @@ impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { } } +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. + bin_magic: u32, + /// Version of the header. 
+ bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +// A firmware blob starting with a `BinHdr`. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +impl<'a> BinFirmware<'a> { + /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result<Self> { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::<BinHdr>()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. + fn data(&self) -> Option<&[u8]> { + let fw_start = self.hdr.data_offset as usize; + let fw_size = self.hdr.data_size as usize; + + self.fw.get(fw_start..fw_start + fw_size) + } +} + pub(crate) struct ModInfoBuilder<const N: usize>(firmware::ModInfoBuilder<N>); impl<const N: usize> ModInfoBuilder<N> { @@ -180,8 +227,8 @@ impl<const N: usize> ModInfoBuilder<N> { let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; - while i < gpu::Chipset::NAMES.len() { - this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); i += 1; } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..b4ff1b17e4a0 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). + +use core::marker::PhantomData; +use core::mem::size_of; +use core::ops::Deref; + +use kernel::device; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::sec2::Sec2; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::gpu::Chipset; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { + slice + .get(offset..offset + size_of::<S>()) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. 
+ sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. + fn patch_location(&self) -> Result<u32> { + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. + None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?; + let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .ok_or(EINVAL)? + .chunks_exact(sig_size as usize) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. 
+ fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + let start = hs_fw.hdr.meta_data_offset as usize; + let end = start + .checked_add(hs_fw.hdr.meta_data_size as usize) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::<Self>( + hs_fw.fw, + (hs_fw.hdr.header_offset as usize) + // Skip the load header... + .checked_add(size_of::<HsLoadHeaderV2>()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. Their size is encoded into the header and not known a compile +/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for `IMEM` falcon memory. + imem_load_target: FalconLoadTarget, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. 
+ ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FirmwareDmaObject<BooterFirmware, Unsigned> { + fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. + pub(crate) fn new( + dev: &device::Device<device::Bound>, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<<Self as FalconFirmware>::Target>, + bar: &Bar0, + ) -> Result<Self> { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. + const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. + reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx as usize) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc as usize)? 
+ } + }; + + Ok(Self { + imem_load_target: FalconLoadTarget { + src_start: app0.offset, + dst_start: 0, + len: app0.len, + }, + dmem_load_target: FalconLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconLoadParams for BooterFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + self.imem_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + self.dmem_load_target.clone() + } + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + self.imem_load_target.src_start + } +} + +impl Deref for BooterFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; +} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 0dff3cfa90af..8edbb5c0572c 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -202,9 +202,6 @@ pub(crate) struct FwsecFirmware { ucode: FirmwareDmaObject<Self, Signed>, } -// We need to load full DMEM pages. -const DMEM_LOAD_SIZE_ALIGN: u32 = 256; - impl FalconLoadParams for FwsecFirmware { fn imem_load_params(&self) -> FalconLoadTarget { FalconLoadTarget { @@ -218,11 +215,7 @@ impl FalconLoadParams for FwsecFirmware { FalconLoadTarget { src_start: self.desc.imem_load_size, dst_start: self.desc.dmem_phys_base, - // TODO[NUMM]: replace with `align_up` once it lands. - len: self - .desc - .dmem_load_size - .next_multiple_of(DMEM_LOAD_SIZE_ALIGN), + len: self.desc.dmem_load_size, } } @@ -253,8 +246,8 @@ impl FalconFirmware for FwsecFirmware { impl FirmwareDmaObject<FwsecFirmware, Unsigned> { fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { - let desc = bios.fwsec_image().header(dev)?; - let ucode = bios.fwsec_image().ucode(dev, desc)?; + let desc = bios.fwsec_image().header()?; + let ucode = bios.fwsec_image().ucode(desc)?; let mut dma_object = DmaObject::from_data(dev, ucode)?; let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; @@ -343,7 +336,7 @@ impl FwsecFirmware { let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; // Patch signature if needed. - let desc = bios.fwsec_image().header(dev)?; + let desc = bios.fwsec_image().header()?; let ucode_signed = if desc.signature_count != 0 { let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; let desc_sig_versions = u32::from(desc.signature_versions); @@ -382,7 +375,7 @@ impl FwsecFirmware { dev_dbg!(dev, "patching signature with index {}\n", signature_idx); let signature = bios .fwsec_image() - .sigs(dev, desc) + .sigs(desc) .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; ucode_dma.patch_signature(signature, sig_base_img)? 
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs new file mode 100644 index 000000000000..9b70095434c6 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::mem::size_of_val; + +use kernel::device; +use kernel::dma::{DataDirection, DmaAddress}; +use kernel::kvec; +use kernel::prelude::*; +use kernel::scatterlist::{Owned, SGTable}; + +use crate::dma::DmaObject; +use crate::firmware::riscv::RiscvFirmware; +use crate::gpu::{Architecture, Chipset}; +use crate::gsp::GSP_PAGE_SIZE; + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. +mod elf { + use core::mem::size_of; + + use kernel::bindings; + use kernel::str::CStr; + use kernel::transmute::FromBytes; + + /// Newtype to provide a [`FromBytes`] implementation. + #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::<bindings::elf64_hdr>()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::<Elf64SHdr>()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. + shdr.find(|&sh| { + let Some(hdr) = Elf64SHdr::from_bytes(sh) else { + return false; + }; + + let Some(name_idx) = strhdr + .0 + .sh_offset + .checked_add(u64::from(hdr.0.sh_name)) + .and_then(|idx| usize::try_from(idx).ok()) + else { + return false; + }; + + // Get the start of the name. + elf.get(name_idx..) + // Stop at the first `0`. + .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?)) + // Convert into CStr. This should never fail because of the line above. + .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok()) + // Convert into str. + .and_then(|c_str| c_str.to_str().ok()) + // Check that the name matches. + .map(|str| str == name) + .unwrap_or(false) + }) + // Return the slice containing the section. + .and_then(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. 
+/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable<Owned<VVec<u8>>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. + #[pin] + level2: SGTable<Owned<VVec<u8>>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable<Owned<VVec<u8>>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: DmaObject, + /// Size in bytes of the firmware contained in [`Self::fw`]. + size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + signatures: DmaObject, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + bootloader: RiscvFirmware, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a, 'b>( + dev: &'a device::Device<device::Bound>, + chipset: Chipset, + ver: &'b str, + ) -> Result<impl PinInit<Self, Error> + 'a> { + let fw = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?; + + let sigs_section = match chipset.arch() { + Architecture::Ampere => ".fwsignature_ga10x", + _ => return Err(ENOTSUPP), + }; + let signatures = elf::elf64_section(fw.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| DmaObject::from_data(dev, data))?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + let bootloader = RiscvFirmware::new(dev, &bl)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into the + // device address space. + VVec::<u8>::with_capacity( + fw.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? + }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map it + // into the device address space. + VVec::<u8>::with_capacity( + level2.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Level 0 page table data. + let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; + + // Fill level 1 page entry. 
+ #[allow(clippy::useless_conversion)] + let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let dst = &mut level0_data[..size_of_val(&level1_entry)]; + dst.copy_from_slice(&level1_entry.to_le_bytes()); + + // Turn the level0 page table into a [`DmaObject`]. + DmaObject::from_data(dev, &level0_data)? + }, + size, + signatures, + bootloader, + })) + } + + #[expect(unused)] + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. +/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..afb08f5bc4ba --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a +//! dedicated header. + +use core::mem::size_of; + +use kernel::device; +use kernel::firmware::Firmware; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::firmware::BinFirmware; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { + let offset = bin_fw.hdr.header_offset as usize; + + bin_fw + .fw + .get(offset..offset + size_of::<Self>()) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. +#[expect(unused)] +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + code_offset: u32, + /// Offset at which the data starts in the firmware image. + data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + manifest_offset: u32, + /// Application version. + app_version: u32, + /// Device-mapped firmware image. + ucode: DmaObject, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. 
+ pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = bin_fw.hdr.data_offset as usize; + let len = bin_fw.hdr.data_size as usize; + + DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index b5c9786619a9..af20e2daea24 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -1,18 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{device, devres::Devres, error::code::*, pci, prelude::*, sync::Arc}; +use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc}; use crate::driver::Bar0; -use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; -use crate::fb::FbLayout; +use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; use crate::fb::SysmemFlush; -use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; -use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; +use crate::gsp::Gsp; use crate::regs; -use crate::util; -use crate::vbios::Vbios; -use core::fmt; macro_rules! define_chipset { ({ $($variant:ident = $value:expr),* $(,)* }) => @@ -28,13 +23,23 @@ macro_rules! define_chipset { $( Chipset::$variant, )* ]; - pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ - $( util::const_bytes_to_str( - util::to_lowercase_bytes::<{ stringify!($variant).len() }>( - stringify!($variant) - ).as_slice() - ), )* - ]; + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); } // TODO[FPRI]: replace with something like derive(FromPrimitive) @@ -163,150 +168,74 @@ impl Spec { } /// Structure holding the resources required to operate the GPU. -#[pin_data(PinnedDrop)] +#[pin_data] pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 bar: Arc<Devres<Bar0>>, - fw: Firmware, /// System memory page required for flushing all pending GPU-side memory writes done through /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). sysmem_flush: SysmemFlush, -} - -#[pinned_drop] -impl PinnedDrop for Gpu { - fn drop(self: Pin<&mut Self>) { - // Unregister the sysmem flush page before we release it. - self.bar - .try_access_with(|b| self.sysmem_flush.unregister(b)); - } + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon<GspFalcon>, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon<Sec2Falcon>, + /// GSP runtime data. Temporarily an empty placeholder. + #[pin] + gsp: Gsp, } impl Gpu { - /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly - /// created the WPR2 region. - /// - /// TODO: this needs to be moved into a larger type responsible for booting the whole GSP - /// (`GspBooter`?). 
- fn run_fwsec_frts( - dev: &device::Device<device::Bound>, - falcon: &Falcon<Gsp>, - bar: &Bar0, - bios: &Vbios, - fb_layout: &FbLayout, - ) -> Result<()> { - // Check that the WPR2 region does not already exists - if it does, we cannot run - // FWSEC-FRTS until the GPU is reset. - if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { - dev_err!( - dev, - "WPR2 region already exists - GPU needs to be reset to proceed\n" - ); - return Err(EBUSY); - } + pub(crate) fn new<'a>( + pdev: &'a pci::Device<device::Bound>, + devres_bar: Arc<Devres<Bar0>>, + bar: &'a Bar0, + ) -> impl PinInit<Self, Error> + 'a { + try_pin_init!(Self { + spec: Spec::new(bar).inspect(|spec| { + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + })?, - let fwsec_frts = FwsecFirmware::new( - dev, - falcon, - bar, - bios, - FwsecCommand::Frts { - frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.end - fb_layout.frts.start, + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; }, - )?; - // Run FWSEC-FRTS to create the WPR2 region. - fwsec_frts.run(dev, falcon, bar)?; + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, - // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status = regs::NV_PBUS_SW_SCRATCH_0E::read(bar).frts_err_code(); - if frts_status != 0 { - dev_err!( - dev, - "FWSEC-FRTS returned with error code {:#x}", - frts_status - ); + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, - return Err(EIO); - } + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, - // Check that the WPR2 region has been created as we requested. - let (wpr2_lo, wpr2_hi) = ( - regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), - regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), - ); + gsp <- Gsp::new(), - match (wpr2_lo, wpr2_hi) { - (_, 0) => { - dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, - Err(EIO) - } - (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { - dev_err!( - dev, - "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", - wpr2_lo, - fb_layout.frts.start, - ); - - Err(EIO) - } - (wpr2_lo, wpr2_hi) => { - dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); - dev_dbg!(dev, "GPU instance built\n"); - - Ok(()) - } - } + bar: devres_bar, + }) } - pub(crate) fn new( - pdev: &pci::Device<device::Bound>, - devres_bar: Arc<Devres<Bar0>>, - ) -> Result<impl PinInit<Self>> { - let bar = devres_bar.access(pdev.as_ref())?; - let spec = Spec::new(bar)?; - let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; - - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); - - // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. 
- gfw::wait_gfw_boot_completion(bar) - .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; - - let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; - - let gsp_falcon = Falcon::<Gsp>::new( - pdev.as_ref(), - spec.chipset, - bar, - spec.chipset > Chipset::GA100, - )?; - gsp_falcon.clear_swgen0_intr(bar); - - let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?; - - let fb_layout = FbLayout::new(spec.chipset, bar)?; - dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); - - let bios = Vbios::new(pdev, bar)?; - - Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; - - Ok(pin_init!(Self { - spec, - bar: devres_bar, - fw, - sysmem_flush, - })) + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. + pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..64e472e7a9d3 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::prelude::*; + +mod fw; + +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + +/// GSP runtime data. +/// +/// This is an empty pinned placeholder for now. +#[pin_data] +pub(crate) struct Gsp {} + +impl Gsp { + pub(crate) fn new() -> impl PinInit<Self> { + pin_init!(Self {}) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..2800f3aee37d --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::device; +use kernel::pci; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; +use crate::firmware::{ + booter::{BooterFirmware, BooterKind}, + fwsec::{FwsecCommand, FwsecFirmware}, + gsp::GspFirmware, + FIRMWARE_VERSION, +}; +use crate::gpu::Chipset; +use crate::regs; +use crate::vbios::Vbios; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. 
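        // (The MMU reports the region as a lower/higher bound pair: a higher bound of zero
        // means that no WPR2 region is programmed at all, while a lower bound different from
        // the requested fb_layout.frts.start means the region was placed somewhere other than
        // where FWSEC-FRTS was asked to put it. Both cases are treated as fatal below.)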
+ let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon return, the GSP is up and running, and its runtime object given as return value. + pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let _gsp_fw = KBox::pin_init( + GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, + GFP_KERNEL, + )?; + + let fb_layout = FbLayout::new(chipset, bar)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + + let _booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs new file mode 100644 index 000000000000..34226dd00982 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod r570_144; + +// Alias to avoid repeating the version number with every use. +#[expect(unused)] +use r570_144 as bindings; diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..35cb0370a7c9 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. 
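//!
//! For instance (purely illustrative, since `bindings.rs` is still an empty placeholder at this
//! point), the parent `gsp/fw.rs` module would wrap or re-export a generated type under a
//! stable, unversioned name rather than letting the rest of the driver depend on `r570_144`
//! directly:
//!
//! ```ignore
//! pub(crate) use bindings::GspFwWprMeta as FwWprMeta;
//! ```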
+ +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + dead_code, + unused_imports, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..cec594032515 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1 @@ +// SPDX-License-Identifier: GPL-2.0 diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index cb2bbb30cba1..fffcaee2249f 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -9,6 +9,7 @@ mod fb; mod firmware; mod gfw; mod gpu; +mod gsp; mod regs; mod util; mod vbios; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index d49fddf6a3c6..206dab2e1335 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -5,11 +5,11 @@ #![allow(non_camel_case_types)] #[macro_use] -mod macros; +pub(crate) mod macros; use crate::falcon::{ DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, - FalconModSelAlgo, FalconSecurityModel, PeregrineCoreSelect, + FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, }; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; @@ -28,7 +28,7 @@ impl NV_PMC_BOOT_0 { /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. pub(crate) fn architecture(self) -> Result<Architecture> { Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()), + self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), ) } @@ -36,7 +36,8 @@ impl NV_PMC_BOOT_0 { pub(crate) fn chipset(self) -> Result<Chipset> { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | u32::from(self.implementation()) + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } @@ -44,8 +45,10 @@ impl NV_PMC_BOOT_0 { // PBUS -// TODO[REGA]: this is an array of registers. -register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { +register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); + +register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], + "scratch register 0xe used as FRTS firmware error code" { 31:16 frts_err_code as u16; }); @@ -123,13 +126,12 @@ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; }); -// TODO[REGA]: This is an array of registers. -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 { - 31:0 value as u32; -}); +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. 
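// (With the array syntax, an element's address resolves to OFFSET + idx * stride, where the
// stride defaults to the 4 bytes of a u32. For example, NV_PBUS_SW_SCRATCH above starts at
// 0x00001400, so element 0xe, aliased as NV_PBUS_SW_SCRATCH_0E_FRTS_ERR, resolves to
// 0x1400 + 0xe * 4 = 0x1438, the absolute address of the former NV_PBUS_SW_SCRATCH_0E register.)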
+register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); register!( - NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05, + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], "Scratch group 05 register 0 used as GFW boot progress indicator" { 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; } @@ -180,38 +182,40 @@ impl NV_PDISP_VGA_WORKSPACE_BASE { // FUSE -register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140 { +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); -register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0 { +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { 15:0 data as u16; }); // PFALCON -register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 { +register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { 4:4 halt as bool; 6:6 swgen0 as bool; }); -register!(NV_PFALCON_FALCON_MAILBOX0 @ +0x00000040 { +register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_MAILBOX1 @ +0x00000044 { +register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_RM @ +0x00000084 { +register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_HWCFG2 @ +0x000000f4 { +register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { 10:10 riscv as bool; 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; @@ -224,17 +228,17 @@ impl NV_PFALCON_FALCON_HWCFG2 { } } -register!(NV_PFALCON_FALCON_CPUCTL @ +0x00000100 { +register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { 1:1 startcpu as bool; 4:4 halted as bool; 6:6 alias_en as bool; }); -register!(NV_PFALCON_FALCON_BOOTVEC @ +0x00000104 { +register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { 31:0 value as u32; }); -register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { +register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { 0:0 require_ctx as bool; 1:1 dmem_scrubbing as bool; 2:2 imem_scrubbing as bool; @@ -242,15 +246,15 @@ register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { 7:7 secure_stat as bool; }); -register!(NV_PFALCON_FALCON_DMATRFBASE @ +0x00000110 { +register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { 31:0 base as u32; }); -register!(NV_PFALCON_FALCON_DMATRFMOFFS @ +0x00000114 { +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { 23:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { +register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { 0:0 full as bool; 1:1 idle as bool; 3:2 sec as u8; @@ -261,60 +265,62 @@ register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { 16:16 set_dmtag as u8; }); -register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ +0x0000011c { +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { 31:0 offs as u32; }); -register!(NV_PFALCON_FALCON_DMATRFBASE1 @ +0x00000128 { +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { 8:0 base as u16; }); -register!(NV_PFALCON_FALCON_HWCFG1 @ +0x0000012c { +register!(NV_PFALCON_FALCON_HWCFG1 @ 
PFalconBase[0x0000012c] { 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; }); -register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ +0x00000130 { +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { 1:1 startcpu as bool; }); // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. -register!(NV_PFALCON_FALCON_ENGINE @ +0x000003c0 { +register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { 0:0 reset as bool; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 { +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { 1:0 target as u8 ?=> FalconFbifTarget; 2:2 mem_type as bool => FalconFbifMemType; }); -register!(NV_PFALCON_FBIF_CTL @ +0x00000624 { +register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { 7:7 allow_phys_no_ctx as bool; }); -register!(NV_PFALCON2_FALCON_MOD_SEL @ +0x00001180 { +/* PFALCON2 */ + +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { 7:0 algo as u8 ?=> FalconModSelAlgo; }); -register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ +0x00001198 { +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { 7:0 ucode_id as u8; }); -register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ +0x0000119c { +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { 31:0 value as u32; }); -// TODO[REGA]: this is an array of registers. -register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 { +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { 31:0 value as u32; }); // PRISCV -register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 { +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { 0:0 valid as bool; 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index a3e6de1779d4..8058e1696df9 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -1,17 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -//! Macro to define register layout and accessors. +//! `register!` macro to define register layout and accessors. //! //! A single register typically includes several fields, which are accessed through a combination //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because //! not all possible field values are necessarily valid. //! -//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated -//! type for each register with its own field accessors that can return an error is a field's value -//! is invalid. +//! The `register!` macro in this module provides an intuitive and readable syntax for defining a +//! dedicated type for each register. Each such type comes with its own field accessors that can +//! return an error if a field's value is invalid. -/// Defines a dedicated type for a register with an absolute offset, alongside with getter and -/// setter methods for its fields and methods to read and write it from an `Io` region. 
+/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub(crate) trait RegisterBase<T> { + const BASE: usize; +} + +/// Defines a dedicated type for a register with an absolute offset, including getter and setter +/// methods for its fields and methods to read and write it from an `Io` region. /// /// Example: /// @@ -24,7 +34,7 @@ /// ``` /// /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less +/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least /// significant bits of the register. Each field can be accessed and modified using accessor /// methods: /// @@ -33,130 +43,344 @@ /// let boot0 = BOOT_0::read(&bar); /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); /// -/// // `Chipset::try_from` will be called with the value of the field and returns an error if the -/// // value is invalid. +/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an +/// // error if it is invalid. /// let chipset = boot0.chipset()?; /// /// // Update some fields and write the value back. /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// -/// // Or just read and update the register in a single step: +/// // Or, just read and update the register in a single step: /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields can be defined as follows: +/// Fields are defined as follows: /// -/// - `as <type>` simply returns the field value casted as the requested integer type, typically -/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. /// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns /// the result. /// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation -/// and returns the result. This is useful on fields for which not all values are value. +/// and returns the result. This is useful with fields for which not all values are valid. /// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// -/// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access, -/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown -/// at compile-time: +/// It is also possible to create a alias register by using the `=> ALIAS` syntax. 
This is useful +/// for cases where a register's interpretation depends on the context: /// /// ```no_run -/// register!(CPU_CTL @ +0x0000010, "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; +/// register!(SCRATCH @ 0x00000200, "Scratch register" { +/// 31:0 value as u32, "Raw value"; /// }); /// -/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`. -/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE); -/// pr_info!("CPU CTL: {:#x}", cpuctl); -/// cpuctl.set_start(true).write(&bar, CPU_BASE); +/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { +/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// }); /// ``` /// -/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful -/// for cases where a register's interpretation depends on the context: +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also +/// providing its own `completed` field. +/// +/// ## Relative registers +/// +/// A register can be defined as being accessible from a fixed offset of a provided base. For +/// instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... | +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition +/// +/// This can be done using the `Base[Offset]` syntax when specifying the register's address. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase<Base>`. Here is the above example translated +/// into code: /// /// ```no_run -/// register!(SCRATCH_0 @ 0x0000100, "Scratch register 0" { -/// 31:0 value as u32, "Raw value"; +/// // Type used to identify the base. +/// pub(crate) struct CpuCtlBase; /// -/// register!(SCRATCH_0_BOOT_STATUS => SCRATCH_0, "Boot status of the firmware" { -/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. 
+/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { +/// 0:0 start as bool, "Start the CPU core"; +/// }); +/// +/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // that is used to resolve its final address by adding its `BASE` to the offset of the +/// // register. +/// +/// // Start `CPU0`. +/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// +/// // Start `CPU1`. +/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// +/// // Aliases can also be defined for relative register. +/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { +/// 1:1 alias_start as bool, "Start the aliased CPU core"; +/// }); +/// +/// // Start the aliased `CPU0`. +/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); /// ``` /// -/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH_0`, while also -/// providing its own `completed` method. +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas +/// can be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add +/// an `idx` parameter to their `read`, `write` and `alter` methods: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = SCRATCH::read(bar, 0).value(); +/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// let scratch_15 = SCRATCH::read(bar, 15).value(); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = SCRATCH::read(bar, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); +/// +/// // Alias to a particular register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let status = FIRMWARE_STATUS::read(bar).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. 
+/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // 64 per-cpu scratch registers, arranged as an contiguous array. +/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); +/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime value returns an error if it is out-of-bounds. +/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); +/// +/// // `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], +/// "Per-CPU firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], +/// "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], +/// "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. + ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $offset); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. + ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET); + }; + + // Creates a register at a relative offset from a base address provider. + ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $offset ]); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET ]); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. ( - $name:ident @ $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? 
{ $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $offset); + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_array $name @ $offset [ $size ; $stride ]); }; - // Creates a alias register of fixed offset register `alias` with its own fields. + // Shortcut for contiguous array of registers (stride == size of element). ( - $name:ident => $alias:ident $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $alias::OFFSET); + register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? { + $($fields)* + } ); + }; + + // Creates an array of registers at a relative offset from a base address provider. + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] + $(, $comment:literal)? { $($fields:tt)* } + ) => { + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); }; - // Creates a register at a relative offset from a base address. + // Shortcut for contiguous array of relative registers (stride == size of element). ( - $name:ident @ + $offset:literal $(, $comment:literal)? { + $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $offset $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io$name @ + $offset); + register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ] + $(, $comment)? { $($fields)* } ); }; - // Creates a alias register of relative offset register `alias` with its own fields. + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. ( - $name:ident => + $alias:ident $(, $comment:literal)? { + $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name @ $alias::OFFSET $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ + $alias::OFFSET); + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + // This rule belongs to the (non-relative) register arrays set, but needs to be put last + // to avoid it being interpreted in place of the relative register array alias rule. + ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); }; // All rules below are helpers. - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to regular `u32`). - (@common $name:ident @ $offset:expr $(, $comment:literal)?) 
=> { + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] - #[derive(Clone, Copy, Default)] + #[derive(Clone, Copy)] pub(crate) struct $name(u32); - #[allow(dead_code)] - impl $name { - pub(crate) const OFFSET: usize = $offset; - } - - // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires - // matching the fields, which will complexify the syntax considerably... - impl ::core::fmt::Debug for $name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - f.debug_tuple(stringify!($name)) - .field(&format_args!("0x{0:x}", &self.0)) - .finish() - } - } - impl ::core::ops::BitOr for $name { type Output = Self; @@ -170,6 +394,34 @@ macro_rules! register { reg.0 } } + + register!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + register!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + register!(@debug $name { $($field;)* }); + register!(@default $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -228,7 +480,7 @@ macro_rules! register { $(, $comment:literal)?; ) => { register!( - @leaf_accessor $name $hi:$lo $field as bool + @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } $into_type => $into_type $(, $comment)?; ); @@ -246,7 +498,7 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => ::core::result::Result< $try_into_type, @@ -260,11 +512,11 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; - // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax. + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; @@ -274,11 +526,11 @@ macro_rules! register { // Generates the accessor methods for a single field. 
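    // As a rough orientation (illustrative, not the exact expansion): for a register `BOOT_0`
    // with a field declared as `3:0 minor_revision as u8`, this rule produces approximately:
    //
    //     const MINOR_REVISION_RANGE: RangeInclusive<u8> = 0..=3;
    //     const MINOR_REVISION_MASK: u32 = 0x0000000f;
    //     const MINOR_REVISION_SHIFT: u32 = 0;
    //
    //     pub(crate) fn minor_revision(self) -> u8 {
    //         ((self.0 & Self::MINOR_REVISION_MASK) >> Self::MINOR_REVISION_SHIFT) as u8
    //     }
    //
    //     pub(crate) fn set_minor_revision(mut self, value: u8) -> Self {
    //         self.0 &= !Self::MINOR_REVISION_MASK;
    //         self.0 |= (u32::from(value) << Self::MINOR_REVISION_SHIFT)
    //             & Self::MINOR_REVISION_MASK;
    //         self
    //     }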
( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { ::kernel::macros::paste!( - const [<$field:upper>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); @@ -287,7 +539,7 @@ macro_rules! register { #[doc="Returns the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn $field(self) -> $res_type { ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; @@ -303,7 +555,7 @@ macro_rules! register { #[doc="Sets the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; @@ -315,25 +567,64 @@ macro_rules! register { ); }; - // Creates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:expr) => { + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the register where all fields are set to their default value. + impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; + + // Generates the IO accessors for a fixed offset register. + (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>(io: &T) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { Self(io.read32($offset)) } - #[inline] + /// Write the value contained in `self` to the register address in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>(self, io: &T) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { io.write32(self.0, $offset) } - #[inline] + /// Read the register from its address in `io` and run `f` on its value to obtain a new + /// value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, f: F, @@ -347,76 +638,322 @@ macro_rules! register { } }; - // Create the IO accessors for a relative offset register. - (@io $name:ident @ + $offset:literal) => { + // Generates the IO accessors for a relative offset register. + (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it. 
+ #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + let value = io.read32( + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + io.write32( + self.0, + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + } + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it, then run `f` on its value to obtain a new value to + /// write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base)); + reg.write(io, base); + } + } + }; + + // Generates the IO accessors for an array of registers. + (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - Self(io.read32(base + $offset)) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>( self, io: &T, - base: usize, + idx: usize ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.write32(self.0, base + $offset) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, f: F, ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::read(io, base)); - reg.write(io, base); + let reg = f(Self::read(io, idx)); + reg.write(io, idx); } - #[inline] + /// Read the array register at index `idx` from its address in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. 
+ #[inline(always)] pub(crate) fn try_read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> ::kernel::error::Result<Self> where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_read32(base + $offset).map(Self) + if idx < Self::SIZE { + Ok(Self::read(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_write<const SIZE: usize, T>( self, io: &T, - base: usize, - ) -> ::kernel::error::Result<()> where + idx: usize, + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_write32(self.0, base + $offset) + if idx < Self::SIZE { + Ok(self.write(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; + + // Generates the IO accessors for an array of relative registers. + ( + @io_relative_array $name:ident @ $base:ty + [ $offset:literal [ $size:expr ; $stride:expr ] ] + ) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. 
+ #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base, idx)); + reg.write(io, base, idx); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read<const SIZE: usize, T, B>( + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result<Self> where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(Self::read(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write<const SIZE: usize, T, B>( + self, + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(self.write(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, f: F, - ) -> ::kernel::error::Result<()> where + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::try_read(io, base)?); - reg.try_write(io, base) + if idx < Self::SIZE { + Ok(Self::alter(io, base, idx, f)) + } else { + Err(EINVAL) + } } } }; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 76cedf3710d7..bf35f00cb732 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -3,26 +3,6 @@ use kernel::prelude::*; use kernel::time::{Delta, Instant, Monotonic}; -pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { - let src = s.as_bytes(); - let mut dst = [0; N]; - let mut i = 0; - - while i < src.len() && i < N { - dst[i] = (src[i] as char).to_ascii_lowercase() as u8; - i += 1; - } - - dst -} - -pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { - match core::str::from_utf8(bytes) { - Ok(string) => string, - Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), - } -} - /// Wait until `cond` is true or `timeout` elapsed. /// /// When `cond` evaluates to `Some`, its return value is returned. 
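The const helpers removed from util.rs above were only needed to build lowercase chipset names at
compile time; they are superseded by the `paste!`-based `Chipset::name()` added in gpu.rs earlier
in this diff. As a hedged illustration of what the `[<$variant:lower>]` expansion yields (variant
list abridged), the generated method is roughly:

    pub(crate) const fn name(&self) -> &'static str {
        match *self {
            Chipset::GA100 => "ga100",
            Chipset::GA102 => "ga102",
            // ... one arm per chipset variant, lowercased by `paste!` at compile time.
        }
    }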
diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 5b5d9f38cbb3..71fbe71b84db 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -8,8 +8,9 @@ use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; use kernel::device; use kernel::error::Result; -use kernel::pci; use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; +use kernel::types::ARef; /// The offset of the VBIOS ROM in the BAR0 space. const ROM_OFFSET: usize = 0x300000; @@ -31,7 +32,7 @@ const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; /// Vbios Reader for constructing the VBIOS data. struct VbiosIterator<'a> { - pdev: &'a pci::Device, + dev: &'a device::Device, bar0: &'a Bar0, /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference /// or copying into other data structures. It is the entire scanned contents of the VBIOS which @@ -46,9 +47,9 @@ struct VbiosIterator<'a> { } impl<'a> VbiosIterator<'a> { - fn new(pdev: &'a pci::Device, bar0: &'a Bar0) -> Result<Self> { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> { Ok(Self { - pdev, + dev, bar0, data: KVec::new(), current_offset: 0, @@ -64,7 +65,7 @@ impl<'a> VbiosIterator<'a> { // Ensure length is a multiple of 4 for 32-bit reads if len % core::mem::size_of::<u32>() != 0 { dev_err!( - self.pdev.as_ref(), + self.dev, "VBIOS read length {} is not a multiple of 4\n", len ); @@ -89,7 +90,7 @@ impl<'a> VbiosIterator<'a> { /// Read bytes at a specific offset, filling any gap. fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { if offset > BIOS_MAX_SCAN_LEN { - dev_err!(self.pdev.as_ref(), "Error: exceeded BIOS scan limit.\n"); + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); return Err(EINVAL); } @@ -115,7 +116,7 @@ impl<'a> VbiosIterator<'a> { if offset + len > data_len { self.read_more_at_offset(offset, len).inspect_err(|e| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to read more at offset {:#x}: {:?}\n", offset, e @@ -123,9 +124,9 @@ impl<'a> VbiosIterator<'a> { })?; } - BiosImage::new(self.pdev, &self.data[offset..offset + len]).inspect_err(|err| { + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { dev_err!( - self.pdev.as_ref(), + self.dev, "Failed to {} at offset {:#x}: {:?}\n", context, offset, @@ -146,10 +147,7 @@ impl<'a> Iterator for VbiosIterator<'a> { } if self.current_offset > BIOS_MAX_SCAN_LEN { - dev_err!( - self.pdev.as_ref(), - "Error: exceeded BIOS scan limit, stopping scan\n" - ); + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); return None; } @@ -177,8 +175,7 @@ impl<'a> Iterator for VbiosIterator<'a> { // Advance to next image (aligned to 512 bytes). self.current_offset += image_size; - // TODO[NUMM]: replace with `align_up` once it lands. - self.current_offset = self.current_offset.next_multiple_of(512); + self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?; Some(Ok(full_image)) } @@ -192,18 +189,18 @@ impl Vbios { /// Probe for VBIOS extraction. /// /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. 
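    /// (In outline: every BIOS image in the legacy ROM is walked via [`VbiosIterator`], the
    /// PciAt image and the two FWSEC images are kept aside, the falcon data referenced by the
    /// PciAt and first FWSEC images is set up on the second FWSEC image, and that image is then
    /// built into the final [`FwSecBiosImage`].)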
- pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result<Vbios> { + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> { // Images to extract from iteration let mut pci_at_image: Option<PciAtBiosImage> = None; let mut first_fwsec_image: Option<FwSecBiosBuilder> = None; let mut second_fwsec_image: Option<FwSecBiosBuilder> = None; // Parse all VBIOS images in the ROM - for image_result in VbiosIterator::new(pdev, bar0)? { + for image_result in VbiosIterator::new(dev, bar0)? { let full_image = image_result?; dev_dbg!( - pdev.as_ref(), + dev, "Found BIOS image: size: {:#x}, type: {}, last: {}\n", full_image.image_size_bytes(), full_image.image_type_str(), @@ -234,14 +231,14 @@ impl Vbios { (second_fwsec_image, first_fwsec_image, pci_at_image) { second - .setup_falcon_data(pdev, &pci_at, &first) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Falcon data setup failed: {:?}\n", e))?; + .setup_falcon_data(&pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; Ok(Vbios { - fwsec_image: second.build(pdev)?, + fwsec_image: second.build()?, }) } else { dev_err!( - pdev.as_ref(), + dev, "Missing required images for falcon data setup, skipping\n" ); Err(EINVAL) @@ -284,9 +281,9 @@ struct PcirStruct { } impl PcirStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < core::mem::size_of::<PcirStruct>() { - dev_err!(pdev.as_ref(), "Not enough data for PcirStruct\n"); + dev_err!(dev, "Not enough data for PcirStruct\n"); return Err(EINVAL); } @@ -295,11 +292,7 @@ impl PcirStruct { // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). if &signature != b"PCIR" && &signature != b"NPDS" { - dev_err!( - pdev.as_ref(), - "Invalid signature for PcirStruct: {:?}\n", - signature - ); + dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); return Err(EINVAL); } @@ -308,7 +301,7 @@ impl PcirStruct { let image_len = u16::from_le_bytes([data[16], data[17]]); if image_len == 0 { - dev_err!(pdev.as_ref(), "Invalid image length: 0\n"); + dev_err!(dev, "Invalid image length: 0\n"); return Err(EINVAL); } @@ -345,7 +338,7 @@ impl PcirStruct { /// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the /// [`FwSecBiosImage`]. #[derive(Debug, Clone, Copy)] -#[expect(dead_code)] +#[repr(C)] struct BitHeader { /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) id: u16, @@ -365,7 +358,7 @@ struct BitHeader { impl BitHeader { fn new(data: &[u8]) -> Result<Self> { - if data.len() < 12 { + if data.len() < core::mem::size_of::<Self>() { return Err(EINVAL); } @@ -467,7 +460,7 @@ struct PciRomHeader { } impl PciRomHeader { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < 26 { // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. 
return Err(EINVAL); @@ -479,7 +472,7 @@ impl PciRomHeader { match signature { 0xAA55 | 0xBB77 | 0x4E56 => {} _ => { - dev_err!(pdev.as_ref(), "ROM signature unknown {:#x}\n", signature); + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); return Err(EINVAL); } } @@ -538,9 +531,9 @@ struct NpdeStruct { } impl NpdeStruct { - fn new(pdev: &pci::Device, data: &[u8]) -> Option<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { if data.len() < core::mem::size_of::<Self>() { - dev_dbg!(pdev.as_ref(), "Not enough data for NpdeStruct\n"); + dev_dbg!(dev, "Not enough data for NpdeStruct\n"); return None; } @@ -549,17 +542,13 @@ impl NpdeStruct { // Signature should be "NPDE" (0x4544504E). if &signature != b"NPDE" { - dev_dbg!( - pdev.as_ref(), - "Invalid signature for NpdeStruct: {:?}\n", - signature - ); + dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); return None; } let subimage_len = u16::from_le_bytes([data[8], data[9]]); if subimage_len == 0 { - dev_dbg!(pdev.as_ref(), "Invalid subimage length: 0\n"); + dev_dbg!(dev, "Invalid subimage length: 0\n"); return None; } @@ -584,7 +573,7 @@ impl NpdeStruct { /// Try to find NPDE in the data, the NPDE is right after the PCIR. fn find_in_data( - pdev: &pci::Device, + dev: &device::Device, data: &[u8], rom_header: &PciRomHeader, pcir: &PcirStruct, @@ -596,12 +585,12 @@ impl NpdeStruct { // Check if we have enough data if npde_start + core::mem::size_of::<Self>() > data.len() { - dev_dbg!(pdev.as_ref(), "Not enough data for NPDE\n"); + dev_dbg!(dev, "Not enough data for NPDE\n"); return None; } // Try to create NPDE from the data - NpdeStruct::new(pdev, &data[npde_start..]) + NpdeStruct::new(dev, &data[npde_start..]) } } @@ -669,10 +658,10 @@ impl BiosImage { /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which /// triggers the constructor of the specific BiosImage enum variant. - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { - let base = BiosImageBase::new(pdev, data)?; + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let base = BiosImageBase::new(dev, data)?; let image = base.into_image().inspect_err(|e| { - dev_err!(pdev.as_ref(), "Failed to create BiosImage: {:?}\n", e); + dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); })?; Ok(image) @@ -754,9 +743,10 @@ impl TryFrom<BiosImageBase> for BiosImage { /// /// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that /// Rust favors composition of types over inheritance. -#[derive(Debug)] #[expect(dead_code)] struct BiosImageBase { + /// Used for logging. + dev: ARef<device::Device>, /// PCI ROM Expansion Header rom_header: PciRomHeader, /// PCI Data Structure @@ -773,16 +763,16 @@ impl BiosImageBase { } /// Creates a new BiosImageBase from raw byte data. - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { // Ensure we have enough data for the ROM header. if data.len() < 26 { - dev_err!(pdev.as_ref(), "Not enough data for ROM header\n"); + dev_err!(dev, "Not enough data for ROM header\n"); return Err(EINVAL); } // Parse the ROM header. - let rom_header = PciRomHeader::new(pdev, &data[0..26]) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PciRomHeader: {:?}\n", e))?; + let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; // Get the PCI Data Structure using the pointer from the ROM header. 
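The new `dev` field on `BiosImageBase` is the pivot of this refactor: instead of threading `pdev` through every constructor solely for `dev_err!`/`dev_dbg!`, each parsed image owns a refcounted device handle and logs through it. A loose std-only analogue of that design, with `Arc<String>` standing in for `ARef<device::Device>` purely so the snippet compiles outside the kernel:

    use std::sync::Arc;

    struct ParsedImage {
        // In the driver this is ARef<device::Device>, used only for logging.
        dev: Arc<String>,
        data: Vec<u8>,
    }

    impl ParsedImage {
        fn new(dev: &Arc<String>, data: &[u8]) -> Self {
            Self {
                dev: Arc::clone(dev),
                data: data.to_vec(),
            }
        }

        fn log_len(&self) {
            // Stand-in for dev_dbg!(self.dev, ...).
            println!("[{}] image of {} bytes", self.dev, self.data.len());
        }
    }

Later methods such as `falcon_data_ptr()` and `header()` can then drop their explicit device parameters, as the rest of the diff shows.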
let pcir_offset = rom_header.pci_data_struct_offset as usize; @@ -791,28 +781,29 @@ impl BiosImageBase { .ok_or(EINVAL) .inspect_err(|_| { dev_err!( - pdev.as_ref(), + dev, "PCIR offset {:#x} out of bounds (data length: {})\n", pcir_offset, data.len() ); dev_err!( - pdev.as_ref(), + dev, "Consider reading more data for construction of BiosImage\n" ); })?; - let pcir = PcirStruct::new(pdev, pcir_data) - .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PcirStruct: {:?}\n", e))?; + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; // Look for NPDE structure if this is not an NBSI image (type != 0x70). - let npde = NpdeStruct::find_in_data(pdev, data, &rom_header, &pcir); + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); // Create a copy of the data. let mut data_copy = KVec::new(); data_copy.extend_from_slice(data, GFP_KERNEL)?; Ok(BiosImageBase { + dev: dev.into(), rom_header, pcir, npde, @@ -848,7 +839,7 @@ impl PciAtBiosImage { /// /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC /// image. - fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result<u32> { + fn falcon_data_ptr(&self) -> Result<u32> { let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; // Make sure we don't go out of bounds @@ -859,14 +850,14 @@ impl PciAtBiosImage { // read the 4 bytes at the offset specified in the token let offset = token.data_offset as usize; let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { - dev_err!(pdev.as_ref(), "Failed to convert data slice to array"); + dev_err!(self.base.dev, "Failed to convert data slice to array"); EINVAL })?; let data_ptr = u32::from_le_bytes(bytes); if (data_ptr as usize) < self.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon data pointer out of bounds\n"); + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); return Err(EINVAL); } @@ -892,7 +883,7 @@ impl TryFrom<BiosImageBase> for PciAtBiosImage { /// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. /// /// See the [`PmuLookupTable`] description for more information. 
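`falcon_data_ptr()` illustrates the simplification: with the device handle stored in `self.base.dev`, only the byte-slice work remains. A minimal sketch of the pointer read it performs, assuming the BIT token's `data_offset` has already been located (the driver additionally range-checks the result and reports failures with `dev_err!`):

    // Read the 4-byte little-endian Falcon data pointer at the token's
    // data offset; slice::get() keeps the access in bounds.
    fn read_u32_le_at(data: &[u8], offset: usize) -> Option<u32> {
        let bytes: [u8; 4] = data.get(offset..offset.checked_add(4)?)?.try_into().ok()?;
        Some(u32::from_le_bytes(bytes))
    }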
-#[expect(dead_code)] +#[repr(C, packed)] struct PmuLookupTableEntry { application_id: u8, target_id: u8, @@ -901,7 +892,7 @@ struct PmuLookupTableEntry { impl PmuLookupTableEntry { fn new(data: &[u8]) -> Result<Self> { - if data.len() < 6 { + if data.len() < core::mem::size_of::<Self>() { return Err(EINVAL); } @@ -928,7 +919,7 @@ struct PmuLookupTable { } impl PmuLookupTable { - fn new(pdev: &pci::Device, data: &[u8]) -> Result<Self> { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { if data.len() < 4 { return Err(EINVAL); } @@ -940,10 +931,7 @@ impl PmuLookupTable { let required_bytes = header_len + (entry_count * entry_len); if data.len() < required_bytes { - dev_err!( - pdev.as_ref(), - "PmuLookupTable data length less than required\n" - ); + dev_err!(dev, "PmuLookupTable data length less than required\n"); return Err(EINVAL); } @@ -956,11 +944,7 @@ impl PmuLookupTable { // Debug logging of entries (dumps the table data to dmesg) for i in (header_len..required_bytes).step_by(entry_len) { - dev_dbg!( - pdev.as_ref(), - "PMU entry: {:02x?}\n", - &data[i..][..entry_len] - ); + dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); } Ok(PmuLookupTable { @@ -997,11 +981,10 @@ impl PmuLookupTable { impl FwSecBiosBuilder { fn setup_falcon_data( &mut self, - pdev: &pci::Device, pci_at_image: &PciAtBiosImage, first_fwsec: &FwSecBiosBuilder, ) -> Result { - let mut offset = pci_at_image.falcon_data_ptr(pdev)? as usize; + let mut offset = pci_at_image.falcon_data_ptr()? as usize; let mut pmu_in_first_fwsec = false; // The falcon data pointer assumes that the PciAt and FWSEC images @@ -1024,10 +1007,15 @@ impl FwSecBiosBuilder { self.falcon_data_offset = Some(offset); if pmu_in_first_fwsec { - self.pmu_lookup_table = - Some(PmuLookupTable::new(pdev, &first_fwsec.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); } else { - self.pmu_lookup_table = Some(PmuLookupTable::new(pdev, &self.base.data[offset..])?); + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); } match self @@ -1040,7 +1028,7 @@ impl FwSecBiosBuilder { let mut ucode_offset = entry.data as usize; ucode_offset -= pci_at_image.base.data.len(); if ucode_offset < first_fwsec.base.data.len() { - dev_err!(pdev.as_ref(), "Falcon Ucode offset not in second Fwsec.\n"); + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); return Err(EINVAL); } ucode_offset -= first_fwsec.base.data.len(); @@ -1048,7 +1036,7 @@ impl FwSecBiosBuilder { } Err(e) => { dev_err!( - pdev.as_ref(), + self.base.dev, "PmuLookupTableEntry not found, error: {:?}\n", e ); @@ -1059,7 +1047,7 @@ impl FwSecBiosBuilder { } /// Build the final FwSecBiosImage from this builder - fn build(self, pdev: &pci::Device) -> Result<FwSecBiosImage> { + fn build(self) -> Result<FwSecBiosImage> { let ret = FwSecBiosImage { base: self.base, falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, @@ -1067,8 +1055,8 @@ impl FwSecBiosBuilder { if cfg!(debug_assertions) { // Print the desc header for debugging - let desc = ret.header(pdev.as_ref())?; - dev_dbg!(pdev.as_ref(), "PmuLookupTableEntry desc: {:#?}\n", desc); + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); } Ok(ret) @@ -1077,13 +1065,16 @@ impl FwSecBiosBuilder { impl FwSecBiosImage { /// Get the FwSec header ([`FalconUCodeDescV3`]). 
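`PmuLookupTable::new()` keeps its sizing logic while losing the `pdev` parameter. A minimal sketch of that check, with overflow handling added for illustration (the driver compares `data.len()` against `header_len + entry_count * entry_len` and logs through `dev_err!` on a shortfall):

    // The table header declares the per-entry size and entry count; the raw
    // data must provide the header plus all entries before they are walked
    // (the dev_dbg!() loop above steps through them entry_len bytes at a time).
    fn pmu_table_required_bytes(
        header_len: usize,
        entry_count: usize,
        entry_len: usize,
        data_len: usize,
    ) -> Result<usize, &'static str> {
        let required = entry_count
            .checked_mul(entry_len)
            .and_then(|n| n.checked_add(header_len))
            .ok_or("PMU table size overflows")?;
        if data_len < required {
            return Err("PmuLookupTable data length less than required");
        }
        Ok(required)
    }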
- pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> { + pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> { // Get the falcon ucode offset that was found in setup_falcon_data. let falcon_ucode_offset = self.falcon_ucode_offset; // Make sure the offset is within the data bounds. if falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>() > self.base.data.len() { - dev_err!(dev, "fwsec-frts header not contained within BIOS bounds\n"); + dev_err!( + self.base.dev, + "fwsec-frts header not contained within BIOS bounds\n" + ); return Err(ERANGE); } @@ -1095,7 +1086,7 @@ impl FwSecBiosImage { let ver = (hdr & 0xff00) >> 8; if ver != 3 { - dev_err!(dev, "invalid fwsec firmware version: {:?}\n", ver); + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); return Err(EINVAL); } @@ -1115,7 +1106,7 @@ impl FwSecBiosImage { } /// Get the ucode data as a byte slice - pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> { let falcon_ucode_offset = self.falcon_ucode_offset; // The ucode data follows the descriptor. @@ -1127,15 +1118,16 @@ impl FwSecBiosImage { .data .get(ucode_data_offset..ucode_data_offset + size) .ok_or(ERANGE) - .inspect_err(|_| dev_err!(dev, "fwsec ucode data not contained within BIOS bounds\n")) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) } /// Get the signatures as a byte slice - pub(crate) fn sigs( - &self, - dev: &device::Device, - desc: &FalconUCodeDescV3, - ) -> Result<&[Bcrt30Rsa3kSignature]> { + pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>(); let sigs_size = @@ -1144,7 +1136,7 @@ impl FwSecBiosImage { // Make sure the data is within bounds. if sigs_data_offset + sigs_size > self.base.data.len() { dev_err!( - dev, + self.base.dev, "fwsec signatures data not contained within BIOS bounds\n" ); return Err(ERANGE); |
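The same pattern closes out the file: `header()`, `ucode()` and `sigs()` now log through `self.base.dev` and no longer take a `device::Device` argument. As a minimal sketch of the bounds handling behind `ucode()` (helper name is illustrative; offsets and sizes are assumed to come from the `FalconUCodeDescV3` descriptor, and the driver maps the failure to `ERANGE`):

    // The microcode payload follows the descriptor; slice::get() turns an
    // out-of-range request into an error path instead of a panic.
    fn payload_after_descriptor(
        data: &[u8],
        desc_offset: usize,
        desc_size: usize,
        payload_size: usize,
    ) -> Option<&[u8]> {
        let start = desc_offset.checked_add(desc_size)?;
        let end = start.checked_add(payload_size)?;
        data.get(start..end)
    }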